├── .gitignore ├── LICENSE ├── chapter 2 ├── sampleCode1.py ├── sampleCode2.py ├── sampleCode3.py ├── sampleCode4.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py ├── chapter 3 ├── GitHub Tracking Application │ ├── GitHub Sample Application - Part 1.ipynb │ ├── GitHub Sample Application - Part 2.ipynb │ ├── GitHub Sample Application - Part 3.ipynb │ └── GitHub Sample Application - Part 4.ipynb ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.py ├── sampleCode12.py ├── sampleCode13.py ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode2.py ├── sampleCode3.py ├── sampleCode4.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py ├── chapter 4 ├── sampleCode1.py ├── sampleCode2.py └── sampleCode3.html ├── chapter 5 ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.html ├── sampleCode12.html ├── sampleCode13.html ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode19.py ├── sampleCode2.py ├── sampleCode20.py ├── sampleCode21.py ├── sampleCode22.py ├── sampleCode23.py ├── sampleCode24.py ├── sampleCode25.py ├── sampleCode26.py ├── sampleCode27.py ├── sampleCode28.py ├── sampleCode29.html ├── sampleCode3.py ├── sampleCode30.html ├── sampleCode31.html ├── sampleCode32.html ├── sampleCode33.html ├── sampleCode34.html ├── sampleCode35.html ├── sampleCode36.html ├── sampleCode37.html ├── sampleCode38.html ├── sampleCode39.py ├── sampleCode4.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.json ├── sampleCode8.py └── sampleCode9.py ├── chapter 6 ├── TensorFlow classification.ipynb ├── Tensorflow VR Part 1.ipynb ├── Tensorflow VR Part 2.ipynb ├── Tensorflow VR Part 3.ipynb ├── Tensorflow VR Part 4.ipynb ├── Visual Recognition │ └── mobilenet_v1_0.50_224 │ │ ├── frozen_graph.pb │ │ ├── labels.txt │ │ └── quantized_graph.pb ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.py ├── sampleCode12.py ├── sampleCode13.py ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode19.py ├── sampleCode2.py ├── sampleCode20.py ├── sampleCode21.py ├── sampleCode22.py ├── sampleCode23.py ├── sampleCode24.py ├── sampleCode25.py ├── sampleCode26.py ├── sampleCode27.py ├── sampleCode28.py ├── sampleCode29.py ├── sampleCode3.py ├── sampleCode30.py ├── sampleCode31.py ├── sampleCode32.py ├── sampleCode4.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py ├── chapter 7 ├── Twitter Sentiment Analysis - Part 1.ipynb ├── Twitter Sentiment Analysis - Part 2.ipynb ├── Twitter Sentiment Analysis - Part 3.ipynb ├── Twitter Sentiment Analysis - Part 4.ipynb ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.py ├── sampleCode12.py ├── sampleCode13.py ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode19.py ├── sampleCode2.py ├── sampleCode20.py ├── sampleCode21.py ├── sampleCode22.py ├── sampleCode23.py ├── sampleCode24.py ├── sampleCode25.py ├── sampleCode26.py ├── sampleCode27.py ├── sampleCode28.py ├── sampleCode29.py ├── sampleCode3.py ├── sampleCode30.py ├── sampleCode31.py ├── sampleCode32.py ├── sampleCode33.py ├── sampleCode34.json ├── sampleCode4.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py ├── chapter 8 ├── StockExplorer - Part 1.ipynb ├── StockExplorer - Part 2.ipynb ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.py ├── sampleCode12.py ├── sampleCode13.py ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode19.py ├── sampleCode2.py ├── sampleCode20.py ├── sampleCode21.py ├── sampleCode22.py ├── sampleCode23.py ├── sampleCode24.json ├── sampleCode25.py ├── sampleCode26.py ├── sampleCode27.py ├── sampleCode28.py ├── sampleCode29.py ├── sampleCode3.py ├── sampleCode30.py ├── sampleCode31.py ├── sampleCode32.py ├── sampleCode33.py ├── sampleCode34.py ├── sampleCode35.py ├── sampleCode36.py ├── sampleCode37.py ├── sampleCode38.py ├── sampleCode39.py ├── sampleCode4.json ├── sampleCode40.py ├── sampleCode41.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py └── chapter 9 ├── USFlightsAnalysis ├── US Flight data analysis - Part 1.ipynb ├── US Flight data analysis - Part 2.ipynb ├── US Flight data analysis - Part 3.ipynb ├── US Flight data analysis - Part 4.ipynb ├── airlines.csv ├── airports.csv └── flights.zip ├── sampleCode1.py ├── sampleCode10.py ├── sampleCode11.py ├── sampleCode12.py ├── sampleCode13.py ├── sampleCode14.py ├── sampleCode15.py ├── sampleCode16.py ├── sampleCode17.py ├── sampleCode18.py ├── sampleCode19.py ├── sampleCode2.py ├── sampleCode20.html ├── sampleCode21.py ├── sampleCode22.py ├── sampleCode23.json ├── sampleCode24.py ├── sampleCode25.py ├── sampleCode26.py ├── sampleCode27.py ├── sampleCode28.py ├── sampleCode29.py ├── sampleCode3.py ├── sampleCode30.py ├── sampleCode31.py ├── sampleCode32.py ├── sampleCode33.py ├── sampleCode34.py ├── sampleCode35.py ├── sampleCode36.py ├── sampleCode37.py ├── sampleCode38.py ├── sampleCode39.py ├── sampleCode4.py ├── sampleCode40.py ├── sampleCode41.py ├── sampleCode42.py ├── sampleCode43.py ├── sampleCode44.py ├── sampleCode5.py ├── sampleCode6.py ├── sampleCode7.py ├── sampleCode8.py └── sampleCode9.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /chapter 2/sampleCode1.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD" 3 | building_df = pandas.read_csv(data_url) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode2.py: -------------------------------------------------------------------------------- 1 | #Spark CSV Loading 2 | from pyspark.sql import SparkSession 3 | try: 4 | from urllib import urlretrieve 5 | except ImportError: 6 | #urlretrieve package has been refactored in Python 3 7 | from urllib.request import urlretrieve 8 | 9 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD" 10 | urlretrieve (data_url, "building.csv") 11 | 12 | spark = SparkSession.builder.getOrCreate() 13 | building_df = spark.read\ 14 | .format('org.apache.spark.sql.execution.datasources.csv.CSVFileFormat')\ 15 | .load("building.csv") 16 | -------------------------------------------------------------------------------- /chapter 2/sampleCode3.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | cars = pixiedust.sampleData(1) 3 | -------------------------------------------------------------------------------- /chapter 2/sampleCode4.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD" 3 | building_dataframe = pixiedust.sampleData(data_url, forcePandas=True) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode5.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | london_info = pixiedust.sampleData("https://files.datapress.com/london/dataset/london-borough-profiles/2015-09-24T15:50:01/London-borough-profiles.zip") 3 | display(london_info) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode6.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | data_url = "https://server/path" 3 | pixiedust.wrangleData(data_url) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode7.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data 3 | display(cars) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode8.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | homes = pixiedust.sampleData(6, forcePandas=True) #Million dollar home sales in NE Mass 3 | display(homes) 4 | -------------------------------------------------------------------------------- /chapter 2/sampleCode9.py: -------------------------------------------------------------------------------- 1 | #import the pixieapp decorators 2 | from pixiedust.display.app import * 3 | 4 | #Load the cars dataframe into the Notebook 5 | cars = pixiedust.sampleData(1) 6 | 7 | @PixieApp #decorator for making the class a PixieApp 8 | class HelloWorldApp(): 9 | @route() #decorator for making a method a route (no arguments means default route) 10 | def main_screen(self): 11 | return """ 12 | 13 | 14 |
15 | """ 16 | 17 | @route(show_chart="true") 18 | def chart(self): 19 | #Return a div bound to the cars dataframe using the pd_entity attribute 20 | #pd_entity can refer a class variable or a global variable scoped to the notebook 21 | return """ 22 |
23 | 24 | { 25 | "title": "Average Mileage by Horsepower", 26 | "aggregation": "AVG", 27 | "clusterby": "origin", 28 | "handlerId": "barChart", 29 | "valueFields": "mpg", 30 | "rendererId": "bokeh", 31 | "keyFields": "horsepower" 32 | } 33 | 34 |
35 | """ 36 | 37 | #Instantiate the application and run it 38 | app = HelloWorldApp() 39 | app.run() 40 | -------------------------------------------------------------------------------- /chapter 3/GitHub Tracking Application/GitHub Sample Application - Part 1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# GitHub Tracking Application Part 1" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "pixiedust": { 15 | "displayParams": {} 16 | } 17 | }, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter
" 23 | ], 24 | "text/plain": [ 25 | "" 26 | ] 27 | }, 28 | "metadata": {}, 29 | "output_type": "display_data" 30 | } 31 | ], 32 | "source": [ 33 | "from pixiedust.display.app import *\n", 34 | "import requests\n", 35 | "import pandas\n", 36 | "\n", 37 | "@PixieApp\n", 38 | "class GitHubTracking():\n", 39 | " @route()\n", 40 | " def main_screen(self):\n", 41 | " return \"\"\"\n", 42 | "\n", 50 | "
\n", 51 | "
\n", 52 | "
\n", 53 | "
\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | "
\n", 59 | "
\n", 60 | "
\n", 61 | "\"\"\"\n", 62 | " @route(query=\"*\")\n", 63 | " @templateArgs\n", 64 | " def do_search(self, query):\n", 65 | " self.first_url = \"https://api.github.com/search/repositories?q={}\".format(query)\n", 66 | " self.prev_url = None\n", 67 | " self.next_url = None\n", 68 | " self.last_url = None\n", 69 | " \n", 70 | " response = requests.get(self.first_url)\n", 71 | " if not response.ok:\n", 72 | " return \"
An Error occurred: {{response.text}}
\"\n", 73 | "\n", 74 | " total_count = response.json()['total_count']\n", 75 | " self.next_url = response.links.get('next', {}).get('url', None)\n", 76 | " self.last_url = response.links.get('last', {}).get('url', None)\n", 77 | " return \"\"\"\n", 78 | "

{{total_count}} repositories were found

\n", 79 | "\n", 85 | "\n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " {{this.invoke_route(this.do_retrieve_page, page='first_url')}}\n", 96 | " \n", 97 | "
Repo NameLastnameURLStars
\n", 98 | "\"\"\"\n", 99 | " @route(page=\"*\")\n", 100 | " @templateArgs\n", 101 | " def do_retrieve_page(self, page):\n", 102 | " url = getattr(self, page)\n", 103 | " if url is None:\n", 104 | " return \"
No more rows
\"\n", 105 | " response = requests.get(url)\n", 106 | " self.prev_url = response.links.get('prev', {}).get('url', None)\n", 107 | " self.next_url = response.links.get('next', {}).get('url', None)\n", 108 | " items = response.json()['items']\n", 109 | " return \"\"\"\n", 110 | "{%for row in items%}\n", 111 | "\n", 112 | " {{row['name']}}\n", 113 | " {{row.get('owner',{}).get('login', 'N/A')}}\n", 114 | " {{row['html_url']}}\n", 115 | " {{row['stargazers_count']}}\n", 116 | "\n", 117 | "{%endfor%}\n", 118 | " \"\"\"\n", 119 | "\n", 120 | "app = GitHubTracking()\n", 121 | "app.run()" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.5.4" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /chapter 3/GitHub Tracking Application/GitHub Sample Application - Part 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# GitHub Tracking Application Part 4\n", 8 | "Include a Repo Analysis Page \n", 9 | "Include a checkbox for switching between line chart and statistical summary \n", 10 | "Make the checkbox responsible by directly updating the statistics table " 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Setup the GitHub Credentials" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "github_user = \"dtaieb\"\n", 27 | "github_token = \"XXXXX\"" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Create the loader function for the commit activity GitHub API" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "pixiedust": { 42 | "displayParams": { 43 | "aggregation": "SUM", 44 | "handlerId": "lineChart", 45 | "keyFields": "week", 46 | "rendererId": "bokeh", 47 | "rowCount": "500", 48 | "timeseries": "false", 49 | "valueFields": "total" 50 | } 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "from datetime import datetime\n", 56 | "import requests\n", 57 | "import pixiedust\n", 58 | "import pandas\n", 59 | "def load_commit_activity(owner, repo_name):\n", 60 | " response = requests.get(\n", 61 | " \"https://api.github.com/repos/{}/{}/stats/commit_activity\".format(owner, repo_name),\n", 62 | " auth=(github_user, github_token)\n", 63 | " ).json()\n", 64 | " pdf = pandas.DataFrame([\n", 65 | " {\"total\": item[\"total\"], \"week\":datetime.fromtimestamp(item[\"week\"])} for item in response\n", 66 | " ])\n", 67 | " \n", 68 | " return {\n", 69 | " \"pdf\":pdf,\n", 70 | " \"chart_options\": {\n", 71 | " \"handlerId\": \"lineChart\",\n", 72 | " \"keyFields\": \"week\",\n", 73 | " \"valueFields\": \"total\",\n", 74 | " \"aggregation\": \"SUM\",\n", 75 | " \"rendererId\": \"bokeh\"\n", 76 | " }\n", 77 | " }" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "## Use display to get the chart options JSON payload" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "pixiedust": { 92 | "displayParams": { 93 | "aggregation": "SUM", 94 | "chartsize": "100", 95 | "handlerId": "lineChart", 96 | "keyFields": "week", 97 | "logx": "false", 98 | "logy": "false", 99 | "rendererId": "bokeh", 100 | "rowCount": "500", 101 | "valueFields": "total" 102 | } 103 | } 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "display(load_commit_activity(\"ibm-watson-data-lab\",\"pixiedust\")[\"pdf\"])" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Create the array that controls the type of data being analyzed" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "analyses = [(\"Commit Activity\", load_commit_activity)]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Implement the RepoAnalysis PixieApp that is responsible for visualizing the data" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "pixiedust": { 138 | "displayParams": {} 139 | } 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "from pixiedust.display.app import *\n", 144 | "import requests\n", 145 | "import pandas\n", 146 | "\n", 147 | "class RepoAnalysis():\n", 148 | " def setup(self):\n", 149 | " self.show_stats = False\n", 150 | " self.analyse_type = None\n", 151 | "\n", 152 | " @route(analyse_repo_owner=\"*\", analyse_repo_name=\"*\")\n", 153 | " @templateArgs\n", 154 | " def do_analyse_repo(self, analyse_repo_owner, analyse_repo_name):\n", 155 | " self._analyse_repo_owner = analyse_repo_owner\n", 156 | " self._analyse_repo_name = analyse_repo_name\n", 157 | " return \"\"\"\n", 158 | "
\n", 159 | "
\n", 160 | "
\n", 161 | " \n", 165 | " \n", 175 | "
\n", 176 | "
\n", 177 | " \n", 184 | "
\n", 185 | "
\n", 186 | "
\n", 188 | "
\n", 189 | "\"\"\"\n", 190 | " def get_pdf(self):\n", 191 | " if self.show_stats:\n", 192 | " summary = self.pdf.describe()\n", 193 | " summary.insert(0, \"Stat\", summary.index)\n", 194 | " return summary\n", 195 | " return self.pdf\n", 196 | " \n", 197 | " @route(display_analysis=\"*\")\n", 198 | " @templateArgs\n", 199 | " def do_analyse_type(self):\n", 200 | " fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == self.analyse_type]\n", 201 | " if len(fn) == 0:\n", 202 | " return \"No loader function found for {{analyse_type}}\"\n", 203 | " vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name)\n", 204 | " self.pdf = vis_info[\"pdf\"]\n", 205 | " chart_options = {\"handlerId\":\"dataframe\"} if self.show_stats else vis_info[\"chart_options\"]\n", 206 | " return \"\"\"\n", 207 | "
\n", 208 | " {{chart_options | tojson}}\n", 209 | "
\n", 210 | " \"\"\"" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "## GitHubTracking PixieApp class\n", 218 | "Main PixieApp class that inherits from RepoAnalysis" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "pixiedust": { 226 | "displayParams": {}, 227 | "pixieapp": { 228 | "query": "pixiedust" 229 | } 230 | } 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "@PixieApp\n", 235 | "class GitHubTracking(RepoAnalysis):\n", 236 | " \"\"\"\n", 237 | " GitHub Tracking Sample Application\n", 238 | " \"\"\"\n", 239 | " @route()\n", 240 | " def main_screen(self):\n", 241 | " return \"\"\"\n", 242 | "\n", 250 | "
\n", 251 | "
\n", 252 | "
\n", 253 | "
\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | "
\n", 259 | "
\n", 260 | "
\n", 261 | "\"\"\"\n", 262 | " @route(query=\"*\", persist_args='true')\n", 263 | " @templateArgs\n", 264 | " def do_search(self, query):\n", 265 | " self.first_url = \"https://api.github.com/search/repositories?q={}\".format(query)\n", 266 | " self.prev_url = None\n", 267 | " self.next_url = None\n", 268 | " self.last_url = None\n", 269 | " \n", 270 | " response = requests.get(self.first_url)\n", 271 | " if not response.ok:\n", 272 | " return \"
An Error occurred: {{response.text}}
\"\n", 273 | "\n", 274 | " total_count = response.json()['total_count']\n", 275 | " self.next_url = response.links.get('next', {}).get('url', None)\n", 276 | " self.last_url = response.links.get('last', {}).get('url', None)\n", 277 | " return \"\"\"\n", 278 | "

{{total_count}} repositories were found

\n", 279 | "\n", 285 | "\n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " {{this.invoke_route(this.do_retrieve_page, page='first_url')}}\n", 297 | " \n", 298 | "
Repo NameLastnameURLStarsActions
\n", 299 | "\"\"\"\n", 300 | " @route(page=\"*\")\n", 301 | " @templateArgs\n", 302 | " def do_retrieve_page(self, page):\n", 303 | " url = getattr(self, page)\n", 304 | " if url is None:\n", 305 | " return \"
No more rows
\"\n", 306 | " response = requests.get(url)\n", 307 | " self.prev_url = response.links.get('prev', {}).get('url', None)\n", 308 | " self.next_url = response.links.get('next', {}).get('url', None)\n", 309 | " items = response.json()['items']\n", 310 | " return \"\"\"\n", 311 | "{%for row in items%}\n", 312 | "\n", 313 | " {{row['name']}}\n", 314 | " {{row.get('owner',{}).get('login', 'N/A')}}\n", 315 | " {{row['html_url']}}\n", 316 | " {{row['stargazers_count']}}\n", 317 | " \n", 318 | " \n", 322 | " \n", 323 | "\n", 324 | "{%endfor%}\n", 325 | " \"\"\"\n", 326 | "\n", 327 | "app = GitHubTracking()\n", 328 | "app.run()" 329 | ] 330 | } 331 | ], 332 | "metadata": { 333 | "celltoolbar": "Edit Metadata", 334 | "kernelspec": { 335 | "display_name": "Python 3", 336 | "language": "python", 337 | "name": "python3" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.5.4" 350 | } 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 2 354 | } 355 | -------------------------------------------------------------------------------- /chapter 3/sampleCode1.py: -------------------------------------------------------------------------------- 1 | #import the pixieapp decorators 2 | from pixiedust.display.app import * 3 | 4 | @PixieApp #decorator for making the class a PixieApp 5 | class HelloWorldApp(): 6 | @route() #decorator for making a method a route (no arguments means default route) 7 | def main_screen(self): 8 | return """
Hello World
""" 9 | 10 | #Instantiate the application and run it 11 | app = HelloWorldApp() 12 | app.run() 13 | -------------------------------------------------------------------------------- /chapter 3/sampleCode10.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class TestEntity(): 4 | @route() 5 | def main_screen(self): 6 | return """ 7 |

Simple PixieApp with dynamically computed dataframe

8 |
9 | """ 10 | test = TestEntity() 11 | test.run() 12 | -------------------------------------------------------------------------------- /chapter 3/sampleCode11.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import requests 3 | import pandas 4 | def load_commit_activity(owner, repo_name): 5 | response = requests.get( 6 | "https://api.github.com/repos/{}/{}/stats/commit_activity".format(owner, repo_name), 7 | auth=(github_user, github_token) 8 | ).json() 9 | pdf = pandas.DataFrame([ 10 | {"total": item["total"], "week":datetime.fromtimestamp(item["week"])} for item in response 11 | ]) 12 | 13 | return { 14 | "pdf":pdf, 15 | "chart_options": { 16 | "handlerId": "lineChart", 17 | "keyFields": "week", 18 | "valueFields": "total", 19 | "aggregation": "SUM", 20 | "rendererId": "bokeh" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /chapter 3/sampleCode12.py: -------------------------------------------------------------------------------- 1 | [[RepoAnalysis]] 2 | @route(analyse_type="*") 3 | @templateArgs 4 | def do_analyse_type(self, analyse_type): 5 | fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == analyse_type] 6 | if len(fn) == 0: 7 | return "No loader function found for {{analyse_type}}" 8 | vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name) 9 | self.pdf = vis_info["pdf"] 10 | return """ 11 |
12 | {{vis_info["chart_options"] | tojson}} 13 |
14 | """ 15 | -------------------------------------------------------------------------------- /chapter 3/sampleCode13.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class GitHubTracking(RepoAnalysis): 3 | @route() 4 | def main_screen(self): 5 | <> 6 | 7 | @route(query="*") 8 | @templateArgs 9 | def do_search(self, query): 10 | <> 11 | 12 | @route(page="*") 13 | @templateArgs 14 | def do_retrieve_page(self, page): 15 | <> 16 | 17 | app = GitHubTracking() 18 | app.run() 19 | -------------------------------------------------------------------------------- /chapter 3/sampleCode14.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | 3 | def call_me(): 4 | print("Hello from call_me") 5 | 6 | @PixieApp 7 | class Test(): 8 | @route() 9 | def main_screen(self): 10 | return """ 11 | 12 | 13 |
14 | """ 15 | Test().run() 16 | -------------------------------------------------------------------------------- /chapter 3/sampleCode15.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class Test(): 3 | @route() 4 | def main_screen(self): 5 | return """ 6 | 15 | 16 |
17 | """ 18 | Test().run() 19 | -------------------------------------------------------------------------------- /chapter 3/sampleCode16.py: -------------------------------------------------------------------------------- 1 | [[RepoAnalysis]] 2 | @route(analyse_type="*") 3 | @templateArgs 4 | def do_analyse_type(self, analyse_type): 5 | fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == analyse_type] 6 | if len(fn) == 0: 7 | return "No loader function found for {{analyse_type}}" 8 | vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name) 9 | self.pdf = vis_info["pdf"] 10 | chart_options = {"handlerId":"dataframe"} if self.show_stats else vis_info["chart_options"] 11 | return """ 12 |
13 | {{chart_options | tojson}} 14 |
15 | """ 16 | -------------------------------------------------------------------------------- /chapter 3/sampleCode17.py: -------------------------------------------------------------------------------- 1 | def get_pdf(self): 2 | if self.show_stats: 3 | summary = self.pdf.describe() 4 | summary.insert(0, "Stat", summary.index) 5 | return summary 6 | return self.pdf 7 | -------------------------------------------------------------------------------- /chapter 3/sampleCode18.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | 3 | @PixieApp 4 | class WidgetApp(): 5 | @route(widget="my_widget") 6 | def widget_main_screen(self): 7 | return "
Hello World Widget
" 8 | 9 | @PixieApp 10 | class ConsumerApp(WidgetApp): 11 | @route() 12 | def main_screen(self): 13 | return """
""" 14 | 15 | ConsumerApp().run() 16 | -------------------------------------------------------------------------------- /chapter 3/sampleCode2.py: -------------------------------------------------------------------------------- 1 | @route(state1="*", state2="*") 2 | def my_method(self, state1, state2): 3 | return "
State1 is {{state1}}. State2 is {{state2}}
" 4 | -------------------------------------------------------------------------------- /chapter 3/sampleCode3.py: -------------------------------------------------------------------------------- 1 | @route() 2 | @templateArgs 3 | def main_screen(self): 4 | var1 = self.compute_something() 5 | var2 = self.compute_something_else() 6 | return "
var1 is {{var1}}. var2 is {{var2}}
" 7 | -------------------------------------------------------------------------------- /chapter 3/sampleCode4.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | 3 | @PixieApp 4 | class GitHubTracking(): 5 | @route() 6 | def main_screen(self): 7 | return """ 8 | 16 |
17 |
18 |
19 |
20 | 21 | 22 | 23 | 24 |
25 |
26 |
27 | """ 28 | 29 | app = GitHubTracking() 30 | app.run() 31 | -------------------------------------------------------------------------------- /chapter 3/sampleCode5.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pandas 3 | [[GitHubTracking]] 4 | @route(query="*") 5 | @templateArgs 6 | def do_search(self, query): 7 | response = requests.get( "https://api.github.com/search/repositories?q={}".format(query)) 8 | frames = [pandas.DataFrame(response.json()['items'])] 9 | while response.ok and "next" in response.links: 10 | response = requests.get(response.links['next']['url']) 11 | frames.append(pandas.DataFrame(response.json()['items'])) 12 | 13 | pdf = pandas.concat(frames) 14 | response = requests.get( "https://api.github.com/search/repositories?q={}".format(query)) 15 | if not response.ok: 16 | return "
An Error occurred: {{response.text}}
" 17 | return """

{{pdf|length}} repositories were found

""" 18 | -------------------------------------------------------------------------------- /chapter 3/sampleCode6.py: -------------------------------------------------------------------------------- 1 | [[GitHubTracking]] 2 | @route(query="*") 3 | @templateArgs 4 | def do_search(self, query): 5 | self.first_url = "https://api.github.com/search/repositories?q={}".format(query) 6 | self.prev_url = None 7 | self.next_url = None 8 | self.last_url = None 9 | 10 | response = requests.get(self.first_url) 11 | if not response.ok: 12 | return "
An Error occurred: {{response.text}}
" 13 | 14 | total_count = response.json()['total_count'] 15 | self.next_url = response.links.get('next', {}).get('url', None) 16 | self.last_url = response.links.get('last', {}).get('url', None) 17 | return """ 18 |

{{total_count}} repositories were found

19 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | {{this.invoke_route(this.do_retrieve_page, page='first_url')}} 36 | 37 |
Repo NameLastnameURLStars
38 | """ 39 | -------------------------------------------------------------------------------- /chapter 3/sampleCode7.py: -------------------------------------------------------------------------------- 1 | [[GitHubTracking]] 2 | @route(page="*") 3 | @templateArgs 4 | def do_retrieve_page(self, page): 5 | url = getattr(self, page) 6 | if url is None: 7 | return "
No more rows
" 8 | response = requests.get(url) 9 | self.prev_url = response.links.get('prev', {}).get('url', None) 10 | self.next_url = response.links.get('next', {}).get('url', None) 11 | items = response.json()['items'] 12 | return """ 13 | {%for row in items%} 14 | 15 | {{row['name']}} 16 | {{row.get('owner',{}).get('login', 'N/A')}} 17 | {{row['html_url']}} 18 | {{row['stargazers_count']}} 19 | 20 | {%endfor%} 21 | """ 22 | -------------------------------------------------------------------------------- /chapter 3/sampleCode8.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class RepoAnalysis(): 3 | @route(analyse_repo_owner="*", analyse_repo_name="*") 4 | @templateArgs 5 | def do_analyse_repo(self, analyse_repo_owner, analyse_repo_name): 6 | self._analyse_repo_owner = analyse_repo_owner 7 | self._analyse_repo_name = analyse_repo_name 8 | return """ 9 |
10 | 26 |
27 |
28 | """ 29 | -------------------------------------------------------------------------------- /chapter 3/sampleCode9.py: -------------------------------------------------------------------------------- 1 | def compute_pdf(key): 2 | return pandas.DataFrame([ 3 | {"col{}".format(i): "{}{}-{}".format(key,i,j) for i in range(4)} for j in range(10) 4 | ]) 5 | -------------------------------------------------------------------------------- /chapter 4/sampleCode1.py: -------------------------------------------------------------------------------- 1 | @route(query="*", persist_args='true') 2 | @templateArgs 3 | def do_search(self, query): 4 | self.first_url = "https://api.github.com/search/repositories?q={}".format(query) 5 | self.prev_url = None 6 | self.next_url = None 7 | self.last_url = None 8 | ... 9 | -------------------------------------------------------------------------------- /chapter 4/sampleCode2.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data 3 | display(cars) 4 | -------------------------------------------------------------------------------- /chapter 4/sampleCode3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Example page with embedded chart 6 | 7 | 8 |

Embedded a PixieDust Chart in a custom HTML Page

9 |
10 | 12 | View Chart 13 | 14 |
15 | 16 | 17 | -------------------------------------------------------------------------------- /chapter 5/sampleCode1.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | import requests 3 | from wordcloud import WordCloud 4 | import matplotlib.pyplot as plt 5 | 6 | @PixieApp 7 | class WordCloudApp(): 8 | @route() 9 | def main_screen(self): 10 | return """ 11 |
12 | 13 | 14 | 18 |
19 |
20 | """ 21 | 22 | @route(url="*") 23 | @captureOutput 24 | def generate_word_cloud(self, url): 25 | text = requests.get(url).text 26 | plt.axis("off") 27 | plt.imshow( 28 | WordCloud(max_font_size=40).generate(text), 29 | interpolation='bilinear' 30 | ) 31 | 32 | app = WordCloudApp() 33 | app.run() 34 | -------------------------------------------------------------------------------- /chapter 5/sampleCode10.py: -------------------------------------------------------------------------------- 1 | def newDisplayHandler(self, options, entity): 2 | if self.streamingDisplay is None: 3 | self.streamingDisplay = LineChartStreamingDisplay(options, entity) 4 | else: 5 | self.streamingDisplay.options = options 6 | return self.streamingDisplay 7 | -------------------------------------------------------------------------------- /chapter 5/sampleCode11.html: -------------------------------------------------------------------------------- 1 | 4 | 5 | or 6 | 7 | 16 | -------------------------------------------------------------------------------- /chapter 5/sampleCode12.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 |
Row 1
Row 2
Row 3
7 | -------------------------------------------------------------------------------- /chapter 5/sampleCode13.html: -------------------------------------------------------------------------------- 1 |
2 | Listening to button event 3 | 7 | 8 |
9 | -------------------------------------------------------------------------------- /chapter 5/sampleCode14.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class TestEvents(): 4 | @route() 5 | def main_screen(self): 6 | return """ 7 |
8 | 17 | 18 | 19 | 20 | 21 |
Row 1
Row 2
Row 3
22 |
23 |
24 |
25 |
26 | Listening to button event 27 | 28 | 29 |
30 |
31 | Listening to table event 32 | 33 | 34 |
35 |
36 |
37 | """ 38 | app = TestEvents() 39 | app.run() 40 | -------------------------------------------------------------------------------- /chapter 5/sampleCode15.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.display import * 2 | import pandas 3 | @PixiedustDisplay() 4 | class SimpleDisplayMeta(DisplayHandlerMeta): 5 | @addId 6 | def getMenuInfo(self,entity,dataHandler): 7 | if type(entity) is pandas.core.frame.DataFrame: 8 | return [ 9 | {"categoryId": "Table", "title": "Simple Table", "icon": "fa-table", "id": "simpleTest"} 10 | ] 11 | return [] 12 | def newDisplayHandler(self,options,entity): 13 | return SimpleDisplay(options,entity) 14 | -------------------------------------------------------------------------------- /chapter 5/sampleCode16.py: -------------------------------------------------------------------------------- 1 | class SimpleDisplay(Display): 2 | def doRender(self, handlerId): 3 | self._addHTMLTemplateString(""" 4 | 5 | 6 | {%for column in entity.columns.tolist()%} 7 | 8 | {%endfor%} 9 | 10 | 11 | {%for _, row in entity.iterrows()%} 12 | 13 | {%for value in row.tolist()%} 14 | 15 | {%endfor%} 16 | 17 | {%endfor%} 18 | 19 |
{{column}}
{{value}}
20 | """) 21 | -------------------------------------------------------------------------------- /chapter 5/sampleCode17.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.chart.renderers import PixiedustRenderer 2 | from pixiedust.display.chart.renderers.baseChartDisplay import BaseChartDisplay 3 | 4 | @PixiedustRenderer(rendererId="simpletable", id="tableView") 5 | class SimpleDisplayWithRenderer(BaseChartDisplay): 6 | def get_options_dialog_pixieapp(self): 7 | return None #No options needed 8 | 9 | def doRenderChart(self): 10 | return self.renderTemplateString(""" 11 | 12 | 13 | {%for column in entity.columns.tolist()%} 14 | 15 | {%endfor%} 16 | 17 | 18 | {%for _, row in entity.iterrows()%} 19 | 20 | {%for value in row.tolist()%} 21 | 22 | {%endfor%} 23 | 24 | {%endfor%} 25 | 26 |
{{column}}
{{value}}
27 | """) 28 | -------------------------------------------------------------------------------- /chapter 5/sampleCode18.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | def my_function(arg1, arg2): 3 | pdb.set_trace() 4 | do_something_here() 5 | -------------------------------------------------------------------------------- /chapter 5/sampleCode19.py: -------------------------------------------------------------------------------- 1 | %%pixie_debugger 2 | import pixiedust 3 | cars = pixiedust.sampleData(1, forcePandas=True) 4 | 5 | def count_cars(name): 6 | count = 0 7 | for row in cars.itertuples(): 8 | if name in row.name: 9 | count += 1 10 | return count 11 | 12 | count_cars('chevrolet') 13 | -------------------------------------------------------------------------------- /chapter 5/sampleCode2.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | import requests 3 | from wordcloud import WordCloud 4 | import matplotlib.pyplot as plt 5 | 6 | @PixieApp 7 | class WCChildApp(): 8 | @route(url='*') 9 | @captureOutput 10 | def generate_word_cloud(self, url): 11 | text = requests.get(url).text 12 | plt.axis("off") 13 | plt.imshow( 14 | WordCloud(max_font_size=40).generate(text), 15 | interpolation='bilinear' 16 | ) 17 | -------------------------------------------------------------------------------- /chapter 5/sampleCode20.py: -------------------------------------------------------------------------------- 1 | %%pixie_debugger -b count_cars 11 2 | import pixiedust 3 | cars = pixiedust.sampleData(1, forcePandas=True) 4 | 5 | def count_cars(name): 6 | count = 0 7 | for row in cars.itertuples(): 8 | if name in row.name: 9 | count += 1 10 | return count 11 | 12 | count_cars('chevrolet') 13 | -------------------------------------------------------------------------------- /chapter 5/sampleCode21.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class DisplayCars(): 4 | @route() 5 | def main_screen(self): 6 | return """ 7 |
8 | 9 | 10 | 11 | 12 | 16 |
17 |
18 | """ 19 | @route(col="*", query="*") 20 | def display_screen(self, col, query): 21 | self.pdf = cars.loc[cars[col].str.contains(query)] 22 | return """ 23 |
24 | 25 | { 26 | "handlerId": "tableView", 27 | "table_noschema": "true", 28 | "table_nosearch": "true", 29 | "table_nocount": "true" 30 | } 31 | 32 |
33 | """ 34 | app = DisplayCars() 35 | app.run() 36 | -------------------------------------------------------------------------------- /chapter 5/sampleCode22.py: -------------------------------------------------------------------------------- 1 | import pixiedust 2 | my_logger = pixiedust.getLogger(__name__) 3 | -------------------------------------------------------------------------------- /chapter 5/sampleCode23.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | from pixiedust.utils import Logger 3 | 4 | @PixieApp 5 | @Logger() 6 | class AppWithLogger(): 7 | @route() 8 | def main_screen(self): 9 | self.info("Calling default route") 10 | return "
hello world
" 11 | 12 | app = AppWithLogger() 13 | app.run() 14 | -------------------------------------------------------------------------------- /chapter 5/sampleCode24.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | 3 | @PixieApp 4 | class TestJSDebugger(): 5 | @route() 6 | def main_screen(self): 7 | return """ 8 | 14 | 15 | """ 16 | 17 | @route(state="*") 18 | def my_route(self, state): 19 | return "
Route called with state {{state}}
" 20 | 21 | app = TestJSDebugger() 22 | app.run() 23 | -------------------------------------------------------------------------------- /chapter 5/sampleCode25.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class MyApp(): 4 | @route(key1="value1", key2="*") 5 | def myroute_screen(self, key1, key2): 6 | return "
fragment: Key1 = {{key1}} - Key2 = {{key2}}" 7 | -------------------------------------------------------------------------------- /chapter 5/sampleCode26.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class MyApp(): 4 | @route(key1="value1", key2="*") 5 | @templateArgs 6 | def myroute_screen(self, key1, key2): 7 | local_var = "some value" 8 | return "
fragment: local_var = {{local_var}}" 9 | -------------------------------------------------------------------------------- /chapter 5/sampleCode27.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | import matplotlib.pyplot as plt 3 | @PixieApp 4 | class MyApp(): 5 | @route() 6 | @captureOutput 7 | def main_screen(self): 8 | plt.plot([1,2,3,4]) 9 | plt.show() 10 | -------------------------------------------------------------------------------- /chapter 5/sampleCode28.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | from pixiedust.utils import Logger 3 | @PixieApp 4 | @Logger() 5 | class MyApp(): 6 | @route() 7 | def main_screen(self): 8 | self.debug("In main_screen") 9 | return "
Hello World
" 10 | -------------------------------------------------------------------------------- /chapter 5/sampleCode29.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | { 4 | "mapboxtoken": "XXXXX", 5 | "chartsize": "90", 6 | "aggregation": "SUM", 7 | "rowCount": "500", 8 | "handlerId": "mapView", 9 | "rendererId": "mapbox", 10 | "valueFields": "IncidntNum", 11 | "keyFields": "X,Y", 12 | "basemap": "light-v9" 13 | } 14 | 15 |
16 | -------------------------------------------------------------------------------- /chapter 5/sampleCode3.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class WordCloudApp(): 3 | @route() 4 | def main_screen(self): 5 | return """ 6 |
7 | 8 | 9 | 15 |
16 |
17 | """ 18 | 19 | app = WordCloudApp() 20 | app.run() 21 | -------------------------------------------------------------------------------- /chapter 5/sampleCode30.html: -------------------------------------------------------------------------------- 1 | 2 | 5 | -------------------------------------------------------------------------------- /chapter 5/sampleCode31.html: -------------------------------------------------------------------------------- 1 |
2 | 5 | -------------------------------------------------------------------------------- /chapter 5/sampleCode32.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | 10 | -------------------------------------------------------------------------------- /chapter 5/sampleCode33.html: -------------------------------------------------------------------------------- 1 |
4 |
5 | -------------------------------------------------------------------------------- /chapter 5/sampleCode34.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | print('hello world rendered on load') 4 | 5 |
6 | -------------------------------------------------------------------------------- /chapter 5/sampleCode35.html: -------------------------------------------------------------------------------- 1 | 2 | 7 | -------------------------------------------------------------------------------- /chapter 5/sampleCode36.html: -------------------------------------------------------------------------------- 1 | 4 | 13 | -------------------------------------------------------------------------------- /chapter 5/sampleCode37.html: -------------------------------------------------------------------------------- 1 |
2 | Listening to button event 3 | 7 | 8 |
9 | -------------------------------------------------------------------------------- /chapter 5/sampleCode38.html: -------------------------------------------------------------------------------- 1 |
3 |
4 | -------------------------------------------------------------------------------- /chapter 5/sampleCode39.py: -------------------------------------------------------------------------------- 1 | def setup(self): 2 | self.var1 = "some initial value" 3 | self.pandas_dataframe = pandas.DataFrame(data) 4 | -------------------------------------------------------------------------------- /chapter 5/sampleCode4.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | import requests 3 | from wordcloud import WordCloud 4 | import matplotlib.pyplot as plt 5 | 6 | @PixieApp 7 | class WCChildApp(): 8 | @route(widget='wordcloud') 9 | @captureOutput 10 | def generate_word_cloud(self): 11 | text = requests.get(self.url).text if self.url else "" 12 | plt.axis("off") 13 | plt.imshow( 14 | WordCloud(max_font_size=40).generate(text), 15 | interpolation='bilinear' 16 | ) 17 | -------------------------------------------------------------------------------- /chapter 5/sampleCode5.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class WordCloudApp(WCChildApp): 3 | @route() 4 | def main_screen(self): 5 | self.url=None 6 | return """ 7 |
8 | 9 | 10 | 15 |
16 |
17 | """ 18 | 19 | app = WordCloudApp() 20 | app.run() 21 | -------------------------------------------------------------------------------- /chapter 5/sampleCode6.py: -------------------------------------------------------------------------------- 1 | @abstractmethod 2 | def doGetNextData(self): 3 | """Return the next batch of data from the underlying stream. 4 | Accepted return values are: 5 | 1. (x,y): tuple of list/numpy arrays representing the x and y axis 6 | 2. pandas dataframe 7 | 3. y: list/numpy array representing the y axis. In this case, the x axis is automatically created 8 | 4. pandas serie: similar to #3 9 | 5. json 10 | 6. geojson 11 | 7. url with supported payload (json/geojson) 12 | """ 13 | Pass 14 | -------------------------------------------------------------------------------- /chapter 5/sampleCode7.json: -------------------------------------------------------------------------------- 1 | { 2 | "geometry": { 3 | "type": "Point", 4 | "coordinates": [ 5 | -93.824908715741202, 10.875051131034805 6 | ] 7 | }, 8 | "type": "Feature", 9 | "properties": {} 10 | } 11 | -------------------------------------------------------------------------------- /chapter 5/sampleCode8.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.streaming import * 2 | 3 | class DroneStreamingAdapter(StreamingDataAdapter): 4 | def getMetadata(self): 5 | iconImage = "rocket-15" 6 | return { 7 | "layout": {"icon-image": iconImage, "icon-size": 1.5}, 8 | "type": "symbol" 9 | } 10 | def doGetNextData(self): 11 | return "https://wanderdrone.appspot.com/" 12 | adapter = DroneStreamingAdapter() 13 | display(adapter) 14 | -------------------------------------------------------------------------------- /chapter 5/sampleCode9.py: -------------------------------------------------------------------------------- 1 | @route(topic="*",streampreview="*",schemaX="*") 2 | def showChart(self, schemaX): 3 | self.schemaX = schemaX 4 | self.avgChannelData = self.streamingData.getStreamingChannel(self.computeAverages) 5 | return """ 6 |
7 |
Real-time chart for {{this.schemaX}}(average).
8 |
9 | … 10 |
11 | """ 12 | -------------------------------------------------------------------------------- /chapter 6/Tensorflow VR Part 1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Visual Recognition Sample Application Part 1\n", 8 | "## Define the model metadata" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 42, 14 | "metadata": { 15 | "pixiedust": { 16 | "displayParams": {} 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import tensorflow as tf\n", 22 | "import requests\n", 23 | "models = {\n", 24 | " \"mobilenet\": {\n", 25 | " \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n", 26 | " \"model_file_url\": \"frozen_graph.pb\",\n", 27 | " \"label_file\": \"labels.txt\",\n", 28 | " \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n", 29 | " }\n", 30 | "}\n", 31 | "\n", 32 | "# helper method for reading attributes from the model metadata\n", 33 | "def get_model_attribute(model, key, default_value = None):\n", 34 | " if key not in model:\n", 35 | " if default_value is None:\n", 36 | " raise Exception(\"Require model attribute {} not found\".format(key))\n", 37 | " return default_value\n", 38 | " return model[key]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Helper methods for loading the graph and labels for a given model" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 33, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Helper method for resolving url relative to the selected model\n", 55 | "def get_url(model, path):\n", 56 | " return model[\"base_url\"] + \"/\" + path\n", 57 | " \n", 58 | "# Download the serialized model and create a TensorFlow graph\n", 59 | "def load_graph(model):\n", 60 | " graph = tf.Graph()\n", 61 | " graph_def = tf.GraphDef()\n", 62 | " graph_def.ParseFromString(\n", 63 | " requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n", 64 | " )\n", 65 | " with graph.as_default():\n", 66 | " tf.import_graph_def(graph_def)\n", 67 | " return graph\n", 68 | "\n", 69 | "# Load the labels\n", 70 | "def load_labels(model, as_json = False):\n", 71 | " labels = [line.rstrip() \\\n", 72 | " for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n", 73 | " if line != \"\"]\n", 74 | " if as_json:\n", 75 | " return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n", 76 | " return labels" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Use BeautifulSoup to scrape the images from a given url" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 34, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "from bs4 import BeautifulSoup as BS\n", 93 | "import re\n", 94 | "\n", 95 | "# return an array of all the images scraped from an html page\n", 96 | "def get_image_urls(url):\n", 97 | " # Instantiate a BeautifulSoup parser\n", 98 | " soup = BS(requests.get(url).text, \"html.parser\")\n", 99 | " \n", 100 | " # Local helper method for extracting url\n", 101 | " def extract_url(val):\n", 102 | " m = re.match(r\"url\\((.*)\\)\", val)\n", 103 | " val = m.group(1) if m is not None else val\n", 104 | " return \"http:\" + val if val.startswith(\"//\") else val\n", 105 | " \n", 106 | " # List comprehension that look for elements and backgroud-image styles\n", 107 | " return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n", 108 | " extract_url(val.strip()) for key,val in \\\n", 109 | " [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n", 110 | " for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n", 111 | " if key.strip().lower()=='background-image' \\\n", 112 | " ]" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Helper method for downloading an image into a temp file" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 35, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "import tempfile\n", 129 | "def download_image(url):\n", 130 | " response = requests.get(url, stream=True)\n", 131 | " if response.status_code == 200:\n", 132 | " with tempfile.NamedTemporaryFile(delete=False) as f:\n", 133 | " for chunk in response.iter_content(2048):\n", 134 | " f.write(chunk)\n", 135 | " return f.name\n", 136 | " else:\n", 137 | " raise Exception(\"Unable to download image: {}\".format(response.status_code))" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Decode an image into a tensor" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 36, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "# decode a given image into a tensor\n", 154 | "def read_tensor_from_image_file(model, file_name):\n", 155 | " file_reader = tf.read_file(file_name, \"file_reader\")\n", 156 | " if file_name.endswith(\".png\"):\n", 157 | " image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n", 158 | " elif file_name.endswith(\".gif\"):\n", 159 | " image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n", 160 | " elif file_name.endswith(\".bmp\"):\n", 161 | " image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n", 162 | " else:\n", 163 | " image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n", 164 | " float_caster = tf.cast(image_reader, tf.float32)\n", 165 | " dims_expander = tf.expand_dims(float_caster, 0);\n", 166 | " \n", 167 | " # Read some info from the model metadata, providing default values\n", 168 | " input_height = get_model_attribute(model, \"input_height\", 224)\n", 169 | " input_width = get_model_attribute(model, \"input_width\", 224)\n", 170 | " input_mean = get_model_attribute(model, \"input_mean\", 0)\n", 171 | " input_std = get_model_attribute(model, \"input_std\", 255)\n", 172 | " \n", 173 | " resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n", 174 | " normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n", 175 | " sess = tf.Session()\n", 176 | " result = sess.run(normalized)\n", 177 | " return result" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "## Score_image method that run the model and return the top 5 candidate answers" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 37, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "import numpy as np\n", 194 | "\n", 195 | "# classify an image given its url\n", 196 | "def score_image(graph, model, url):\n", 197 | " # Get the input and output layer from the model\n", 198 | " input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n", 199 | " output_layer = get_model_attribute(model, \"output_layer\")\n", 200 | " \n", 201 | " # Download the image and build a tensor from its data\n", 202 | " t = read_tensor_from_image_file(model, download_image(url))\n", 203 | " \n", 204 | " # Retrieve the tensors corresponding to the input and output layers\n", 205 | " input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n", 206 | " output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n", 207 | "\n", 208 | " with tf.Session(graph=graph) as sess:\n", 209 | " # Execute the output, overriding the input tensor with the one corresponding\n", 210 | " # to the image in the feed_dict argument\n", 211 | " results = sess.run(output_tensor, {input_tensor: t})\n", 212 | " results = np.squeeze(results)\n", 213 | " # select the top 5 candidate and match them to the labels\n", 214 | " top_k = results.argsort()[-5:][::-1]\n", 215 | " labels = load_labels(model)\n", 216 | " return [(labels[i].split(\":\")[1], results[i]) for i in top_k]" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "## Test the model using a Flickr page" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 41, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "name": "stdout", 233 | "output_type": "stream", 234 | "text": [ 235 | "Results for https://geo.yahoo.com/b?s=792600534: \n", 236 | "\t[('nail', 0.034935154), ('screw', 0.03144558), ('puck, hockey puck', 0.03032596), ('envelope', 0.0285034), ('Band Aid', 0.027891463)]\n", 237 | "Results for http://c1.staticflickr.com/6/5598/14934282524_344c84246b_n.jpg: \n", 238 | "\t[('Egyptian cat', 0.4644194), ('tiger cat', 0.1485573), ('tabby, tabby cat', 0.09759513), ('plastic bag', 0.03814263), ('Siamese cat, Siamese', 0.033892646)]\n", 239 | "Results for http://c1.staticflickr.com/4/3677/13545844805_170ec3746b_n.jpg: \n", 240 | "\t[('tabby, tabby cat', 0.7330132), ('Egyptian cat', 0.14256532), ('tiger cat', 0.11719289), ('plastic bag', 0.0028653105), ('bow tie, bow-tie, bowtie', 0.00082955)]\n", 241 | "Results for http://c1.staticflickr.com/6/5170/5372754294_db6acaa1e5_n.jpg: \n", 242 | "\t[('Persian cat', 0.607673), ('Angora, Angora rabbit', 0.20204937), ('hamster', 0.02988311), ('Egyptian cat', 0.027227053), ('lynx, catamount', 0.018035706)]\n", 243 | "Results for http://c1.staticflickr.com/6/5589/14818641818_b0058c0cfc_m.jpg: \n", 244 | "\t[('Egyptian cat', 0.5786173), ('tabby, tabby cat', 0.27942237), ('tiger cat', 0.11966114), ('lynx, catamount', 0.016066141), ('plastic bag', 0.002206809)]\n", 245 | "Results for http://c1.staticflickr.com/6/5036/5881933297_7974eaff82_n.jpg: \n", 246 | "\t[('tiger cat', 0.26617262), ('tabby, tabby cat', 0.2417825), ('Persian cat', 0.18471399), ('lynx, catamount', 0.11543496), ('Egyptian cat', 0.025188642)]\n", 247 | "Results for http://c1.staticflickr.com/3/2602/3977203168_b9d02a0233.jpg: \n", 248 | "\t[('tabby, tabby cat', 0.75482476), ('tiger cat', 0.13780454), ('Egyptian cat', 0.05675489), ('Siamese cat, Siamese', 0.02073992), ('lynx, catamount', 0.010187127)]\n", 249 | "Results for http://c1.staticflickr.com/8/7401/16393044637_72e93d96b6_n.jpg: \n", 250 | "\t[('Egyptian cat', 0.67294717), ('tiger cat', 0.18149199), ('tabby, tabby cat', 0.0952419), ('lynx, catamount', 0.025225954), ('candle, taper, wax light', 0.003860443)]\n", 251 | "Results for http://c1.staticflickr.com/9/8110/8594699278_dd256c10fd_m.jpg: \n", 252 | "\t[('tabby, tabby cat', 0.5829553), ('Egyptian cat', 0.15930973), ('tiger cat', 0.12964381), ('lynx, catamount', 0.11114485), ('plastic bag', 0.006467772)]\n", 253 | "Results for http://c1.staticflickr.com/8/7023/6581178955_7e23af8bf9_m.jpg: \n", 254 | "\t[('tabby, tabby cat', 0.28574014), ('Egyptian cat', 0.190615), ('plastic bag', 0.17165014), ('lynx, catamount', 0.101593874), ('tiger cat', 0.040527806)]\n", 255 | "Results for http://c1.staticflickr.com/8/7313/9775005856_9b5e0ebe16_n.jpg: \n", 256 | "\t[('tiger cat', 0.40977326), ('tabby, tabby cat', 0.31697693), ('Egyptian cat', 0.16972947), ('lynx, catamount', 0.059500016), ('washer, automatic washer, washing machine', 0.0046033794)]\n", 257 | "Results for http://c1.staticflickr.com/8/7496/16236770082_205f4e358f_n.jpg: \n", 258 | "\t[('Egyptian cat', 0.40310237), ('Siamese cat, Siamese', 0.23720524), ('tiger cat', 0.100198396), ('tabby, tabby cat', 0.08537914), ('plastic bag', 0.0352822)]\n", 259 | "Results for http://c1.staticflickr.com/8/7049/13244364473_7b71bc5a4f_n.jpg: \n", 260 | "\t[('Egyptian cat', 0.59387493), ('candle, taper, wax light', 0.057717346), ('paper towel', 0.046397187), ('plastic bag', 0.035106137), ('tabby, tabby cat', 0.018382242)]\n", 261 | "Results for http://c1.staticflickr.com/4/3753/9837176706_9ecc1cddac_n.jpg: \n", 262 | "\t[('tabby, tabby cat', 0.55699265), ('Egyptian cat', 0.19758604), ('tiger cat', 0.12088148), ('lynx, catamount', 0.057880934), ('plastic bag', 0.01653284)]\n", 263 | "Results for http://c1.staticflickr.com/4/3488/4051998735_5b4863ac11_m.jpg: \n", 264 | "\t[('Egyptian cat', 0.5310361), ('tabby, tabby cat', 0.26919606), ('tiger cat', 0.13531871), ('lynx, catamount', 0.050503224), ('washer, automatic washer, washing machine', 0.0053878534)]\n", 265 | "Results for http://c1.staticflickr.com/9/8335/8086459588_46aae939c8.jpg: \n", 266 | "\t[('Siamese cat, Siamese', 0.827261), ('mouse, computer mouse', 0.046974737), ('screen, CRT screen', 0.029382586), ('carton', 0.0076049017), ('lynx, catamount', 0.0067297667)]\n", 267 | "Results for http://c1.staticflickr.com/8/7472/16230028882_c03cd6f2cc_n.jpg: \n", 268 | "\t[('tiger cat', 0.5394526), ('lynx, catamount', 0.14366476), ('Egyptian cat', 0.10943988), ('red fox, Vulpes vulpes', 0.07641454), ('tabby, tabby cat', 0.034076575)]\n", 269 | "Results for http://c1.staticflickr.com/4/3940/15504684310_f555c88915_n.jpg: \n", 270 | "\t[('tabby, tabby cat', 0.49280357), ('Egyptian cat', 0.31668788), ('tiger cat', 0.12977621), ('lynx, catamount', 0.022205332), ('plastic bag', 0.008769177)]\n", 271 | "Results for http://c1.staticflickr.com/9/8630/16556634997_ef0f9dd5f1_n.jpg: \n", 272 | "\t[('West Highland white terrier', 0.8534684), ('Angora, Angora rabbit', 0.038167812), ('Samoyed, Samoyede', 0.024762549), ('Scotch terrier, Scottish terrier, Scottie', 0.01685713), ('Persian cat', 0.01484343)]\n", 273 | "Results for http://c1.staticflickr.com/6/5226/5674849391_824822628c_n.jpg: \n", 274 | "\t[('tiger cat', 0.45084468), ('tabby, tabby cat', 0.40245533), ('Egyptian cat', 0.11048719), ('lynx, catamount', 0.024745336), ('tiger, Panthera tigris', 0.0064596836)]\n", 275 | "Results for http://c1.staticflickr.com/3/2234/1704658865_3b982b56cf_m.jpg: \n", 276 | "\t[('Angora, Angora rabbit', 0.21852449), ('Egyptian cat', 0.19025268), ('tabby, tabby cat', 0.14283349), ('Persian cat', 0.085699804), ('tiger cat', 0.06147669)]\n", 277 | "Results for http://c1.staticflickr.com/2/1361/5110233061_aa3b1c47ef_n.jpg: \n", 278 | "\t[('tabby, tabby cat', 0.6095775), ('tiger cat', 0.24819912), ('Egyptian cat', 0.13453156), ('lynx, catamount', 0.0021140918), ('carton', 0.0015312452)]\n", 279 | "Results for http://c1.staticflickr.com/4/3294/2434900370_17c1221ccf_n.jpg: \n", 280 | "\t[('Egyptian cat', 0.4372107), ('tabby, tabby cat', 0.26445335), ('tiger cat', 0.13057052), ('bow tie, bow-tie, bowtie', 0.06754344), ('lynx, catamount', 0.037636597)]\n", 281 | "Results for http://c1.staticflickr.com/3/2858/12174748174_27491cde33_n.jpg: \n", 282 | "\t[('tiger cat', 0.4069278), ('tabby, tabby cat', 0.23834446), ('Egyptian cat', 0.23789576), ('lynx, catamount', 0.11284405), ('tiger, Panthera tigris', 0.0008611009)]\n", 283 | "Results for http://c1.staticflickr.com/4/3674/13336301695_1cab4f5c85_n.jpg: \n", 284 | "\t[('weasel', 0.25950897), ('black-footed ferret, ferret, Mustela nigripes', 0.1795659), ('polecat, fitch, foulmart, foumart, Mustela putorius', 0.15248777), ('mink', 0.07626065), ('Egyptian cat', 0.04768039)]\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "model = models['mobilenet']\n", 290 | "graph = load_graph(model)\n", 291 | "image_urls = get_image_urls(\"https://www.flickr.com/search/?text=cats\")\n", 292 | "for url in image_urls:\n", 293 | " results = score_image(graph, model, url)\n", 294 | " print(\"Results for {}: \\n\\t{}\".format(url, results))" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [] 303 | } 304 | ], 305 | "metadata": { 306 | "celltoolbar": "Edit Metadata", 307 | "kernelspec": { 308 | "display_name": "Python 3", 309 | "language": "python", 310 | "name": "python3" 311 | }, 312 | "language_info": { 313 | "codemirror_mode": { 314 | "name": "ipython", 315 | "version": 3 316 | }, 317 | "file_extension": ".py", 318 | "mimetype": "text/x-python", 319 | "name": "python", 320 | "nbconvert_exporter": "python", 321 | "pygments_lexer": "ipython3", 322 | "version": "3.5.4" 323 | } 324 | }, 325 | "nbformat": 4, 326 | "nbformat_minor": 2 327 | } 328 | -------------------------------------------------------------------------------- /chapter 6/Tensorflow VR Part 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow Visual Recognition Sample Application Part 2\n", 8 | "\n", 9 | "## Provide a User Interface with a PixieApp\n", 10 | "\n", 11 | "## Define the model metadata" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "pixiedust": { 19 | "displayParams": {} 20 | } 21 | }, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "/Users/dtaieb/anaconda/envs/dashboard/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5\n", 28 | " return f(*args, **kwds)\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import tensorflow as tf\n", 34 | "import requests\n", 35 | "models = {\n", 36 | " \"mobilenet\": {\n", 37 | " \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n", 38 | " \"model_file_url\": \"frozen_graph.pb\",\n", 39 | " \"label_file\": \"labels.txt\",\n", 40 | " \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n", 41 | " }\n", 42 | "}\n", 43 | "\n", 44 | "# helper method for reading attributes from the model metadata\n", 45 | "def get_model_attribute(model, key, default_value = None):\n", 46 | " if key not in model:\n", 47 | " if default_value is None:\n", 48 | " raise Exception(\"Require model attribute {} not found\".format(key))\n", 49 | " return default_value\n", 50 | " return model[key]" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## Helper methods for loading the graph and labels for a given model" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Helper method for resolving url relative to the selected model\n", 67 | "def get_url(model, path):\n", 68 | " return model[\"base_url\"] + \"/\" + path\n", 69 | " \n", 70 | "# Download the serialized model and create a TensorFlow graph\n", 71 | "def load_graph(model):\n", 72 | " graph = tf.Graph()\n", 73 | " graph_def = tf.GraphDef()\n", 74 | " graph_def.ParseFromString(\n", 75 | " requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n", 76 | " )\n", 77 | " with graph.as_default():\n", 78 | " tf.import_graph_def(graph_def)\n", 79 | " return graph\n", 80 | "\n", 81 | "# Load the labels\n", 82 | "def load_labels(model, as_json = False):\n", 83 | " labels = [line.rstrip() \\\n", 84 | " for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n", 85 | " if line != \"\"]\n", 86 | " if as_json:\n", 87 | " return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n", 88 | " return labels" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Use BeautifulSoup to scrape the images from a given url" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "from bs4 import BeautifulSoup as BS\n", 105 | "import re\n", 106 | "\n", 107 | "# return an array of all the images scraped from an html page\n", 108 | "def get_image_urls(url):\n", 109 | " # Instantiate a BeautifulSoup parser\n", 110 | " soup = BS(requests.get(url).text, \"html.parser\")\n", 111 | " \n", 112 | " # Local helper method for extracting url\n", 113 | " def extract_url(val):\n", 114 | " m = re.match(r\"url\\((.*)\\)\", val)\n", 115 | " val = m.group(1) if m is not None else val\n", 116 | " return \"http:\" + val if val.startswith(\"//\") else val\n", 117 | " \n", 118 | " # List comprehension that look for elements and backgroud-image styles\n", 119 | " return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n", 120 | " extract_url(val.strip()) for key,val in \\\n", 121 | " [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n", 122 | " for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n", 123 | " if key.strip().lower()=='background-image' \\\n", 124 | " ]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Helper method for downloading an image into a temp file" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "import tempfile\n", 141 | "def download_image(url):\n", 142 | " response = requests.get(url, stream=True)\n", 143 | " if response.status_code == 200:\n", 144 | " with tempfile.NamedTemporaryFile(delete=False) as f:\n", 145 | " for chunk in response.iter_content(2048):\n", 146 | " f.write(chunk)\n", 147 | " return f.name\n", 148 | " else:\n", 149 | " raise Exception(\"Unable to download image: {}\".format(response.status_code))" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "## Decode an image into a tensor" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 5, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# decode a given image into a tensor\n", 166 | "def read_tensor_from_image_file(model, file_name):\n", 167 | " file_reader = tf.read_file(file_name, \"file_reader\")\n", 168 | " if file_name.endswith(\".png\"):\n", 169 | " image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n", 170 | " elif file_name.endswith(\".gif\"):\n", 171 | " image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n", 172 | " elif file_name.endswith(\".bmp\"):\n", 173 | " image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n", 174 | " else:\n", 175 | " image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n", 176 | " float_caster = tf.cast(image_reader, tf.float32)\n", 177 | " dims_expander = tf.expand_dims(float_caster, 0);\n", 178 | " \n", 179 | " # Read some info from the model metadata, providing default values\n", 180 | " input_height = get_model_attribute(model, \"input_height\", 224)\n", 181 | " input_width = get_model_attribute(model, \"input_width\", 224)\n", 182 | " input_mean = get_model_attribute(model, \"input_mean\", 0)\n", 183 | " input_std = get_model_attribute(model, \"input_std\", 255)\n", 184 | " \n", 185 | " resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n", 186 | " normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n", 187 | " sess = tf.Session()\n", 188 | " result = sess.run(normalized)\n", 189 | " return result" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Score_image method that run the model and return the top 5 candidate answers" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 6, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "import numpy as np\n", 206 | "\n", 207 | "# classify an image given its url\n", 208 | "def score_image(graph, model, url):\n", 209 | " # Get the input and output layer from the model\n", 210 | " input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n", 211 | " output_layer = get_model_attribute(model, \"output_layer\")\n", 212 | " \n", 213 | " # Download the image and build a tensor from its data\n", 214 | " t = read_tensor_from_image_file(model, download_image(url))\n", 215 | " \n", 216 | " # Retrieve the tensors corresponding to the input and output layers\n", 217 | " input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n", 218 | " output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n", 219 | "\n", 220 | " with tf.Session(graph=graph) as sess:\n", 221 | " # Execute the output, overriding the input tensor with the one corresponding\n", 222 | " # to the image in the feed_dict argument\n", 223 | " results = sess.run(output_tensor, {input_tensor: t})\n", 224 | " results = np.squeeze(results)\n", 225 | " # select the top 5 candidate and match them to the labels\n", 226 | " top_k = results.argsort()[-5:][::-1]\n", 227 | " labels = load_labels(model)\n", 228 | " return [(labels[i].split(\":\")[1], results[i]) for i in top_k]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## PixieApp with the following screens:\n", 236 | "1. Ask the user for a url to a web page\n", 237 | "2. Display the images with top 5 candidate classifications" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 9, 243 | "metadata": { 244 | "pixiedust": { 245 | "displayParams": {} 246 | } 247 | }, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/html": [ 252 | "
Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter
" 253 | ], 254 | "text/plain": [ 255 | "" 256 | ] 257 | }, 258 | "metadata": { 259 | "pixieapp_metadata": null 260 | }, 261 | "output_type": "display_data" 262 | } 263 | ], 264 | "source": [ 265 | "from pixiedust.display.app import *\n", 266 | "\n", 267 | "@PixieApp\n", 268 | "class ScoreImageApp():\n", 269 | " def setup(self):\n", 270 | " self.model = models[\"mobilenet\"]\n", 271 | " self.graph = load_graph( self.model )\n", 272 | "\n", 273 | " @route()\n", 274 | " def main_screen(self):\n", 275 | " return \"\"\"\n", 276 | "\n", 284 | "
\n", 285 | "
\n", 286 | "
\n", 287 | "
\n", 288 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | "
\n", 295 | "
\n", 296 | "
\n", 297 | "\"\"\"\n", 298 | " \n", 299 | " @route(image_url=\"*\")\n", 300 | " @templateArgs\n", 301 | " def do_process_url(self, image_url):\n", 302 | " image_urls = get_image_urls(image_url)\n", 303 | " return \"\"\"\n", 304 | "
\n", 305 | "{%for url in image_urls%}\n", 306 | "
\n", 307 | "\n", 308 | "
\n", 309 | "
\n", 310 | "{%endfor%}\n", 311 | "

\n", 312 | "

\n", 313 | " \"\"\"\n", 314 | " \n", 315 | " @route(score_url=\"*\")\n", 316 | " @templateArgs\n", 317 | " def do_score_url(self, score_url):\n", 318 | " results = score_image(self.graph, self.model, score_url)\n", 319 | " return \"\"\"\n", 320 | "
    \n", 321 | "{%for label, confidence in results%}\n", 322 | "
  • {{label}}: {{confidence}}
  • \n", 323 | "{%endfor%}\n", 324 | "
\n", 325 | "\"\"\"\n", 326 | " \n", 327 | "app = ScoreImageApp()\n", 328 | "app.run()" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [] 337 | } 338 | ], 339 | "metadata": { 340 | "celltoolbar": "Edit Metadata", 341 | "kernelspec": { 342 | "display_name": "Python 3", 343 | "language": "python", 344 | "name": "python3" 345 | }, 346 | "language_info": { 347 | "codemirror_mode": { 348 | "name": "ipython", 349 | "version": 3 350 | }, 351 | "file_extension": ".py", 352 | "mimetype": "text/x-python", 353 | "name": "python", 354 | "nbconvert_exporter": "python", 355 | "pygments_lexer": "ipython3", 356 | "version": "3.5.4" 357 | } 358 | }, 359 | "nbformat": 4, 360 | "nbformat_minor": 2 361 | } 362 | -------------------------------------------------------------------------------- /chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb -------------------------------------------------------------------------------- /chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb -------------------------------------------------------------------------------- /chapter 6/sampleCode1.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | x_input = tf.placeholder(tf.float32) 3 | y_output = tf.placeholder(tf.float32) 4 | eps = 0.01 5 | W1 = tf.Variable(tf.random_uniform([2,2], -eps, eps)) 6 | W2 = tf.Variable(tf.random_uniform([2,1], -eps, eps)) 7 | layer1 = tf.sigmoid(tf.matmul(x_input, W1)) 8 | output_layer = tf.sigmoid(tf.matmul(layer1, W2)) 9 | cost = tf.reduce_mean(tf.square(y_output - output_layer)) 10 | train = tf.train.GradientDescentOptimizer(0.05).minimize(cost) 11 | training_data = ([[0,0],[0,1],[1,0],[1,1]], [[0],[1],[1],[0]]) 12 | with tf.Session() as sess: 13 | sess.run(tf.global_variables_initializer()) 14 | for i in range(5000): 15 | sess.run(train, feed_dict={x_input: training_data[0], y_output: training_data[1]}) 16 | -------------------------------------------------------------------------------- /chapter 6/sampleCode10.py: -------------------------------------------------------------------------------- 1 | # Load the labels 2 | def load_labels(model, as_json = False): 3 | labels = [line.rstrip() \ 4 | for line in requests.get(get_url(model, model["label_file"]) ).text.split("\n") if line != ""] 5 | if as_json: 6 | return [{"index": item.split(":")[0],"label":item.split(":")[1]} for item in labels] 7 | return labels 8 | -------------------------------------------------------------------------------- /chapter 6/sampleCode11.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as BS 2 | import re 3 | 4 | # return an array of all the images scraped from an html page 5 | def get_image_urls(url): 6 | # Instantiate a BeautifulSoup parser 7 | soup = BS(requests.get(url).text, "html.parser") 8 | 9 | # Local helper method for extracting url 10 | def extract_url(val): 11 | m = re.match(r"url\((.*)\)", val) 12 | val = m.group(1) if m is not None else val 13 | return "http:" + val if val.startswith("//") else val 14 | 15 | # List comprehension that look for elements and backgroud-image styles 16 | return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \ 17 | extract_url(val.strip()) for key,val in \ 18 | [tuple(selector.split(":")) for elt in soup.select("[style]") \ 19 | for selector in elt["style"].strip(" ;").split(";")] \ 20 | if key.strip().lower()=='background-image' \ 21 | ] 22 | -------------------------------------------------------------------------------- /chapter 6/sampleCode12.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | def download_image(url): 3 | response = requests.get(url, stream=True) 4 | if response.status_code == 200: 5 | with tempfile.NamedTemporaryFile(delete=False) as f: 6 | for chunk in response.iter_content(2048): 7 | f.write(chunk) 8 | return f.name 9 | else: 10 | raise Exception("Unable to download image: {}".format(response.status_code)) 11 | -------------------------------------------------------------------------------- /chapter 6/sampleCode13.py: -------------------------------------------------------------------------------- 1 | # decode a given image into a tensor 2 | def read_tensor_from_image_file(model, file_name): 3 | file_reader = tf.read_file(file_name, "file_reader") 4 | if file_name.endswith(".png"): 5 | image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader') 6 | elif file_name.endswith(".gif"): 7 | image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader')) 8 | elif file_name.endswith(".bmp"): 9 | image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader') 10 | else: 11 | image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader') 12 | float_caster = tf.cast(image_reader, tf.float32) 13 | dims_expander = tf.expand_dims(float_caster, 0); 14 | 15 | # Read some info from the model metadata, providing default values 16 | input_height = get_model_attribute(model, "input_height", 224) 17 | input_width = get_model_attribute(model, "input_width", 224) 18 | input_mean = get_model_attribute(model, "input_mean", 0) 19 | input_std = get_model_attribute(model, "input_std", 255) 20 | 21 | resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width]) 22 | normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std]) 23 | sess = tf.Session() 24 | result = sess.run(normalized) 25 | return result 26 | -------------------------------------------------------------------------------- /chapter 6/sampleCode14.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # classify an image given its url 4 | def score_image(graph, model, url): 5 | # Get the input and output layer from the model 6 | input_layer = get_model_attribute(model, "input_layer", "input") 7 | output_layer = get_model_attribute(model, "output_layer") 8 | 9 | # Download the image and build a tensor from its data 10 | t = read_tensor_from_image_file(model, download_image(url)) 11 | 12 | # Retrieve the tensors corresponding to the input and output layers 13 | input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0"); 14 | output_tensor = graph.get_tensor_by_name("import/" + output_layer + ":0"); 15 | 16 | with tf.Session(graph=graph) as sess: 17 | results = sess.run(output_tensor, {input_tensor: t}) 18 | results = np.squeeze(results) 19 | # select the top 5 candidate and match them to the labels 20 | top_k = results.argsort()[-5:][::-1] 21 | labels = load_labels(model) 22 | return [(labels[i].split(":")[1], results[i]) for i in top_k] 23 | -------------------------------------------------------------------------------- /chapter 6/sampleCode15.py: -------------------------------------------------------------------------------- 1 | model = models['mobilenet'] 2 | graph = load_graph(model) 3 | image_urls = get_image_urls("https://www.flickr.com/search/?text=cats") 4 | for url in image_urls: 5 | results = score_image(graph, model, url) 6 | print("Result for {}: \n\t{}".format(url, results)) 7 | -------------------------------------------------------------------------------- /chapter 6/sampleCode16.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | 3 | @PixieApp 4 | class ScoreImageApp(): 5 | def setup(self): 6 | self.model = models["mobilenet"] 7 | self.graph = load_graph( self.model ) 8 | … 9 | -------------------------------------------------------------------------------- /chapter 6/sampleCode17.py: -------------------------------------------------------------------------------- 1 | [[ScoreImageApp]] 2 | @route() 3 | def main_screen(self): 4 | return """ 5 | 13 |
14 |
15 |
16 |
17 | 20 | 21 | 22 | 23 |
24 |
25 |
26 | """ 27 | -------------------------------------------------------------------------------- /chapter 6/sampleCode18.py: -------------------------------------------------------------------------------- 1 | [[ScoreImageApp]] 2 | @route(image_url="*") 3 | @templateArgs 4 | def do_process_url(self, image_url): 5 | image_urls = get_image_urls(image_url) 6 | return """ 7 |
8 | {%for url in image_urls%} 9 |
10 | 11 |
12 |
13 |
14 | {%endfor%} 15 |

16 |

17 | """ 18 | -------------------------------------------------------------------------------- /chapter 6/sampleCode19.py: -------------------------------------------------------------------------------- 1 | [[ScoreImageApp]] 2 | @route(score_url="*") 3 | @templateArgs 4 | def do_score_url(self, score_url): 5 | results = score_image(self.graph, self.model, score_url) 6 | return """ 7 |
    8 | {%for label, confidence in results%} 9 |
  • {{label}}: {{confidence}}
  • 10 | {%endfor%} 11 |
12 | """ 13 | -------------------------------------------------------------------------------- /chapter 6/sampleCode2.py: -------------------------------------------------------------------------------- 1 | def do_training(train, train_labels, test, test_labels, num_classes): 2 | #set TensorFlow logging level to INFO 3 | tf.logging.set_verbosity(tf.logging.INFO) 4 | 5 | # Build 2 hidden layer DNN with 10, 10 units respectively. 6 | classifier = tf.estimator.DNNClassifier( 7 | # Compute feature_columns from dataframe keys using list comprehension 8 | feature_columns = 9 | [tf.feature_column.numeric_column(key=key) for key in train.keys()], 10 | hidden_units=[10, 10], 11 | n_classes=num_classes) 12 | 13 | # Train the Model 14 | classifier.train( 15 | input_fn=lambda:train_input_fn(train, train_labels,100), 16 | steps=1000 17 | ) 18 | 19 | # Evaluate the model 20 | eval_result = classifier.evaluate( 21 | input_fn=lambda:eval_input_fn(test, test_labels,100) 22 | ) 23 | 24 | return (classifier, eval_result) 25 | -------------------------------------------------------------------------------- /chapter 6/sampleCode20.py: -------------------------------------------------------------------------------- 1 | [[ImageRecoApp]] 2 | from pixiedust.apps.template import TemplateTabbedApp 3 | @PixieApp 4 | class ImageRecoApp(TemplateTabbedApp): 5 | def setup(self): 6 | self.apps = [ 7 | {"title": "Score", "app_class": "ScoreImageApp"}, 8 | {"title": "Model", "app_class": "TensorGraphApp"}, 9 | {"title": "Labels", "app_class": "LabelsApp"} 10 | ] 11 | self.model = models["mobilenet"] 12 | self.graph = self.load_graph(self.model) 13 | 14 | app = ImageRecoApp() 15 | app.run() 16 | -------------------------------------------------------------------------------- /chapter 6/sampleCode21.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class TensorGraphApp(): 3 | """Visualize TensorFlow graph.""" 4 | def setup(self): 5 | self.graph = self.parent_pixieapp.graph 6 | 7 | @route() 8 | @templateArgs 9 | def main_screen(self): 10 | strip_def = self.strip_consts(self.graph.as_graph_def()) 11 | code = """ 12 | 17 | 18 |
19 | 20 |
21 | """.format(data=repr(str(strip_def)), id='graph'+ self.getPrefix()).replace('"', '"') 22 | 23 | return """ 24 | 25 | """ 26 | 27 | def strip_consts(self, graph_def, max_const_size=32): 28 | """Strip large constant values from graph_def.""" 29 | strip_def = tf.GraphDef() 30 | for n0 in graph_def.node: 31 | n = strip_def.node.add() 32 | n.MergeFrom(n0) 33 | if n.op == 'Const': 34 | tensor = n.attr['value'].tensor 35 | size = len(tensor.tensor_content) 36 | if size > max_const_size: 37 | tensor.tensor_content = "".format(size).encode("UTF-8") 38 | return strip_def 39 | -------------------------------------------------------------------------------- /chapter 6/sampleCode22.py: -------------------------------------------------------------------------------- 1 | [[LabelsApp]] 2 | @PixieApp 3 | class LabelsApp(): 4 | def setup(self): 5 | self.labels = self.parent_pixieapp.load_labels( 6 | self.parent_pixieapp.model, as_json=True 7 | ) 8 | 9 | @route() 10 | def main_screen(self): 11 | return """ 12 |
13 | 14 | { 15 | "table_noschema": "true", 16 | "handlerId": "tableView", 17 | "rowCount": "10000" 18 | } 19 | 20 |
21 | """ 22 | -------------------------------------------------------------------------------- /chapter 6/sampleCode23.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | wnid_to_urls = pandas.read_csv('/Users/dtaieb/Downloads/fall11_urls.txt', sep='\t', names=["wnid", "url"], 3 | header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1") 4 | wnid_to_urls['wnid'] = wnid_to_urls['wnid'].apply(lambda x: x.split("_")[0]) 5 | wnid_to_urls = wnid_to_urls.dropna() 6 | 7 | wnid_to_words = pandas.read_csv('/Users/dtaieb/Downloads/words.txt', sep='\t', names=["wnid", "description"], 8 | header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1") 9 | wnid_to_words = wnid_to_words.dropna() 10 | -------------------------------------------------------------------------------- /chapter 6/sampleCode24.py: -------------------------------------------------------------------------------- 1 | def get_url_for_keywords(keywords): 2 | results = {} 3 | for keyword in keywords: 4 | df = wnid_to_words.loc[wnid_to_words['description'] == keyword] 5 | row_list = df['wnid'].values.tolist() 6 | descriptions = df['description'].values.tolist() 7 | if len(row_list) > 0: 8 | results[descriptions[0]] = wnid_to_urls.loc[wnid_to_urls['wnid'] == row_list[0]]["url"].values.tolist() 9 | return results 10 | -------------------------------------------------------------------------------- /chapter 6/sampleCode25.py: -------------------------------------------------------------------------------- 1 | from pixiedust.utils.environment import Environment 2 | root_dir = ensure_dir_exists(os.path.join(Environment.pixiedustHome, "imageRecoApp") 3 | image_dir = root_dir 4 | image_dict = get_url_for_keywords(["apple", "orange", "pear", "banana"]) 5 | with open(os.path.join(image_dir, "retrained_label.txt"), "w") as f_label: 6 | for key in image_dict: 7 | f_label.write(key + "\n") 8 | path = ensure_dir_exists(os.path.join(image_dir, key)) 9 | count = 0 10 | for url in image_dict[key]: 11 | download_image_into_dir(url, path) 12 | count += 1 13 | if count > 500: 14 | break; 15 | -------------------------------------------------------------------------------- /chapter 6/sampleCode26.py: -------------------------------------------------------------------------------- 1 | def add_jpeg_decoding(model): 2 | input_height = get_model_attribute(model, "input_height") 3 | input_width = get_model_attribute(model, "input_width") 4 | input_depth = get_model_attribute(model, "input_depth") 5 | input_mean = get_model_attribute(model, "input_mean", 0) 6 | input_std = get_model_attribute(model, "input_std", 255) 7 | 8 | jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput') 9 | decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) 10 | decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) 11 | decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) 12 | resize_shape = tf.stack([input_height, input_width]) 13 | resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32) 14 | resized_image = tf.image.resize_bilinear(decoded_image_4d, 15 | resize_shape_as_int) 16 | offset_image = tf.subtract(resized_image, input_mean) 17 | mul_image = tf.multiply(offset_image, 1.0 / input_std) 18 | return jpeg_data, mul_image 19 | -------------------------------------------------------------------------------- /chapter 6/sampleCode27.py: -------------------------------------------------------------------------------- 1 | def run_bottleneck_on_image(sess, image_data, image_data_tensor,decoded_image_tensor, 2 | resized_input_tensor,bottleneck_tensor): 3 | # First decode the JPEG image, resize it, and rescale the pixel values. 4 | resized_input_values = sess.run(decoded_image_tensor,{image_data_tensor: image_data}) 5 | # Then run it through the recognition network. 6 | bottleneck_values = sess.run(bottleneck_tensor,{resized_input_tensor: resized_input_values}) 7 | bottleneck_values = np.squeeze(bottleneck_values) 8 | return bottleneck_values 9 | -------------------------------------------------------------------------------- /chapter 6/sampleCode28.py: -------------------------------------------------------------------------------- 1 | [[TensorGraphApp]] 2 | return """ 3 | {%if this.custom_graph%} 4 |
5 | 6 | self.graph = self.custom_graph if self.graph is not self.custom_graph else self.parent_pixieapp.graph 7 | 8 | Select a model to display: 9 | 13 | {%endif%} 14 | 15 | """ 16 | -------------------------------------------------------------------------------- /chapter 6/sampleCode29.py: -------------------------------------------------------------------------------- 1 | with tf.name_scope('cross_entropy'): 2 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=ground_truth_input, logits=logits) 3 | with tf.name_scope('total'): 4 | cross_entropy_mean = tf.reduce_mean(cross_entropy) 5 | -------------------------------------------------------------------------------- /chapter 6/sampleCode3.py: -------------------------------------------------------------------------------- 1 | def input_fn(features, labels, batch_size, train): 2 | # Convert the inputs to a Dataset and shuffle. 3 | dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)).shuffle(1000) 4 | if train: 5 | #repeat only for training 6 | dataset = dataset.repeat() 7 | # Return the dataset in batch 8 | return dataset.batch(batch_size) 9 | 10 | def train_input_fn(features, labels, batch_size): 11 | return input_fn(features, labels, batch_size, train=True) 12 | 13 | def eval_input_fn(features, labels, batch_size): 14 | return input_fn(features, labels, batch_size, train=False) 15 | -------------------------------------------------------------------------------- /chapter 6/sampleCode30.py: -------------------------------------------------------------------------------- 1 | [[LabelsApp]] 2 | @PixieApp 3 | class LabelsApp(): 4 | def setup(self): 5 | … 6 | 7 | @route() 8 | def main_screen(self): 9 | return """ 10 | {%if this.custom_labels%} 11 |
12 | 13 | self.current_labels = self.custom_labels if self.current_labels is not self.custom_labels else self.labels 14 | 15 | Select a model to display: 16 | 20 | {%endif%} 21 |
22 | 23 | { 24 | "table_noschema": "true", 25 | "handlerId": "tableView", 26 | "rowCount": "10000", 27 | "noChartCache": "true" 28 | 29 | } 30 | 31 |
32 | """ 33 | -------------------------------------------------------------------------------- /chapter 6/sampleCode31.py: -------------------------------------------------------------------------------- 1 | # classify an image given its url 2 | def score_image(graph, model, url): 3 | # Download the image and build a tensor from its data 4 | t = read_tensor_from_image_file(model, download_image(url)) 5 | 6 | def do_score_image(graph, output_layer, labels): 7 | # Retrieve the tensors corresponding to the input and output layers 8 | input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0"); 9 | output_tensor = graph.get_tensor_by_name( output_layer + ":0"); 10 | 11 | with tf.Session(graph=graph) as sess: 12 | # Initialize the variables 13 | sess.run(tf.global_variables_initializer()) 14 | results = sess.run(output_tensor, {input_tensor: t}) 15 | results = np.squeeze(results) 16 | # select the top 5 candidates and match them to the labels 17 | top_k = results.argsort()[-5:][::-1] 18 | return [(labels[i].split(":")[1], results[i]) for i in top_k] 19 | 20 | results = {} 21 | input_layer = get_model_attribute(model, "input_layer", "input") 22 | labels = load_labels(model) 23 | results["mobilenet"] = do_score_image(graph, "import/" + get_model_attribute(model, "output_layer"), labels) 24 | if "custom_graph" in model and "custom_labels" in model: 25 | with open(model["custom_labels"]) as f: 26 | labels = [line.rstrip() for line in f.readlines() if line != ""] 27 | custom_labels = ["{}:{}".format(i, label) for i,label in zip(range(len(labels)), labels)] 28 | results["custom"] = do_score_image(model["custom_graph"], "final_result", custom_labels) 29 | return results 30 | -------------------------------------------------------------------------------- /chapter 6/sampleCode32.py: -------------------------------------------------------------------------------- 1 | @route(score_url="*") 2 | @templateArgs 3 | def do_score_url(self, score_url): 4 | scores_dict = score_image(self.graph, self.model, score_url) 5 | return """ 6 | {%for model, results in scores_dict.items()%} 7 |
{{model}}
8 |
    9 | {%for label, confidence in results%} 10 |
  • {{label}}: {{confidence}}
  • 11 | {%endfor%} 12 |
13 | {%endfor%} 14 | """ 15 | -------------------------------------------------------------------------------- /chapter 6/sampleCode4.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class SimpleClassificationDNN(): 4 | @route() 5 | def main_screen(self): 6 | return """ 7 |

8 |
The classificiation model will be trained on all the numeric columns of the dataset
9 |

10 | 18 |
19 |
20 |
21 |
22 | 28 |
29 |
30 |
31 | """ 32 | -------------------------------------------------------------------------------- /chapter 6/sampleCode5.py: -------------------------------------------------------------------------------- 1 | @route(predictor="*") 2 | @templateArgs 3 | def prepare_training(self, predictor): 4 | #select only numerical columns 5 | self.dataset = self.pixieapp_entity.dropna(axis=1).select_dtypes( 6 | include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64'] 7 | ) 8 | #Compute the number of classed by counting the groups 9 | self.num_classes = self.dataset.groupby(predictor).size().shape[0] 10 | #Create the train and test feature and labels 11 | self.train_x=self.dataset.sample(frac=0.8) 12 | self.full_train = self.train_x.copy() 13 | self.train_y = self.train_x.pop(predictor) 14 | self.test_x=self.dataset.drop(self.train_x.index) 15 | self.full_test = self.test_x.copy() 16 | self.test_y=self.test_x.pop(predictor) 17 | 18 | bar_chart_options = { 19 | "rowCount": "100", 20 | "keyFields": predictor, 21 | "handlerId": "barChart", 22 | "noChartCache": "true" 23 | } 24 | 25 | return """ 26 |
27 |
28 |
29 |

Train set class distribution

30 |
31 | {{bar_chart_options|tojson}} 32 |
33 |
34 |
35 |

Test set class distribution

36 |
37 | {{bar_chart_options|tojson}} 38 |
39 |
40 |
41 |
42 | 43 |
44 | 47 |
48 | """ 49 | -------------------------------------------------------------------------------- /chapter 6/sampleCode6.py: -------------------------------------------------------------------------------- 1 | @route(do_training="*") 2 | @captureOutput 3 | def do_training_screen(self): 4 | self.classifier, self.eval_results = \ 5 | do_training( 6 | self.train_x, self.train_y, self.test_x, self.test_y, self.num_classes 7 | ) 8 | return """ 9 |

Training completed successfully

10 | 11 | 12 | 13 | 14 | 15 | 16 | {%for key,value in this.eval_results.items()%} 17 | 18 | 19 | 20 | 21 | {%endfor%} 22 | 23 |
MetricValue
{{key}}{{value}}
24 | """ 25 | -------------------------------------------------------------------------------- /chapter 6/sampleCode7.py: -------------------------------------------------------------------------------- 1 | models = { 2 | "mobilenet": { 3 | "base_url":"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224", 4 | "model_file_url": "frozen_graph.pb", 5 | "label_file": "labels.txt", 6 | "output_layer": "MobilenetV1/Predictions/Softmax" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /chapter 6/sampleCode8.py: -------------------------------------------------------------------------------- 1 | # helper method for reading attributes from the model metadata 2 | def get_model_attribute(model, key, default_value = None): 3 | if key not in model: 4 | if default_value is None: 5 | raise Exception("Require model attribute {} not found".format(key)) 6 | return default_value 7 | return model[key] 8 | -------------------------------------------------------------------------------- /chapter 6/sampleCode9.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import requests 3 | # Helper method for resolving url relative to the selected model 4 | def get_url(model, path): 5 | return model["base_url"] + "/" + path 6 | 7 | # Download the serialized model and create a TensorFlow graph 8 | def load_graph(model): 9 | graph = tf.Graph() 10 | graph_def = tf.GraphDef() 11 | graph_def.ParseFromString( 12 | requests.get( get_url( model, model["model_file_url"] ) ).content 13 | ) 14 | with graph.as_default(): 15 | tf.import_graph_def(graph_def) 16 | return graph 17 | -------------------------------------------------------------------------------- /chapter 7/sampleCode1.py: -------------------------------------------------------------------------------- 1 | from six import iteritems 2 | import json 3 | import csv 4 | from tweepy.streaming import StreamListener 5 | class RawTweetsListener(StreamListener): 6 | def __init__(self): 7 | self.buffered_data = [] 8 | self.counter = 0 9 | 10 | def flush_buffer_if_needed(self): 11 | "Check the buffer capacity and write to a new file if needed" 12 | length = len(self.buffered_data) 13 | if length > 0 and length % 10 == 0: 14 | with open(os.path.join( output_dir, "tweets{}.csv".format(self.counter)), "w") as fs: 15 | self.counter += 1 16 | csv_writer = csv.DictWriter( fs, fieldnames = fieldnames) 17 | for data in self.buffered_data: 18 | csv_writer.writerow(data) 19 | self.buffered_data = [] 20 | 21 | def on_data(self, data): 22 | def transform(key, value): 23 | return transforms[key](value) if key in transforms else value 24 | 25 | self.buffered_data.append( 26 | {key:transform(key,value) \ 27 | for key,value in iteritems(json.loads(data)) \ 28 | if key in fieldnames} 29 | ) 30 | self.flush_buffer_if_needed() 31 | return True 32 | 33 | def on_error(self, status): 34 | print("An error occured while receiving streaming data: {}".format(status)) 35 | return False 36 | -------------------------------------------------------------------------------- /chapter 7/sampleCode10.py: -------------------------------------------------------------------------------- 1 | parquet_batch_df = spark.sql( 2 | "select * from parquet.`{}`".format( 3 | os.path.join(root_dir, "output_parquet") 4 | ) 5 | ) 6 | -------------------------------------------------------------------------------- /chapter 7/sampleCode11.py: -------------------------------------------------------------------------------- 1 | from watson_developer_cloud import NaturalLanguageUnderstandingV1 2 | from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EntitiesOptions 3 | 4 | nlu = NaturalLanguageUnderstandingV1( 5 | version='2017-02-27', 6 | username='XXXX', 7 | password='XXXX' 8 | ) 9 | -------------------------------------------------------------------------------- /chapter 7/sampleCode12.py: -------------------------------------------------------------------------------- 1 | [[RawTweetsListener]] 2 | def enrich(self, data): 3 | try: 4 | response = nlu.analyze( 5 | text = data['text'], 6 | features = Features( 7 | sentiment=SentimentOptions(), 8 | entities=EntitiesOptions() 9 | ) 10 | ) 11 | data["sentiment"] = response["sentiment"]["document"]["label"] 12 | top_entity = response["entities"][0] if len(response["entities"]) > 0 else None 13 | data["entity"] = top_entity["text"] if top_entity is not None else "" 14 | data["entity_type"] = top_entity["type"] if top_entity is not None else "" 15 | return data 16 | except Exception as e: 17 | self.warn("Error from Watson service while enriching data: {}".format(e)) 18 | -------------------------------------------------------------------------------- /chapter 7/sampleCode13.py: -------------------------------------------------------------------------------- 1 | field_metadata = [ 2 | {"name": "created_at","type": DateType()}, 3 | {"name": "text", "type": StringType()}, 4 | {"name": "source", "type": StringType(), 5 | "transform": lambda s: BS(s, "html.parser").text.strip() 6 | }, 7 | {"name": "sentiment", "type": StringType()}, 8 | {"name": "entity", "type": StringType()}, 9 | {"name": "entity_type", "type": StringType()} 10 | ] 11 | -------------------------------------------------------------------------------- /chapter 7/sampleCode14.py: -------------------------------------------------------------------------------- 1 | def on_data(self, data): 2 | def transform(key, value): 3 | return transforms[key](value) if key in transforms else value 4 | data = self.enrich(json.loads(data)) 5 | if data is not None: 6 | self.buffered_data.append( 7 | {key:transform(key,value) \ 8 | for key,value in iteritems(data) \ 9 | if key in fieldnames} 10 | ) 11 | self.flush_buffer_if_needed() 12 | return True 13 | -------------------------------------------------------------------------------- /chapter 7/sampleCode15.py: -------------------------------------------------------------------------------- 1 | schema = StructType( 2 | [StructField(f["name"], f["type"], True) for f in field_metadata] 3 | ) 4 | csv_sdf = spark.readStream \ 5 | .csv( 6 | output_dir, 7 | schema=schema, 8 | multiLine = True, 9 | dateFormat = 'EEE MMM dd kk:mm:ss Z y', 10 | ignoreTrailingWhiteSpace = True, 11 | ignoreLeadingWhiteSpace = True 12 | ) 13 | csv_sdf.printSchema() 14 | -------------------------------------------------------------------------------- /chapter 7/sampleCode16.py: -------------------------------------------------------------------------------- 1 | def start_stream(queries): 2 | "Asynchronously start a new Twitter stream" 3 | stream = Stream(auth, RawTweetsListener()) 4 | stream.filter(track=queries, languages=["en"], async=True) 5 | return stream 6 | -------------------------------------------------------------------------------- /chapter 7/sampleCode17.py: -------------------------------------------------------------------------------- 1 | def start_streaming_dataframe(output_dir): 2 | "Start a Spark Streaming DataFrame from a file source" 3 | schema = StructType( 4 | [StructField(f["name"], f["type"], True) for f in field_metadata] 5 | ) 6 | return spark.readStream \ 7 | .csv( 8 | output_dir, 9 | schema=schema, 10 | multiLine = True, 11 | timestampFormat = 'EEE MMM dd kk:mm:ss Z yyyy', 12 | ignoreTrailingWhiteSpace = True, 13 | ignoreLeadingWhiteSpace = True 14 | ) 15 | -------------------------------------------------------------------------------- /chapter 7/sampleCode18.py: -------------------------------------------------------------------------------- 1 | def start_parquet_streaming_query(csv_sdf): 2 | """ 3 | Create and run a streaming query from a Structured DataFrame 4 | outputing the results into a parquet database 5 | """ 6 | streaming_query = csv_sdf \ 7 | .writeStream \ 8 | .format("parquet") \ 9 | .option("path", os.path.join(root_dir, "output_parquet")) \ 10 | .trigger(processingTime="2 seconds") \ 11 | .option("checkpointLocation", os.path.join(root_dir, "output_chkpt")) \ 12 | .start() 13 | return streaming_query 14 | -------------------------------------------------------------------------------- /chapter 7/sampleCode19.py: -------------------------------------------------------------------------------- 1 | class StreamsManager(): 2 | def __init__(self): 3 | self.twitter_stream = None 4 | self.csv_sdf = None 5 | 6 | def reset(self, search_query = None): 7 | if self.twitter_stream is not None: 8 | self.twitter_stream.disconnect() 9 | #stop all the active streaming queries and re_initialize the directories 10 | for query in spark.streams.active: 11 | query.stop() 12 | # initialize the directories 13 | self.root_dir, self.output_dir = init_output_dirs() 14 | # start the tweepy stream 15 | self.twitter_stream = start_stream([search_query]) if search_query is not None else None 16 | # start the spark streaming stream 17 | self.csv_sdf = start_streaming_dataframe(output_dir) if search_query is not None else None 18 | 19 | def __del__(self): 20 | # Automatically called when the class is garbage collected 21 | self.reset() 22 | 23 | streams_manager = StreamsManager() 24 | -------------------------------------------------------------------------------- /chapter 7/sampleCode2.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql.types import StringType, DateType 2 | from bs4 import BeautifulSoup as BS 3 | fieldnames = [f["name"] for f in field_metadata] 4 | transforms = {item['name']:item['transform'] for item in field_metadata if "transform" in item} 5 | field_metadata = [ 6 | {"name": "created_at","type": DateType()}, 7 | {"name": "text", "type": StringType()}, 8 | {"name": "source", "type": StringType(), 9 | "transform": lambda s: BS(s, "html.parser").text.strip() 10 | } 11 | ] 12 | -------------------------------------------------------------------------------- /chapter 7/sampleCode20.py: -------------------------------------------------------------------------------- 1 | from pixiedust.display.app import * 2 | @PixieApp 3 | class TweetInsightApp(): 4 | @route() 5 | def main_screen(self): 6 | return """ 7 | 15 |
16 |
17 |
18 |
19 | 22 | 23 | 26 | 27 |
28 |
29 |
30 | """ 31 | -------------------------------------------------------------------------------- /chapter 7/sampleCode21.py: -------------------------------------------------------------------------------- 1 | import time 2 | [[TweetInsightApp]] 3 | @route(search_query="*") 4 | def do_search_query(self, search_query): 5 | streams_manager.reset(search_query) 6 | start_parquet_streaming_query(streams_manager.csv_sdf) 7 | while True: 8 | try: 9 | parquet_dir = os.path.join(root_dir, "output_parquet") 10 | self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir)) 11 | break 12 | except: 13 | time.sleep(5) 14 | return """ 15 |
16 |
17 | 18 | print("Number of tweets received: {}".format(streams_manager.twitter_stream.listener.tweet_count)) 19 | 20 |
21 |
22 |
23 |
25 |
26 |
27 |
28 |
30 |
31 |
32 |
33 | 34 |
35 |
36 |
38 |
39 |
40 |
41 | """ 42 | -------------------------------------------------------------------------------- /chapter 7/sampleCode22.py: -------------------------------------------------------------------------------- 1 | [[TweetInsightApp]] 2 | @route(display_metric1="*") 3 | def do_display_metric1(self, display_metric1): 4 | parquet_dir = os.path.join(root_dir, "output_parquet") 5 | self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir)) 6 | return """ 7 |
8 | 9 | { 10 | "legend": "true", 11 | "keyFields": "sentiment", 12 | "clusterby": "entity_type", 13 | "handlerId": "barChart", 14 | "rendererId": "bokeh", 15 | "rowCount": "10", 16 | "sortby": "Values DESC", 17 | "noChartCache": "true" 18 | } 19 | 20 |
21 | """ 22 | -------------------------------------------------------------------------------- /chapter 7/sampleCode23.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from wordcloud import WordCloud 3 | [[TweetInsightApp]] 4 | @route(display_wc="*") 5 | @captureOutput 6 | def do_display_wc(self): 7 | text = "\n".join( 8 | [r['entity'] for r in self.parquet_df.select("entity").collect() if r['entity'] is not None] 9 | ) 10 | plt.figure( figsize=(13,7) ) 11 | plt.axis("off") 12 | plt.imshow( 13 | WordCloud(width=750, height=350).generate(text), 14 | interpolation='bilinear' 15 | ) 16 | -------------------------------------------------------------------------------- /chapter 7/sampleCode24.py: -------------------------------------------------------------------------------- 1 | @PixieApp 2 | class StreamingQueriesApp(): 3 | @route() 4 | def main_screen(self): 5 | return """ 6 |
7 |
8 | """ 9 | -------------------------------------------------------------------------------- /chapter 7/sampleCode25.py: -------------------------------------------------------------------------------- 1 | @route(show_progress="true") 2 | def do_show_progress(self): 3 | return """ 4 | {%for query in this.spark.streams.active%} 5 |
6 |