├── .gitignore
├── LICENSE
├── chapter 2
    ├── sampleCode1.py
    ├── sampleCode2.py
    ├── sampleCode3.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 3
    ├── GitHub Tracking Application
    │   ├── GitHub Sample Application - Part 1.ipynb
    │   ├── GitHub Sample Application - Part 2.ipynb
    │   ├── GitHub Sample Application - Part 3.ipynb
    │   └── GitHub Sample Application - Part 4.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode2.py
    ├── sampleCode3.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 4
    ├── sampleCode1.py
    ├── sampleCode2.py
    └── sampleCode3.html
├── chapter 5
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.html
    ├── sampleCode12.html
    ├── sampleCode13.html
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.html
    ├── sampleCode3.py
    ├── sampleCode30.html
    ├── sampleCode31.html
    ├── sampleCode32.html
    ├── sampleCode33.html
    ├── sampleCode34.html
    ├── sampleCode35.html
    ├── sampleCode36.html
    ├── sampleCode37.html
    ├── sampleCode38.html
    ├── sampleCode39.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.json
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 6
    ├── TensorFlow classification.ipynb
    ├── Tensorflow VR Part 1.ipynb
    ├── Tensorflow VR Part 2.ipynb
    ├── Tensorflow VR Part 3.ipynb
    ├── Tensorflow VR Part 4.ipynb
    ├── Visual Recognition
    │   └── mobilenet_v1_0.50_224
    │   │   ├── frozen_graph.pb
    │   │   ├── labels.txt
    │   │   └── quantized_graph.pb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 7
    ├── Twitter Sentiment Analysis - Part 1.ipynb
    ├── Twitter Sentiment Analysis - Part 2.ipynb
    ├── Twitter Sentiment Analysis - Part 3.ipynb
    ├── Twitter Sentiment Analysis - Part 4.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.json
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 8
    ├── StockExplorer - Part 1.ipynb
    ├── StockExplorer - Part 2.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.json
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.py
    ├── sampleCode35.py
    ├── sampleCode36.py
    ├── sampleCode37.py
    ├── sampleCode38.py
    ├── sampleCode39.py
    ├── sampleCode4.json
    ├── sampleCode40.py
    ├── sampleCode41.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
└── chapter 9
    ├── USFlightsAnalysis
        ├── US Flight data analysis - Part 1.ipynb
        ├── US Flight data analysis - Part 2.ipynb
        ├── US Flight data analysis - Part 3.ipynb
        ├── US Flight data analysis - Part 4.ipynb
        ├── airlines.csv
        ├── airports.csv
        └── flights.zip
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.html
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.json
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.py
    ├── sampleCode35.py
    ├── sampleCode36.py
    ├── sampleCode37.py
    ├── sampleCode38.py
    ├── sampleCode39.py
    ├── sampleCode4.py
    ├── sampleCode40.py
    ├── sampleCode41.py
    ├── sampleCode42.py
    ├── sampleCode43.py
    ├── sampleCode44.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | 
49 | # Translations
50 | *.mo
51 | *.pot
52 | 
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 | 
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 | 
61 | # Scrapy stuff:
62 | .scrapy
63 | 
64 | # Sphinx documentation
65 | docs/_build/
66 | 
67 | # PyBuilder
68 | target/
69 | 
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 | 
73 | # pyenv
74 | .python-version
75 | 
76 | # celery beat schedule file
77 | celerybeat-schedule
78 | 
79 | # SageMath parsed files
80 | *.sage.py
81 | 
82 | # dotenv
83 | .env
84 | 
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 | 
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 | 
94 | # Rope project settings
95 | .ropeproject
96 | 
97 | # mkdocs documentation
98 | /site
99 | 
100 | # mypy
101 | .mypy_cache/
102 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 | 
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 | 
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 | 
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 | 
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 | 
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 | 
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 | 
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 | 
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 | 
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 | 
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/chapter 2/sampleCode1.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
3 | building_df = pandas.read_csv(data_url)
--------------------------------------------------------------------------------
/chapter 2/sampleCode2.py:
--------------------------------------------------------------------------------
1 | #Spark CSV Loading
2 | from pyspark.sql import SparkSession
3 | try:
4 |     from urllib import urlretrieve
5 | except ImportError:
6 |     #urlretrieve package has been refactored in Python 3
7 |     from urllib.request import urlretrieve
8 | 
9 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
10 | urlretrieve(data_url, "building.csv")
11 | 
12 | spark = SparkSession.builder.getOrCreate()
13 | building_df = spark.read\
14 |     .format('org.apache.spark.sql.execution.datasources.csv.CSVFileFormat')\
15 |     .load("building.csv")
--------------------------------------------------------------------------------
/chapter 2/sampleCode3.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1)
--------------------------------------------------------------------------------
/chapter 2/sampleCode4.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
3 | building_dataframe = pixiedust.sampleData(data_url, forcePandas=True)
--------------------------------------------------------------------------------
/chapter 2/sampleCode5.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | london_info = pixiedust.sampleData("https://files.datapress.com/london/dataset/london-borough-profiles/2015-09-24T15:50:01/London-borough-profiles.zip")
3 | display(london_info)
--------------------------------------------------------------------------------
/chapter 2/sampleCode6.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | data_url = "https://server/path"
3 | pixiedust.wrangleData(data_url)
--------------------------------------------------------------------------------
/chapter 2/sampleCode7.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data
3 | display(cars)
--------------------------------------------------------------------------------
/chapter 2/sampleCode8.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | homes = pixiedust.sampleData(6, forcePandas=True) #Million dollar home sales in NE Mass
3 | display(homes)
--------------------------------------------------------------------------------
/chapter 2/sampleCode9.py:
--------------------------------------------------------------------------------
1 | #import the pixieapp decorators
2 | from pixiedust.display.app import *
3 | 
4 | #Load the cars dataframe into the Notebook
5 | cars = pixiedust.sampleData(1)
6 | 
7 | @PixieApp #decorator for making the class a PixieApp
8 | class HelloWorldApp():
9 |     @route() #decorator for making a method a route (no arguments means default route)
10 |     def main_screen(self):
11 |         return """
12 |         <button type="submit" pd_options="show_chart=true">
13 |             Show Chart
14 |         </button>
15 |         """
16 | 
17 |     @route(show_chart="true")
18 |     def chart(self):
19 |         #Return a div bound to the cars dataframe using the pd_entity attribute
20 |         #pd_entity can refer to a class variable or a global variable scoped to the notebook
21 |         return """
22 | 
--------------------------------------------------------------------------------
6 |
7 | @route(query="*")
8 | @templateArgs
9 | def do_search(self, query):
10 | <>
11 |
12 | @route(page="*")
13 | @templateArgs
14 | def do_retrieve_page(self, page):
15 | <>
16 |
17 | app = GitHubTracking()
18 | app.run()
19 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode14.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 |
3 | def call_me():
4 | print("Hello from call_me")
5 |
6 | @PixieApp
7 | class Test():
8 | @route()
9 | def main_screen(self):
10 | return """
11 |     <button type="submit" pd_script="call_me()">
12 |         Call me
13 |     </button>
14 | """
15 | Test().run()
16 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode15.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class Test():
3 | @route()
4 | def main_screen(self):
5 | return """
6 |
15 |
16 |
17 | """
18 | Test().run()
19 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode16.py:
--------------------------------------------------------------------------------
1 | [[RepoAnalysis]]
2 | @route(analyse_type="*")
3 | @templateArgs
4 | def do_analyse_type(self, analyse_type):
5 | fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == analyse_type]
6 | if len(fn) == 0:
7 | return "No loader function found for {{analyse_type}}"
8 | vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name)
9 | self.pdf = vis_info["pdf"]
10 | chart_options = {"handlerId":"dataframe"} if self.show_stats else vis_info["chart_options"]
11 | return """
12 |
13 | {{chart_options | tojson}}
14 |
15 | """
16 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode17.py:
--------------------------------------------------------------------------------
1 | def get_pdf(self):
2 | if self.show_stats:
3 | summary = self.pdf.describe()
4 | summary.insert(0, "Stat", summary.index)
5 | return summary
6 | return self.pdf
7 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode18.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 |
3 | @PixieApp
4 | class WidgetApp():
5 | @route(widget="my_widget")
6 | def widget_main_screen(self):
7 | return "Hello World Widget"
8 |
9 | @PixieApp
10 | class ConsumerApp(WidgetApp):
11 | @route()
12 | def main_screen(self):
13 | return """<div pd_widget="my_widget"></div>"""
14 |
15 | ConsumerApp().run()
16 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode2.py:
--------------------------------------------------------------------------------
1 | @route(state1="*", state2="*")
2 | def my_method(self, state1, state2):
3 | return "State1 is {{state1}}. State2 is {{state2}}"
4 |
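A route with two wildcard arguments fires when a single kernel request carries both keys. A minimal sketch of how such a request is typically triggered (hypothetical app, not part of the book's samples; pd_options is the standard PixieDust attribute for encoding route state as semicolon-separated key=value pairs):

from pixiedust.display.app import *

@PixieApp
class TwoStateDemo():
    @route()
    def main_screen(self):
        # clicking the button sends one kernel request carrying both state1 and state2
        return """<button type="submit" pd_options="state1=cold;state2=rainy">Update</button>"""

    @route(state1="*", state2="*")
    def my_method(self, state1, state2):
        return "State1 is {{state1}}. State2 is {{state2}}"

TwoStateDemo().run()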
--------------------------------------------------------------------------------
/chapter 3/sampleCode3.py:
--------------------------------------------------------------------------------
1 | @route()
2 | @templateArgs
3 | def main_screen(self):
4 | var1 = self.compute_something()
5 | var2 = self.compute_something_else()
6 | return "var1 is {{var1}}. var2 is {{var2}}"
7 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode4.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 |
3 | @PixieApp
4 | class GitHubTracking():
5 | @route()
6 | def main_screen(self):
7 | return """
8 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | """
28 |
29 | app = GitHubTracking()
30 | app.run()
31 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode5.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import pandas
3 | [[GitHubTracking]]
4 | @route(query="*")
5 | @templateArgs
6 | def do_search(self, query):
7 |     response = requests.get("https://api.github.com/search/repositories?q={}".format(query))
8 |     if not response.ok:
9 |         return "An Error occurred: {{response.text}}"
10 |     frames = [pandas.DataFrame(response.json()['items'])]
11 |     while response.ok and "next" in response.links:
12 |         response = requests.get(response.links['next']['url'])
13 |         frames.append(pandas.DataFrame(response.json()['items']))
14 | 
15 |     pdf = pandas.concat(frames)
16 |     return """{{pdf|length}} repositories were found
17 |     """
18 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode6.py:
--------------------------------------------------------------------------------
1 | [[GitHubTracking]]
2 | @route(query="*")
3 | @templateArgs
4 | def do_search(self, query):
5 | self.first_url = "https://api.github.com/search/repositories?q={}".format(query)
6 | self.prev_url = None
7 | self.next_url = None
8 | self.last_url = None
9 |
10 | response = requests.get(self.first_url)
11 | if not response.ok:
12 | return "An Error occurred: {{response.text}}"
13 |
14 | total_count = response.json()['total_count']
15 | self.next_url = response.links.get('next', {}).get('url', None)
16 | self.last_url = response.links.get('last', {}).get('url', None)
17 | return """
18 | {{total_count}} repositories were found
19 |
25 |
26 |
27 |
28 | Repo Name
29 | Lastname
30 | URL
31 | Stars
32 |
33 |
34 |
35 | {{this.invoke_route(this.do_retrieve_page, page='first_url')}}
36 |
37 |
38 | """
39 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode7.py:
--------------------------------------------------------------------------------
1 | [[GitHubTracking]]
2 | @route(page="*")
3 | @templateArgs
4 | def do_retrieve_page(self, page):
5 | url = getattr(self, page)
6 | if url is None:
7 | return "No more rows"
8 | response = requests.get(url)
9 | self.prev_url = response.links.get('prev', {}).get('url', None)
10 | self.next_url = response.links.get('next', {}).get('url', None)
11 | items = response.json()['items']
12 | return """
13 | {%for row in items%}
14 |
15 | {{row['name']}}
16 | {{row.get('owner',{}).get('login', 'N/A')}}
17 | {{row['html_url']}}
18 | {{row['stargazers_count']}}
19 |
20 | {%endfor%}
21 | """
22 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode8.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class RepoAnalysis():
3 | @route(analyse_repo_owner="*", analyse_repo_name="*")
4 | @templateArgs
5 | def do_analyse_repo(self, analyse_repo_owner, analyse_repo_name):
6 | self._analyse_repo_owner = analyse_repo_owner
7 | self._analyse_repo_name = analyse_repo_name
8 | return """
9 |
10 |
11 |
15 |
25 | 
26 | 
27 | 
28 | """
29 |
--------------------------------------------------------------------------------
/chapter 3/sampleCode9.py:
--------------------------------------------------------------------------------
1 | def compute_pdf(key):
2 | return pandas.DataFrame([
3 | {"col{}".format(i): "{}{}-{}".format(key,i,j) for i in range(4)} for j in range(10)
4 | ])
5 |
--------------------------------------------------------------------------------
/chapter 4/sampleCode1.py:
--------------------------------------------------------------------------------
1 | @route(query="*", persist_args='true')
2 | @templateArgs
3 | def do_search(self, query):
4 | self.first_url = "https://api.github.com/search/repositories?q={}".format(query)
5 | self.prev_url = None
6 | self.next_url = None
7 | self.last_url = None
8 | ...
9 |
--------------------------------------------------------------------------------
/chapter 4/sampleCode2.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data
3 | display(cars)
4 |
--------------------------------------------------------------------------------
/chapter 4/sampleCode3.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Example page with embedded chart
6 |
7 |
8 | Embedded a PixieDust Chart in a custom HTML Page
9 |
10 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode1.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | import requests
3 | from wordcloud import WordCloud
4 | import matplotlib.pyplot as plt
5 |
6 | @PixieApp
7 | class WordCloudApp():
8 | @route()
9 | def main_screen(self):
10 | return """
11 |
12 |
13 |
14 |
18 |
19 |
20 | """
21 |
22 | @route(url="*")
23 | @captureOutput
24 | def generate_word_cloud(self, url):
25 | text = requests.get(url).text
26 | plt.axis("off")
27 | plt.imshow(
28 | WordCloud(max_font_size=40).generate(text),
29 | interpolation='bilinear'
30 | )
31 |
32 | app = WordCloudApp()
33 | app.run()
34 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode10.py:
--------------------------------------------------------------------------------
1 | def newDisplayHandler(self, options, entity):
2 | if self.streamingDisplay is None:
3 | self.streamingDisplay = LineChartStreamingDisplay(options, entity)
4 | else:
5 | self.streamingDisplay.options = options
6 | return self.streamingDisplay
7 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode11.html:
--------------------------------------------------------------------------------
1 |
4 |
5 | or
6 |
7 |
16 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode12.html:
--------------------------------------------------------------------------------
1 |
3 | Row 1
4 | Row 2
5 | Row 3
6 |
7 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode13.html:
--------------------------------------------------------------------------------
1 |
2 | Listening to button event
3 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode14.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class TestEvents():
4 | @route()
5 | def main_screen(self):
6 | return """
7 |
8 |
17 |
18 | Row 1
19 | Row 2
20 | Row 3
21 |
22 |
23 |
24 |
25 |
26 | Listening to button event
27 |
28 |
29 |
30 |
31 | Listening to table event
32 |
33 |
34 |
35 |
36 |
37 | """
38 | app = TestEvents()
39 | app.run()
40 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode15.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.display import *
2 | import pandas
3 | @PixiedustDisplay()
4 | class SimpleDisplayMeta(DisplayHandlerMeta):
5 | @addId
6 | def getMenuInfo(self,entity,dataHandler):
7 | if type(entity) is pandas.core.frame.DataFrame:
8 | return [
9 | {"categoryId": "Table", "title": "Simple Table", "icon": "fa-table", "id": "simpleTest"}
10 | ]
11 | return []
12 | def newDisplayHandler(self,options,entity):
13 | return SimpleDisplay(options,entity)
14 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode16.py:
--------------------------------------------------------------------------------
1 | class SimpleDisplay(Display):
2 | def doRender(self, handlerId):
3 | self._addHTMLTemplateString("""
4 |
5 |
6 | {%for column in entity.columns.tolist()%}
7 | {{column}}
8 | {%endfor%}
9 |
10 |
11 | {%for _, row in entity.iterrows()%}
12 |
13 | {%for value in row.tolist()%}
14 | {{value}}
15 | {%endfor%}
16 |
17 | {%endfor%}
18 |
19 |
20 | """)
21 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode17.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.chart.renderers import PixiedustRenderer
2 | from pixiedust.display.chart.renderers.baseChartDisplay import BaseChartDisplay
3 |
4 | @PixiedustRenderer(rendererId="simpletable", id="tableView")
5 | class SimpleDisplayWithRenderer(BaseChartDisplay):
6 | def get_options_dialog_pixieapp(self):
7 | return None #No options needed
8 |
9 | def doRenderChart(self):
10 | return self.renderTemplateString("""
11 |
12 |
13 | {%for column in entity.columns.tolist()%}
14 | {{column}}
15 | {%endfor%}
16 |
17 |
18 | {%for _, row in entity.iterrows()%}
19 |
20 | {%for value in row.tolist()%}
21 | {{value}}
22 | {%endfor%}
23 |
24 | {%endfor%}
25 |
26 |
27 | """)
28 |
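To try this renderer, execute the class above in the notebook so the @PixiedustRenderer decorator registers it, then display any pandas DataFrame. A minimal usage sketch (the keyword-options form of display() is assumed here; "simpletable" matches the rendererId passed to the decorator):

import pixiedust
cars = pixiedust.sampleData(1, forcePandas=True)
# pre-select the custom renderer instead of picking it from the chart dropdown
display(cars, handlerId="tableView", rendererId="simpletable")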
--------------------------------------------------------------------------------
/chapter 5/sampleCode18.py:
--------------------------------------------------------------------------------
1 | import pdb
2 | def my_function(arg1, arg2):
3 | pdb.set_trace()
4 | do_something_here()
5 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode19.py:
--------------------------------------------------------------------------------
1 | %%pixie_debugger
2 | import pixiedust
3 | cars = pixiedust.sampleData(1, forcePandas=True)
4 |
5 | def count_cars(name):
6 | count = 0
7 | for row in cars.itertuples():
8 | if name in row.name:
9 | count += 1
10 | return count
11 |
12 | count_cars('chevrolet')
13 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode2.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | import requests
3 | from wordcloud import WordCloud
4 | import matplotlib.pyplot as plt
5 |
6 | @PixieApp
7 | class WCChildApp():
8 | @route(url='*')
9 | @captureOutput
10 | def generate_word_cloud(self, url):
11 | text = requests.get(url).text
12 | plt.axis("off")
13 | plt.imshow(
14 | WordCloud(max_font_size=40).generate(text),
15 | interpolation='bilinear'
16 | )
17 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode20.py:
--------------------------------------------------------------------------------
1 | %%pixie_debugger -b count_cars 11
2 | import pixiedust
3 | cars = pixiedust.sampleData(1, forcePandas=True)
4 |
5 | def count_cars(name):
6 | count = 0
7 | for row in cars.itertuples():
8 | if name in row.name:
9 | count += 1
10 | return count
11 |
12 | count_cars('chevrolet')
13 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode21.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class DisplayCars():
4 | @route()
5 | def main_screen(self):
6 | return """
7 |
8 |
9 |
10 |
11 |
12 |
16 |
17 |
18 | """
19 | @route(col="*", query="*")
20 | def display_screen(self, col, query):
21 | self.pdf = cars.loc[cars[col].str.contains(query)]
22 | return """
23 |
24 |
25 | {
26 | "handlerId": "tableView",
27 | "table_noschema": "true",
28 | "table_nosearch": "true",
29 | "table_nocount": "true"
30 | }
31 |
32 |
33 | """
34 | app = DisplayCars()
35 | app.run()
36 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode22.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | my_logger = pixiedust.getLogger(__name__)
3 |
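The returned object supports the standard Python logging methods (debug, info, warning, error). A short usage sketch with hypothetical messages:

my_logger.debug("Entering the search route")
my_logger.info("Query returned %d rows", 42)
my_logger.error("Unable to reach the GitHub API")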
--------------------------------------------------------------------------------
/chapter 5/sampleCode23.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | from pixiedust.utils import Logger
3 |
4 | @PixieApp
5 | @Logger()
6 | class AppWithLogger():
7 | @route()
8 | def main_screen(self):
9 | self.info("Calling default route")
10 | return "hello world"
11 |
12 | app = AppWithLogger()
13 | app.run()
14 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode24.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 |
3 | @PixieApp
4 | class TestJSDebugger():
5 | @route()
6 | def main_screen(self):
7 | return """
8 |
14 |
15 | """
16 |
17 | @route(state="*")
18 | def my_route(self, state):
19 | return "Route called with state {{state}}"
20 |
21 | app = TestJSDebugger()
22 | app.run()
23 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode25.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class MyApp():
4 | @route(key1="value1", key2="*")
5 | def myroute_screen(self, key1, key2):
6 | return "fragment: Key1 = {{key1}} - Key2 = {{key2}}"
7 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode26.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class MyApp():
4 | @route(key1="value1", key2="*")
5 | @templateArgs
6 | def myroute_screen(self, key1, key2):
7 | local_var = "some value"
8 | return "fragment: local_var = {{local_var}}"
9 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode27.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | import matplotlib.pyplot as plt
3 | @PixieApp
4 | class MyApp():
5 | @route()
6 | @captureOutput
7 | def main_screen(self):
8 | plt.plot([1,2,3,4])
9 | plt.show()
10 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode28.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | from pixiedust.utils import Logger
3 | @PixieApp
4 | @Logger()
5 | class MyApp():
6 | @route()
7 | def main_screen(self):
8 | self.debug("In main_screen")
9 | return "Hello World"
10 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode29.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | {
4 | "mapboxtoken": "XXXXX",
5 | "chartsize": "90",
6 | "aggregation": "SUM",
7 | "rowCount": "500",
8 | "handlerId": "mapView",
9 | "rendererId": "mapbox",
10 | "valueFields": "IncidntNum",
11 | "keyFields": "X,Y",
12 | "basemap": "light-v9"
13 | }
14 |
15 |
16 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode3.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class WordCloudApp():
3 | @route()
4 | def main_screen(self):
5 | return """
6 |
7 |
8 |
9 |
15 |
16 |
17 | """
18 |
19 | app = WordCloudApp()
20 | app.run()
21 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode30.html:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode31.html:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode32.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode33.html:
--------------------------------------------------------------------------------
1 |
4 |
5 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode34.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | print('hello world rendered on load')
4 |
5 |
6 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode35.html:
--------------------------------------------------------------------------------
1 |
2 |
7 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode36.html:
--------------------------------------------------------------------------------
1 |
4 |
13 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode37.html:
--------------------------------------------------------------------------------
1 |
2 | Listening to button event
3 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode38.html:
--------------------------------------------------------------------------------
1 |
3 |
4 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode39.py:
--------------------------------------------------------------------------------
1 | def setup(self):
2 | self.var1 = "some initial value"
3 | self.pandas_dataframe = pandas.DataFrame(data)
4 |
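setup() is called once by the PixieApp framework when run() is invoked, before the first route is rendered, which makes it the natural place for this kind of state initialization. A minimal sketch showing it in context (hypothetical app; the inline DataFrame stands in for whatever `data` holds in the fragment above):

from pixiedust.display.app import *
import pandas

@PixieApp
class SetupDemo():
    def setup(self):
        # runs once, before any route is served
        self.var1 = "some initial value"
        self.pandas_dataframe = pandas.DataFrame({"col1": [1, 2, 3]})

    @route()
    def main_screen(self):
        return "var1 is: " + self.var1

SetupDemo().run()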
--------------------------------------------------------------------------------
/chapter 5/sampleCode4.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | import requests
3 | from wordcloud import WordCloud
4 | import matplotlib.pyplot as plt
5 |
6 | @PixieApp
7 | class WCChildApp():
8 | @route(widget='wordcloud')
9 | @captureOutput
10 | def generate_word_cloud(self):
11 | text = requests.get(self.url).text if self.url else ""
12 | plt.axis("off")
13 | plt.imshow(
14 | WordCloud(max_font_size=40).generate(text),
15 | interpolation='bilinear'
16 | )
17 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode5.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class WordCloudApp(WCChildApp):
3 | @route()
4 | def main_screen(self):
5 | self.url=None
6 | return """
7 |
8 |
9 |
10 |
15 |
16 |
17 | """
18 |
19 | app = WordCloudApp()
20 | app.run()
21 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode6.py:
--------------------------------------------------------------------------------
1 | @abstractmethod
2 | def doGetNextData(self):
3 | """Return the next batch of data from the underlying stream.
4 | Accepted return values are:
5 | 1. (x,y): tuple of list/numpy arrays representing the x and y axis
6 | 2. pandas dataframe
7 | 3. y: list/numpy array representing the y axis. In this case, the x axis is automatically created
8 | 4. pandas serie: similar to #3
9 | 5. json
10 | 6. geojson
11 | 7. url with supported payload (json/geojson)
12 | """
13 | Pass
14 |
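For reference, a minimal concrete subclass (not from the book's samples) exercising return form #3 above, a plain list of y values, for which PixieDust creates the x axis automatically:

from pixiedust.display.streaming import *
import random

class RandomStreamingAdapter(StreamingDataAdapter):
    def doGetNextData(self):
        # return form #3: y values only; the x axis is generated automatically
        return [random.random() for _ in range(5)]

# render with a streaming-capable visualization, e.g. the line chart renderer
adapter = RandomStreamingAdapter()
display(adapter)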
--------------------------------------------------------------------------------
/chapter 5/sampleCode7.json:
--------------------------------------------------------------------------------
1 | {
2 | "geometry": {
3 | "type": "Point",
4 | "coordinates": [
5 | -93.824908715741202, 10.875051131034805
6 | ]
7 | },
8 | "type": "Feature",
9 | "properties": {}
10 | }
11 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode8.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.streaming import *
2 |
3 | class DroneStreamingAdapter(StreamingDataAdapter):
4 | def getMetadata(self):
5 | iconImage = "rocket-15"
6 | return {
7 | "layout": {"icon-image": iconImage, "icon-size": 1.5},
8 | "type": "symbol"
9 | }
10 | def doGetNextData(self):
11 | return "https://wanderdrone.appspot.com/"
12 | adapter = DroneStreamingAdapter()
13 | display(adapter)
14 |
--------------------------------------------------------------------------------
/chapter 5/sampleCode9.py:
--------------------------------------------------------------------------------
1 | @route(topic="*",streampreview="*",schemaX="*")
2 | def showChart(self, schemaX):
3 | self.schemaX = schemaX
4 | self.avgChannelData = self.streamingData.getStreamingChannel(self.computeAverages)
5 | return """
6 |
7 | Real-time chart for {{this.schemaX}} (average).
8 |
9 | …
10 |
11 | """
12 |
--------------------------------------------------------------------------------
/chapter 6/Tensorflow VR Part 1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# TensorFlow Visual Recognition Sample Application Part 1\n",
8 | "## Define the model metadata"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 42,
14 | "metadata": {
15 | "pixiedust": {
16 | "displayParams": {}
17 | }
18 | },
19 | "outputs": [],
20 | "source": [
21 | "import tensorflow as tf\n",
22 | "import requests\n",
23 | "models = {\n",
24 | " \"mobilenet\": {\n",
25 | " \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n",
26 | " \"model_file_url\": \"frozen_graph.pb\",\n",
27 | " \"label_file\": \"labels.txt\",\n",
28 | " \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n",
29 | " }\n",
30 | "}\n",
31 | "\n",
32 | "# helper method for reading attributes from the model metadata\n",
33 | "def get_model_attribute(model, key, default_value = None):\n",
34 | " if key not in model:\n",
35 | " if default_value is None:\n",
36 | " raise Exception(\"Required model attribute {} not found\".format(key))\n",
37 | " return default_value\n",
38 | " return model[key]"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Helper methods for loading the graph and labels for a given model"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 33,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "# Helper method for resolving url relative to the selected model\n",
55 | "def get_url(model, path):\n",
56 | " return model[\"base_url\"] + \"/\" + path\n",
57 | " \n",
58 | "# Download the serialized model and create a TensorFlow graph\n",
59 | "def load_graph(model):\n",
60 | " graph = tf.Graph()\n",
61 | " graph_def = tf.GraphDef()\n",
62 | " graph_def.ParseFromString(\n",
63 | " requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n",
64 | " )\n",
65 | " with graph.as_default():\n",
66 | " tf.import_graph_def(graph_def)\n",
67 | " return graph\n",
68 | "\n",
69 | "# Load the labels\n",
70 | "def load_labels(model, as_json = False):\n",
71 | " labels = [line.rstrip() \\\n",
72 | " for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n",
73 | " if line != \"\"]\n",
74 | " if as_json:\n",
75 | " return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n",
76 | " return labels"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "## Use BeautifulSoup to scrape the images from a given url"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 34,
89 | "metadata": {},
90 | "outputs": [],
91 | "source": [
92 | "from bs4 import BeautifulSoup as BS\n",
93 | "import re\n",
94 | "\n",
95 | "# return an array of all the images scraped from an html page\n",
96 | "def get_image_urls(url):\n",
97 | " # Instantiate a BeautifulSoup parser\n",
98 | " soup = BS(requests.get(url).text, \"html.parser\")\n",
99 | " \n",
100 | " # Local helper method for extracting url\n",
101 | " def extract_url(val):\n",
102 | " m = re.match(r\"url\\((.*)\\)\", val)\n",
103 | " val = m.group(1) if m is not None else val\n",
104 | " return \"http:\" + val if val.startswith(\"//\") else val\n",
105 | " \n",
106 | # List comprehension that looks for <img> elements and background-image styles\n",
107 | " return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n",
108 | " extract_url(val.strip()) for key,val in \\\n",
109 | " [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n",
110 | " for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n",
111 | " if key.strip().lower()=='background-image' \\\n",
112 | " ]"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "## Helper method for downloading an image into a temp file"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 35,
125 | "metadata": {},
126 | "outputs": [],
127 | "source": [
128 | "import tempfile\n",
129 | "def download_image(url):\n",
130 | " response = requests.get(url, stream=True)\n",
131 | " if response.status_code == 200:\n",
132 | " with tempfile.NamedTemporaryFile(delete=False) as f:\n",
133 | " for chunk in response.iter_content(2048):\n",
134 | " f.write(chunk)\n",
135 | " return f.name\n",
136 | " else:\n",
137 | " raise Exception(\"Unable to download image: {}\".format(response.status_code))"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "## Decode an image into a tensor"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 36,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "# decode a given image into a tensor\n",
154 | "def read_tensor_from_image_file(model, file_name):\n",
155 | " file_reader = tf.read_file(file_name, \"file_reader\")\n",
156 | " if file_name.endswith(\".png\"):\n",
157 | " image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n",
158 | " elif file_name.endswith(\".gif\"):\n",
159 | " image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n",
160 | " elif file_name.endswith(\".bmp\"):\n",
161 | " image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n",
162 | " else:\n",
163 | " image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n",
164 | " float_caster = tf.cast(image_reader, tf.float32)\n",
165 | " dims_expander = tf.expand_dims(float_caster, 0);\n",
166 | " \n",
167 | " # Read some info from the model metadata, providing default values\n",
168 | " input_height = get_model_attribute(model, \"input_height\", 224)\n",
169 | " input_width = get_model_attribute(model, \"input_width\", 224)\n",
170 | " input_mean = get_model_attribute(model, \"input_mean\", 0)\n",
171 | " input_std = get_model_attribute(model, \"input_std\", 255)\n",
172 | " \n",
173 | " resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n",
174 | " normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n",
175 | " sess = tf.Session()\n",
176 | " result = sess.run(normalized)\n",
177 | " return result"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "## Score_image method that runs the model and returns the top 5 candidate answers"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 37,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "import numpy as np\n",
194 | "\n",
195 | "# classify an image given its url\n",
196 | "def score_image(graph, model, url):\n",
197 | " # Get the input and output layer from the model\n",
198 | " input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n",
199 | " output_layer = get_model_attribute(model, \"output_layer\")\n",
200 | " \n",
201 | " # Download the image and build a tensor from its data\n",
202 | " t = read_tensor_from_image_file(model, download_image(url))\n",
203 | " \n",
204 | " # Retrieve the tensors corresponding to the input and output layers\n",
205 | " input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n",
206 | " output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n",
207 | "\n",
208 | " with tf.Session(graph=graph) as sess:\n",
209 | " # Execute the output, overriding the input tensor with the one corresponding\n",
210 | " # to the image in the feed_dict argument\n",
211 | " results = sess.run(output_tensor, {input_tensor: t})\n",
212 | " results = np.squeeze(results)\n",
213 | " # select the top 5 candidate and match them to the labels\n",
214 | " top_k = results.argsort()[-5:][::-1]\n",
215 | " labels = load_labels(model)\n",
216 | " return [(labels[i].split(\":\")[1], results[i]) for i in top_k]"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {},
222 | "source": [
223 | "## Test the model using a Flickr page"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": 41,
229 | "metadata": {},
230 | "outputs": [
231 | {
232 | "name": "stdout",
233 | "output_type": "stream",
234 | "text": [
235 | "Results for https://geo.yahoo.com/b?s=792600534: \n",
236 | "\t[('nail', 0.034935154), ('screw', 0.03144558), ('puck, hockey puck', 0.03032596), ('envelope', 0.0285034), ('Band Aid', 0.027891463)]\n",
237 | "Results for http://c1.staticflickr.com/6/5598/14934282524_344c84246b_n.jpg: \n",
238 | "\t[('Egyptian cat', 0.4644194), ('tiger cat', 0.1485573), ('tabby, tabby cat', 0.09759513), ('plastic bag', 0.03814263), ('Siamese cat, Siamese', 0.033892646)]\n",
239 | "Results for http://c1.staticflickr.com/4/3677/13545844805_170ec3746b_n.jpg: \n",
240 | "\t[('tabby, tabby cat', 0.7330132), ('Egyptian cat', 0.14256532), ('tiger cat', 0.11719289), ('plastic bag', 0.0028653105), ('bow tie, bow-tie, bowtie', 0.00082955)]\n",
241 | "Results for http://c1.staticflickr.com/6/5170/5372754294_db6acaa1e5_n.jpg: \n",
242 | "\t[('Persian cat', 0.607673), ('Angora, Angora rabbit', 0.20204937), ('hamster', 0.02988311), ('Egyptian cat', 0.027227053), ('lynx, catamount', 0.018035706)]\n",
243 | "Results for http://c1.staticflickr.com/6/5589/14818641818_b0058c0cfc_m.jpg: \n",
244 | "\t[('Egyptian cat', 0.5786173), ('tabby, tabby cat', 0.27942237), ('tiger cat', 0.11966114), ('lynx, catamount', 0.016066141), ('plastic bag', 0.002206809)]\n",
245 | "Results for http://c1.staticflickr.com/6/5036/5881933297_7974eaff82_n.jpg: \n",
246 | "\t[('tiger cat', 0.26617262), ('tabby, tabby cat', 0.2417825), ('Persian cat', 0.18471399), ('lynx, catamount', 0.11543496), ('Egyptian cat', 0.025188642)]\n",
247 | "Results for http://c1.staticflickr.com/3/2602/3977203168_b9d02a0233.jpg: \n",
248 | "\t[('tabby, tabby cat', 0.75482476), ('tiger cat', 0.13780454), ('Egyptian cat', 0.05675489), ('Siamese cat, Siamese', 0.02073992), ('lynx, catamount', 0.010187127)]\n",
249 | "Results for http://c1.staticflickr.com/8/7401/16393044637_72e93d96b6_n.jpg: \n",
250 | "\t[('Egyptian cat', 0.67294717), ('tiger cat', 0.18149199), ('tabby, tabby cat', 0.0952419), ('lynx, catamount', 0.025225954), ('candle, taper, wax light', 0.003860443)]\n",
251 | "Results for http://c1.staticflickr.com/9/8110/8594699278_dd256c10fd_m.jpg: \n",
252 | "\t[('tabby, tabby cat', 0.5829553), ('Egyptian cat', 0.15930973), ('tiger cat', 0.12964381), ('lynx, catamount', 0.11114485), ('plastic bag', 0.006467772)]\n",
253 | "Results for http://c1.staticflickr.com/8/7023/6581178955_7e23af8bf9_m.jpg: \n",
254 | "\t[('tabby, tabby cat', 0.28574014), ('Egyptian cat', 0.190615), ('plastic bag', 0.17165014), ('lynx, catamount', 0.101593874), ('tiger cat', 0.040527806)]\n",
255 | "Results for http://c1.staticflickr.com/8/7313/9775005856_9b5e0ebe16_n.jpg: \n",
256 | "\t[('tiger cat', 0.40977326), ('tabby, tabby cat', 0.31697693), ('Egyptian cat', 0.16972947), ('lynx, catamount', 0.059500016), ('washer, automatic washer, washing machine', 0.0046033794)]\n",
257 | "Results for http://c1.staticflickr.com/8/7496/16236770082_205f4e358f_n.jpg: \n",
258 | "\t[('Egyptian cat', 0.40310237), ('Siamese cat, Siamese', 0.23720524), ('tiger cat', 0.100198396), ('tabby, tabby cat', 0.08537914), ('plastic bag', 0.0352822)]\n",
259 | "Results for http://c1.staticflickr.com/8/7049/13244364473_7b71bc5a4f_n.jpg: \n",
260 | "\t[('Egyptian cat', 0.59387493), ('candle, taper, wax light', 0.057717346), ('paper towel', 0.046397187), ('plastic bag', 0.035106137), ('tabby, tabby cat', 0.018382242)]\n",
261 | "Results for http://c1.staticflickr.com/4/3753/9837176706_9ecc1cddac_n.jpg: \n",
262 | "\t[('tabby, tabby cat', 0.55699265), ('Egyptian cat', 0.19758604), ('tiger cat', 0.12088148), ('lynx, catamount', 0.057880934), ('plastic bag', 0.01653284)]\n",
263 | "Results for http://c1.staticflickr.com/4/3488/4051998735_5b4863ac11_m.jpg: \n",
264 | "\t[('Egyptian cat', 0.5310361), ('tabby, tabby cat', 0.26919606), ('tiger cat', 0.13531871), ('lynx, catamount', 0.050503224), ('washer, automatic washer, washing machine', 0.0053878534)]\n",
265 | "Results for http://c1.staticflickr.com/9/8335/8086459588_46aae939c8.jpg: \n",
266 | "\t[('Siamese cat, Siamese', 0.827261), ('mouse, computer mouse', 0.046974737), ('screen, CRT screen', 0.029382586), ('carton', 0.0076049017), ('lynx, catamount', 0.0067297667)]\n",
267 | "Results for http://c1.staticflickr.com/8/7472/16230028882_c03cd6f2cc_n.jpg: \n",
268 | "\t[('tiger cat', 0.5394526), ('lynx, catamount', 0.14366476), ('Egyptian cat', 0.10943988), ('red fox, Vulpes vulpes', 0.07641454), ('tabby, tabby cat', 0.034076575)]\n",
269 | "Results for http://c1.staticflickr.com/4/3940/15504684310_f555c88915_n.jpg: \n",
270 | "\t[('tabby, tabby cat', 0.49280357), ('Egyptian cat', 0.31668788), ('tiger cat', 0.12977621), ('lynx, catamount', 0.022205332), ('plastic bag', 0.008769177)]\n",
271 | "Results for http://c1.staticflickr.com/9/8630/16556634997_ef0f9dd5f1_n.jpg: \n",
272 | "\t[('West Highland white terrier', 0.8534684), ('Angora, Angora rabbit', 0.038167812), ('Samoyed, Samoyede', 0.024762549), ('Scotch terrier, Scottish terrier, Scottie', 0.01685713), ('Persian cat', 0.01484343)]\n",
273 | "Results for http://c1.staticflickr.com/6/5226/5674849391_824822628c_n.jpg: \n",
274 | "\t[('tiger cat', 0.45084468), ('tabby, tabby cat', 0.40245533), ('Egyptian cat', 0.11048719), ('lynx, catamount', 0.024745336), ('tiger, Panthera tigris', 0.0064596836)]\n",
275 | "Results for http://c1.staticflickr.com/3/2234/1704658865_3b982b56cf_m.jpg: \n",
276 | "\t[('Angora, Angora rabbit', 0.21852449), ('Egyptian cat', 0.19025268), ('tabby, tabby cat', 0.14283349), ('Persian cat', 0.085699804), ('tiger cat', 0.06147669)]\n",
277 | "Results for http://c1.staticflickr.com/2/1361/5110233061_aa3b1c47ef_n.jpg: \n",
278 | "\t[('tabby, tabby cat', 0.6095775), ('tiger cat', 0.24819912), ('Egyptian cat', 0.13453156), ('lynx, catamount', 0.0021140918), ('carton', 0.0015312452)]\n",
279 | "Results for http://c1.staticflickr.com/4/3294/2434900370_17c1221ccf_n.jpg: \n",
280 | "\t[('Egyptian cat', 0.4372107), ('tabby, tabby cat', 0.26445335), ('tiger cat', 0.13057052), ('bow tie, bow-tie, bowtie', 0.06754344), ('lynx, catamount', 0.037636597)]\n",
281 | "Results for http://c1.staticflickr.com/3/2858/12174748174_27491cde33_n.jpg: \n",
282 | "\t[('tiger cat', 0.4069278), ('tabby, tabby cat', 0.23834446), ('Egyptian cat', 0.23789576), ('lynx, catamount', 0.11284405), ('tiger, Panthera tigris', 0.0008611009)]\n",
283 | "Results for http://c1.staticflickr.com/4/3674/13336301695_1cab4f5c85_n.jpg: \n",
284 | "\t[('weasel', 0.25950897), ('black-footed ferret, ferret, Mustela nigripes', 0.1795659), ('polecat, fitch, foulmart, foumart, Mustela putorius', 0.15248777), ('mink', 0.07626065), ('Egyptian cat', 0.04768039)]\n"
285 | ]
286 | }
287 | ],
288 | "source": [
289 | "model = models['mobilenet']\n",
290 | "graph = load_graph(model)\n",
291 | "image_urls = get_image_urls(\"https://www.flickr.com/search/?text=cats\")\n",
292 | "for url in image_urls:\n",
293 | " results = score_image(graph, model, url)\n",
294 | " print(\"Results for {}: \\n\\t{}\".format(url, results))"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": null,
300 | "metadata": {},
301 | "outputs": [],
302 | "source": []
303 | }
304 | ],
305 | "metadata": {
306 | "celltoolbar": "Edit Metadata",
307 | "kernelspec": {
308 | "display_name": "Python 3",
309 | "language": "python",
310 | "name": "python3"
311 | },
312 | "language_info": {
313 | "codemirror_mode": {
314 | "name": "ipython",
315 | "version": 3
316 | },
317 | "file_extension": ".py",
318 | "mimetype": "text/x-python",
319 | "name": "python",
320 | "nbconvert_exporter": "python",
321 | "pygments_lexer": "ipython3",
322 | "version": "3.5.4"
323 | }
324 | },
325 | "nbformat": 4,
326 | "nbformat_minor": 2
327 | }
328 |
--------------------------------------------------------------------------------
/chapter 6/Tensorflow VR Part 2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# TensorFlow Visual Recognition Sample Application Part 2\n",
8 | "\n",
9 | "## Provide a User Interface with a PixieApp\n",
10 | "\n",
11 | "## Define the model metadata"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {
18 | "pixiedust": {
19 | "displayParams": {}
20 | }
21 | },
22 | "outputs": [
23 | {
24 | "name": "stderr",
25 | "output_type": "stream",
26 | "text": [
27 | "/Users/dtaieb/anaconda/envs/dashboard/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5\n",
28 | " return f(*args, **kwds)\n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "import tensorflow as tf\n",
34 | "import requests\n",
35 | "models = {\n",
36 | " \"mobilenet\": {\n",
37 | " \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n",
38 | " \"model_file_url\": \"frozen_graph.pb\",\n",
39 | " \"label_file\": \"labels.txt\",\n",
40 | " \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n",
41 | " }\n",
42 | "}\n",
43 | "\n",
44 | "# helper method for reading attributes from the model metadata\n",
45 | "def get_model_attribute(model, key, default_value = None):\n",
46 | " if key not in model:\n",
47 | " if default_value is None:\n",
48 | " raise Exception(\"Require model attribute {} not found\".format(key))\n",
49 | " return default_value\n",
50 | " return model[key]"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "## Helper methods for loading the graph and labels for a given model"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 2,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# Helper method for resolving url relative to the selected model\n",
67 | "def get_url(model, path):\n",
68 | " return model[\"base_url\"] + \"/\" + path\n",
69 | " \n",
70 | "# Download the serialized model and create a TensorFlow graph\n",
71 | "def load_graph(model):\n",
72 | " graph = tf.Graph()\n",
73 | " graph_def = tf.GraphDef()\n",
74 | " graph_def.ParseFromString(\n",
75 | " requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n",
76 | " )\n",
77 | " with graph.as_default():\n",
78 | " tf.import_graph_def(graph_def)\n",
79 | " return graph\n",
80 | "\n",
81 | "# Load the labels\n",
82 | "def load_labels(model, as_json = False):\n",
83 | " labels = [line.rstrip() \\\n",
84 | " for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n",
85 | " if line != \"\"]\n",
86 | " if as_json:\n",
87 | " return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n",
88 | " return labels"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Use BeautifulSoup to scrape the images from a given url"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 3,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "from bs4 import BeautifulSoup as BS\n",
105 | "import re\n",
106 | "\n",
107 | "# return an array of all the images scraped from an html page\n",
108 | "def get_image_urls(url):\n",
109 | " # Instantiate a BeautifulSoup parser\n",
110 | " soup = BS(requests.get(url).text, \"html.parser\")\n",
111 | " \n",
112 | " # Local helper method for extracting url\n",
113 | " def extract_url(val):\n",
114 | " m = re.match(r\"url\\((.*)\\)\", val)\n",
115 | " val = m.group(1) if m is not None else val\n",
116 | " return \"http:\" + val if val.startswith(\"//\") else val\n",
117 | " \n",
118 | " # List comprehension that look for
elements and backgroud-image styles\n",
119 | " return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n",
120 | " extract_url(val.strip()) for key,val in \\\n",
121 | " [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n",
122 | " for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n",
123 | " if key.strip().lower()=='background-image' \\\n",
124 | " ]"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "## Helper method for downloading an image into a temp file"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 4,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "import tempfile\n",
141 | "def download_image(url):\n",
142 | " response = requests.get(url, stream=True)\n",
143 | " if response.status_code == 200:\n",
144 | " with tempfile.NamedTemporaryFile(delete=False) as f:\n",
145 | " for chunk in response.iter_content(2048):\n",
146 | " f.write(chunk)\n",
147 | " return f.name\n",
148 | " else:\n",
149 | " raise Exception(\"Unable to download image: {}\".format(response.status_code))"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "## Decode an image into a tensor"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 5,
162 | "metadata": {},
163 | "outputs": [],
164 | "source": [
165 | "# decode a given image into a tensor\n",
166 | "def read_tensor_from_image_file(model, file_name):\n",
167 | " file_reader = tf.read_file(file_name, \"file_reader\")\n",
168 | " if file_name.endswith(\".png\"):\n",
169 | " image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n",
170 | " elif file_name.endswith(\".gif\"):\n",
171 | " image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n",
172 | " elif file_name.endswith(\".bmp\"):\n",
173 | " image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n",
174 | " else:\n",
175 | " image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n",
176 | " float_caster = tf.cast(image_reader, tf.float32)\n",
177 | " dims_expander = tf.expand_dims(float_caster, 0);\n",
178 | " \n",
179 | " # Read some info from the model metadata, providing default values\n",
180 | " input_height = get_model_attribute(model, \"input_height\", 224)\n",
181 | " input_width = get_model_attribute(model, \"input_width\", 224)\n",
182 | " input_mean = get_model_attribute(model, \"input_mean\", 0)\n",
183 | " input_std = get_model_attribute(model, \"input_std\", 255)\n",
184 | " \n",
185 | " resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n",
186 | " normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n",
187 | " sess = tf.Session()\n",
188 | " result = sess.run(normalized)\n",
189 | " return result"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
196 | "## Score_image method that run the model and return the top 5 candidate answers"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 6,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "import numpy as np\n",
206 | "\n",
207 | "# classify an image given its url\n",
208 | "def score_image(graph, model, url):\n",
209 | " # Get the input and output layer from the model\n",
210 | " input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n",
211 | " output_layer = get_model_attribute(model, \"output_layer\")\n",
212 | " \n",
213 | " # Download the image and build a tensor from its data\n",
214 | " t = read_tensor_from_image_file(model, download_image(url))\n",
215 | " \n",
216 | " # Retrieve the tensors corresponding to the input and output layers\n",
217 | " input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n",
218 | " output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n",
219 | "\n",
220 | " with tf.Session(graph=graph) as sess:\n",
221 | " # Execute the output, overriding the input tensor with the one corresponding\n",
222 | " # to the image in the feed_dict argument\n",
223 | " results = sess.run(output_tensor, {input_tensor: t})\n",
224 | " results = np.squeeze(results)\n",
225 | " # select the top 5 candidate and match them to the labels\n",
226 | " top_k = results.argsort()[-5:][::-1]\n",
227 | " labels = load_labels(model)\n",
228 | " return [(labels[i].split(\":\")[1], results[i]) for i in top_k]"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "## PixieApp with the following screens:\n",
236 | "1. Ask the user for a url to a web page\n",
237 | "2. Display the images with top 5 candidate classifications"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 9,
243 | "metadata": {
244 | "pixiedust": {
245 | "displayParams": {}
246 | }
247 | },
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/html": [
252 | "Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter"
253 | ],
254 | "text/plain": [
255 | ""
256 | ]
257 | },
258 | "metadata": {
259 | "pixieapp_metadata": null
260 | },
261 | "output_type": "display_data"
262 | }
263 | ],
264 | "source": [
265 | "from pixiedust.display.app import *\n",
266 | "\n",
267 | "@PixieApp\n",
268 | "class ScoreImageApp():\n",
269 | " def setup(self):\n",
270 | " self.model = models[\"mobilenet\"]\n",
271 | " self.graph = load_graph( self.model )\n",
272 | "\n",
273 | " @route()\n",
274 | " def main_screen(self):\n",
275 | " return \"\"\"\n",
276 | "\n",
284 | "\n",
285 | " \n",
286 | " \n",
287 | " \n",
288 | " \n",
291 | " \n",
292 | " \n",
293 | " \n",
294 | " \n",
295 | " \n",
296 | " \n",
297 | "\"\"\"\n",
298 | " \n",
299 | " @route(image_url=\"*\")\n",
300 | " @templateArgs\n",
301 | " def do_process_url(self, image_url):\n",
302 | " image_urls = get_image_urls(image_url)\n",
303 | " return \"\"\"\n",
304 | "\n",
305 | "{%for url in image_urls%}\n",
306 | "\n",
307 | "
\n",
308 | "\n",
309 | "\n",
310 | "{%endfor%}\n",
311 | "\n",
312 | "
\n",
313 | " \"\"\"\n",
314 | " \n",
315 | " @route(score_url=\"*\")\n",
316 | " @templateArgs\n",
317 | " def do_score_url(self, score_url):\n",
318 | " results = score_image(self.graph, self.model, score_url)\n",
319 | " return \"\"\"\n",
320 | "\n",
321 | "{%for label, confidence in results%}\n",
322 | "- {{label}}: {{confidence}}
\n",
323 | "{%endfor%}\n",
324 | "
\n",
325 | "\"\"\"\n",
326 | " \n",
327 | "app = ScoreImageApp()\n",
328 | "app.run()"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": null,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": []
337 | }
338 | ],
339 | "metadata": {
340 | "celltoolbar": "Edit Metadata",
341 | "kernelspec": {
342 | "display_name": "Python 3",
343 | "language": "python",
344 | "name": "python3"
345 | },
346 | "language_info": {
347 | "codemirror_mode": {
348 | "name": "ipython",
349 | "version": 3
350 | },
351 | "file_extension": ".py",
352 | "mimetype": "text/x-python",
353 | "name": "python",
354 | "nbconvert_exporter": "python",
355 | "pygments_lexer": "ipython3",
356 | "version": "3.5.4"
357 | }
358 | },
359 | "nbformat": 4,
360 | "nbformat_minor": 2
361 | }
362 |
--------------------------------------------------------------------------------
/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb
--------------------------------------------------------------------------------
/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb
--------------------------------------------------------------------------------
/chapter 6/sampleCode1.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | x_input = tf.placeholder(tf.float32)
3 | y_output = tf.placeholder(tf.float32)
4 | eps = 0.01
5 | W1 = tf.Variable(tf.random_uniform([2,2], -eps, eps))
6 | W2 = tf.Variable(tf.random_uniform([2,1], -eps, eps))
7 | layer1 = tf.sigmoid(tf.matmul(x_input, W1))
8 | output_layer = tf.sigmoid(tf.matmul(layer1, W2))
9 | cost = tf.reduce_mean(tf.square(y_output - output_layer))
10 | train = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
11 | training_data = ([[0,0],[0,1],[1,0],[1,1]], [[0],[1],[1],[0]])
12 | with tf.Session() as sess:
13 | sess.run(tf.global_variables_initializer())
14 | for i in range(5000):
15 | sess.run(train, feed_dict={x_input: training_data[0], y_output: training_data[1]})
16 |
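A minimal evaluation sketch, not part of the repository file: it reuses the names defined above and must run inside the same "with tf.Session() as sess:" block, after the training loop, to print the network's predictions for the four XOR inputs.

    # Illustrative addition; assumes sess, output_layer, x_input and training_data from above
    predictions = sess.run(output_layer, feed_dict={x_input: training_data[0]})
    print(predictions)  # should drift toward [[0], [1], [1], [0]] as training converges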
--------------------------------------------------------------------------------
/chapter 6/sampleCode10.py:
--------------------------------------------------------------------------------
1 | # Load the labels
2 | def load_labels(model, as_json = False):
3 | labels = [line.rstrip() \
4 | for line in requests.get(get_url(model, model["label_file"]) ).text.split("\n") if line != ""]
5 | if as_json:
6 | return [{"index": item.split(":")[0],"label":item.split(":")[1]} for item in labels]
7 | return labels
8 |
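An illustrative note, not in the repository: the split(":") calls above assume each line of labels.txt has the "index:label" form. A quick check with two hypothetical label lines:

    sample = ["281:tabby, tabby cat", "282:tiger cat"]  # hypothetical label lines
    as_json = [{"index": item.split(":")[0], "label": item.split(":")[1]} for item in sample]
    # -> [{'index': '281', 'label': 'tabby, tabby cat'}, {'index': '282', 'label': 'tiger cat'}]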
--------------------------------------------------------------------------------
/chapter 6/sampleCode11.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup as BS
2 | import re
3 |
4 | # return an array of all the images scraped from an html page
5 | def get_image_urls(url):
6 | # Instantiate a BeautifulSoup parser
7 | soup = BS(requests.get(url).text, "html.parser")
8 |
9 | # Local helper method for extracting url
10 | def extract_url(val):
11 | m = re.match(r"url\((.*)\)", val)
12 | val = m.group(1) if m is not None else val
13 | return "http:" + val if val.startswith("//") else val
14 |
15 | # List comprehension that looks for <img> elements and background-image styles
16 | return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \
17 | extract_url(val.strip()) for key,val in \
18 | [tuple(selector.split(":")) for elt in soup.select("[style]") \
19 | for selector in elt["style"].strip(" ;").split(";")] \
20 | if key.strip().lower()=='background-image' \
21 | ]
22 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode12.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | def download_image(url):
3 | response = requests.get(url, stream=True)
4 | if response.status_code == 200:
5 | with tempfile.NamedTemporaryFile(delete=False) as f:
6 | for chunk in response.iter_content(2048):
7 | f.write(chunk)
8 | return f.name
9 | else:
10 | raise Exception("Unable to download image: {}".format(response.status_code))
11 |
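A hedged usage sketch, not in the repository: because the temporary file is created with delete=False, it outlives the with block, so the caller is responsible for removing it once it is no longer needed.

    import os
    path = download_image("https://example.com/cat.jpg")  # hypothetical URL
    try:
        t = read_tensor_from_image_file(models["mobilenet"], path)  # assumes sampleCode13.py
    finally:
        os.remove(path)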
--------------------------------------------------------------------------------
/chapter 6/sampleCode13.py:
--------------------------------------------------------------------------------
1 | # decode a given image into a tensor
2 | def read_tensor_from_image_file(model, file_name):
3 | file_reader = tf.read_file(file_name, "file_reader")
4 | if file_name.endswith(".png"):
5 | image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')
6 | elif file_name.endswith(".gif"):
7 | image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))
8 | elif file_name.endswith(".bmp"):
9 | image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
10 | else:
11 | image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')
12 | float_caster = tf.cast(image_reader, tf.float32)
13 | dims_expander = tf.expand_dims(float_caster, 0)
14 |
15 | # Read some info from the model metadata, providing default values
16 | input_height = get_model_attribute(model, "input_height", 224)
17 | input_width = get_model_attribute(model, "input_width", 224)
18 | input_mean = get_model_attribute(model, "input_mean", 0)
19 | input_std = get_model_attribute(model, "input_std", 255)
20 |
21 | resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
22 | normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
23 | sess = tf.Session()
24 | result = sess.run(normalized)
25 | return result
26 |
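A quick numeric check, not in the repository, of the normalization step above: with the default input_mean=0 and input_std=255, pixel values are rescaled from [0, 255] to [0.0, 1.0].

    import numpy as np
    pixels = np.array([0.0, 127.5, 255.0])
    print((pixels - 0) / 255)  # [0.  0.5 1. ]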
--------------------------------------------------------------------------------
/chapter 6/sampleCode14.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # classify an image given its url
4 | def score_image(graph, model, url):
5 | # Get the input and output layer from the model
6 | input_layer = get_model_attribute(model, "input_layer", "input")
7 | output_layer = get_model_attribute(model, "output_layer")
8 |
9 | # Download the image and build a tensor from its data
10 | t = read_tensor_from_image_file(model, download_image(url))
11 |
12 | # Retrieve the tensors corresponding to the input and output layers
13 | input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0")
14 | output_tensor = graph.get_tensor_by_name("import/" + output_layer + ":0")
15 |
16 | with tf.Session(graph=graph) as sess:
17 | results = sess.run(output_tensor, {input_tensor: t})
18 | results = np.squeeze(results)
19 | # select the top 5 candidates and match them to the labels
20 | top_k = results.argsort()[-5:][::-1]
21 | labels = load_labels(model)
22 | return [(labels[i].split(":")[1], results[i]) for i in top_k]
23 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode15.py:
--------------------------------------------------------------------------------
1 | model = models['mobilenet']
2 | graph = load_graph(model)
3 | image_urls = get_image_urls("https://www.flickr.com/search/?text=cats")
4 | for url in image_urls:
5 | results = score_image(graph, model, url)
6 | print("Result for {}: \n\t{}".format(url, results))
7 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode16.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 |
3 | @PixieApp
4 | class ScoreImageApp():
5 | def setup(self):
6 | self.model = models["mobilenet"]
7 | self.graph = load_graph( self.model )
8 | …
9 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode17.py:
--------------------------------------------------------------------------------
1 | [[ScoreImageApp]]
2 | @route()
3 | def main_screen(self):
4 | return """
5 |
13 |
14 |
15 |
16 |
17 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | """
27 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode18.py:
--------------------------------------------------------------------------------
1 | [[ScoreImageApp]]
2 | @route(image_url="*")
3 | @templateArgs
4 | def do_process_url(self, image_url):
5 | image_urls = get_image_urls(image_url)
6 | return """
7 |
8 | {%for url in image_urls%}
9 |
10 |
11 |
12 |
13 |
14 | {%endfor%}
15 |
16 |
17 | """
18 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode19.py:
--------------------------------------------------------------------------------
1 | [[ScoreImageApp]]
2 | @route(score_url="*")
3 | @templateArgs
4 | def do_score_url(self, score_url):
5 | results = score_image(self.graph, self.model, score_url)
6 | return """
7 |
8 | {%for label, confidence in results%}
9 | - {{label}}: {{confidence}}
10 | {%endfor%}
11 |
12 | """
13 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode2.py:
--------------------------------------------------------------------------------
1 | def do_training(train, train_labels, test, test_labels, num_classes):
2 | #set TensorFlow logging level to INFO
3 | tf.logging.set_verbosity(tf.logging.INFO)
4 |
5 | # Build 2 hidden layer DNN with 10, 10 units respectively.
6 | classifier = tf.estimator.DNNClassifier(
7 | # Compute feature_columns from dataframe keys using list comprehension
8 | feature_columns =
9 | [tf.feature_column.numeric_column(key=key) for key in train.keys()],
10 | hidden_units=[10, 10],
11 | n_classes=num_classes)
12 |
13 | # Train the Model
14 | classifier.train(
15 | input_fn=lambda:train_input_fn(train, train_labels,100),
16 | steps=1000
17 | )
18 |
19 | # Evaluate the model
20 | eval_result = classifier.evaluate(
21 | input_fn=lambda:eval_input_fn(test, test_labels,100)
22 | )
23 |
24 | return (classifier, eval_result)
25 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode20.py:
--------------------------------------------------------------------------------
1 | [[ImageRecoApp]]
2 | from pixiedust.apps.template import TemplateTabbedApp
3 | @PixieApp
4 | class ImageRecoApp(TemplateTabbedApp):
5 | def setup(self):
6 | self.apps = [
7 | {"title": "Score", "app_class": "ScoreImageApp"},
8 | {"title": "Model", "app_class": "TensorGraphApp"},
9 | {"title": "Labels", "app_class": "LabelsApp"}
10 | ]
11 | self.model = models["mobilenet"]
12 | self.graph = self.load_graph(self.model)
13 |
14 | app = ImageRecoApp()
15 | app.run()
16 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode21.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class TensorGraphApp():
3 | """Visualize TensorFlow graph."""
4 | def setup(self):
5 | self.graph = self.parent_pixieapp.graph
6 |
7 | @route()
8 | @templateArgs
9 | def main_screen(self):
10 | strip_def = self.strip_consts(self.graph.as_graph_def())
11 | code = """
12 |
17 |
18 |
19 |
20 |
21 | """.format(data=repr(str(strip_def)), id='graph'+ self.getPrefix()).replace('"', '"')
22 |
23 | return """
24 |
25 | """
26 |
27 | def strip_consts(self, graph_def, max_const_size=32):
28 | """Strip large constant values from graph_def."""
29 | strip_def = tf.GraphDef()
30 | for n0 in graph_def.node:
31 | n = strip_def.node.add()
32 | n.MergeFrom(n0)
33 | if n.op == 'Const':
34 | tensor = n.attr['value'].tensor
35 | size = len(tensor.tensor_content)
36 | if size > max_const_size:
37 | tensor.tensor_content = "<stripped {} bytes>".format(size).encode("UTF-8")
38 | return strip_def
39 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode22.py:
--------------------------------------------------------------------------------
1 | [[LabelsApp]]
2 | @PixieApp
3 | class LabelsApp():
4 | def setup(self):
5 | self.labels = self.parent_pixieapp.load_labels(
6 | self.parent_pixieapp.model, as_json=True
7 | )
8 |
9 | @route()
10 | def main_screen(self):
11 | return """
12 |
13 |
14 | {
15 | "table_noschema": "true",
16 | "handlerId": "tableView",
17 | "rowCount": "10000"
18 | }
19 |
20 |
21 | """
22 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode23.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | wnid_to_urls = pandas.read_csv('/Users/dtaieb/Downloads/fall11_urls.txt', sep='\t', names=["wnid", "url"],
3 | header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1")
4 | wnid_to_urls['wnid'] = wnid_to_urls['wnid'].apply(lambda x: x.split("_")[0])
5 | wnid_to_urls = wnid_to_urls.dropna()
6 |
7 | wnid_to_words = pandas.read_csv('/Users/dtaieb/Downloads/words.txt', sep='\t', names=["wnid", "description"],
8 | header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1")
9 | wnid_to_words = wnid_to_words.dropna()
10 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode24.py:
--------------------------------------------------------------------------------
1 | def get_url_for_keywords(keywords):
2 | results = {}
3 | for keyword in keywords:
4 | df = wnid_to_words.loc[wnid_to_words['description'] == keyword]
5 | row_list = df['wnid'].values.tolist()
6 | descriptions = df['description'].values.tolist()
7 | if len(row_list) > 0:
8 | results[descriptions[0]] = wnid_to_urls.loc[wnid_to_urls['wnid'] == row_list[0]]["url"].values.tolist()
9 | return results
10 |
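Illustrative usage, not in the repository, assuming the wnid_to_urls and wnid_to_words DataFrames built in sampleCode23.py are in scope:

    urls_by_keyword = get_url_for_keywords(["apple", "banana"])
    for description, urls in urls_by_keyword.items():
        print(description, len(urls))  # each keyword with its number of candidate image URLs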
--------------------------------------------------------------------------------
/chapter 6/sampleCode25.py:
--------------------------------------------------------------------------------
1 | from pixiedust.utils.environment import Environment
2 | root_dir = ensure_dir_exists(os.path.join(Environment.pixiedustHome, "imageRecoApp"))
3 | image_dir = root_dir
4 | image_dict = get_url_for_keywords(["apple", "orange", "pear", "banana"])
5 | with open(os.path.join(image_dir, "retrained_label.txt"), "w") as f_label:
6 | for key in image_dict:
7 | f_label.write(key + "\n")
8 | path = ensure_dir_exists(os.path.join(image_dir, key))
9 | count = 0
10 | for url in image_dict[key]:
11 | download_image_into_dir(url, path)
12 | count += 1
13 | if count > 500:
14 | break
15 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode26.py:
--------------------------------------------------------------------------------
1 | def add_jpeg_decoding(model):
2 | input_height = get_model_attribute(model, "input_height")
3 | input_width = get_model_attribute(model, "input_width")
4 | input_depth = get_model_attribute(model, "input_depth")
5 | input_mean = get_model_attribute(model, "input_mean", 0)
6 | input_std = get_model_attribute(model, "input_std", 255)
7 |
8 | jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
9 | decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
10 | decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
11 | decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
12 | resize_shape = tf.stack([input_height, input_width])
13 | resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
14 | resized_image = tf.image.resize_bilinear(decoded_image_4d,
15 | resize_shape_as_int)
16 | offset_image = tf.subtract(resized_image, input_mean)
17 | mul_image = tf.multiply(offset_image, 1.0 / input_std)
18 | return jpeg_data, mul_image
19 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode27.py:
--------------------------------------------------------------------------------
1 | def run_bottleneck_on_image(sess, image_data, image_data_tensor,decoded_image_tensor,
2 | resized_input_tensor,bottleneck_tensor):
3 | # First decode the JPEG image, resize it, and rescale the pixel values.
4 | resized_input_values = sess.run(decoded_image_tensor,{image_data_tensor: image_data})
5 | # Then run it through the recognition network.
6 | bottleneck_values = sess.run(bottleneck_tensor,{resized_input_tensor: resized_input_values})
7 | bottleneck_values = np.squeeze(bottleneck_values)
8 | return bottleneck_values
9 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode28.py:
--------------------------------------------------------------------------------
1 | [[TensorGraphApp]]
2 | return """
3 | {%if this.custom_graph%}
4 |
5 |
6 | self.graph = self.custom_graph if self.graph is not self.custom_graph else self.parent_pixieapp.graph
7 |
8 | Select a model to display:
9 |
13 | {%endif%}
14 |
15 | """
16 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode29.py:
--------------------------------------------------------------------------------
1 | with tf.name_scope('cross_entropy'):
2 | cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=ground_truth_input, logits=logits)
3 | with tf.name_scope('total'):
4 | cross_entropy_mean = tf.reduce_mean(cross_entropy)
5 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode3.py:
--------------------------------------------------------------------------------
1 | def input_fn(features, labels, batch_size, train):
2 | # Convert the inputs to a Dataset and shuffle.
3 | dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)).shuffle(1000)
4 | if train:
5 | #repeat only for training
6 | dataset = dataset.repeat()
7 | # Return the dataset in batch
8 | return dataset.batch(batch_size)
9 |
10 | def train_input_fn(features, labels, batch_size):
11 | return input_fn(features, labels, batch_size, train=True)
12 |
13 | def eval_input_fn(features, labels, batch_size):
14 | return input_fn(features, labels, batch_size, train=False)
15 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode30.py:
--------------------------------------------------------------------------------
1 | [[LabelsApp]]
2 | @PixieApp
3 | class LabelsApp():
4 | def setup(self):
5 | …
6 |
7 | @route()
8 | def main_screen(self):
9 | return """
10 | {%if this.custom_labels%}
11 |
12 |
13 | self.current_labels = self.custom_labels if self.current_labels is not self.custom_labels else self.labels
14 |
15 | Select a model to display:
16 |
20 | {%endif%}
21 |
22 |
23 | {
24 | "table_noschema": "true",
25 | "handlerId": "tableView",
26 | "rowCount": "10000",
27 | "noChartCache": "true"
28 |
29 | }
30 |
31 |
32 | """
33 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode31.py:
--------------------------------------------------------------------------------
1 | # classify an image given its url
2 | def score_image(graph, model, url):
3 | # Download the image and build a tensor from its data
4 | t = read_tensor_from_image_file(model, download_image(url))
5 |
6 | def do_score_image(graph, output_layer, labels):
7 | # Retrieve the tensors corresponding to the input and output layers
8 | input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0")
9 | output_tensor = graph.get_tensor_by_name( output_layer + ":0")
10 |
11 | with tf.Session(graph=graph) as sess:
12 | # Initialize the variables
13 | sess.run(tf.global_variables_initializer())
14 | results = sess.run(output_tensor, {input_tensor: t})
15 | results = np.squeeze(results)
16 | # select the top 5 candidates and match them to the labels
17 | top_k = results.argsort()[-5:][::-1]
18 | return [(labels[i].split(":")[1], results[i]) for i in top_k]
19 |
20 | results = {}
21 | input_layer = get_model_attribute(model, "input_layer", "input")
22 | labels = load_labels(model)
23 | results["mobilenet"] = do_score_image(graph, "import/" + get_model_attribute(model, "output_layer"), labels)
24 | if "custom_graph" in model and "custom_labels" in model:
25 | with open(model["custom_labels"]) as f:
26 | labels = [line.rstrip() for line in f.readlines() if line != ""]
27 | custom_labels = ["{}:{}".format(i, label) for i,label in zip(range(len(labels)), labels)]
28 | results["custom"] = do_score_image(model["custom_graph"], "final_result", custom_labels)
29 | return results
30 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode32.py:
--------------------------------------------------------------------------------
1 | @route(score_url="*")
2 | @templateArgs
3 | def do_score_url(self, score_url):
4 | scores_dict = score_image(self.graph, self.model, score_url)
5 | return """
6 | {%for model, results in scores_dict.items()%}
7 | {{model}}
8 |
9 | {%for label, confidence in results%}
10 | - {{label}}: {{confidence}}
11 | {%endfor%}
12 |
13 | {%endfor%}
14 | """
15 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode4.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class SimpleClassificationDNN():
4 | @route()
5 | def main_screen(self):
6 | return """
7 |
8 | The classification model will be trained on all the numeric columns of the dataset
9 |
10 |
18 |
19 |
20 |
21 |
22 |
28 |
29 |
30 |
31 | """
32 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode5.py:
--------------------------------------------------------------------------------
1 | @route(predictor="*")
2 | @templateArgs
3 | def prepare_training(self, predictor):
4 | #select only numerical columns
5 | self.dataset = self.pixieapp_entity.dropna(axis=1).select_dtypes(
6 | include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
7 | )
8 | #Compute the number of classes by counting the groups
9 | self.num_classes = self.dataset.groupby(predictor).size().shape[0]
10 | #Create the train and test features and labels
11 | self.train_x=self.dataset.sample(frac=0.8)
12 | self.full_train = self.train_x.copy()
13 | self.train_y = self.train_x.pop(predictor)
14 | self.test_x=self.dataset.drop(self.train_x.index)
15 | self.full_test = self.test_x.copy()
16 | self.test_y=self.test_x.pop(predictor)
17 |
18 | bar_chart_options = {
19 | "rowCount": "100",
20 | "keyFields": predictor,
21 | "handlerId": "barChart",
22 | "noChartCache": "true"
23 | }
24 |
25 | return """
26 |
27 |
28 |
29 | Train set class distribution
30 |
31 | {{bar_chart_options|tojson}}
32 |
33 |
34 |
35 | Test set class distribution
36 |
37 | {{bar_chart_options|tojson}}
38 |
39 |
40 |
41 |
42 |
43 |
44 |
47 |
48 | """
49 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode6.py:
--------------------------------------------------------------------------------
1 | @route(do_training="*")
2 | @captureOutput
3 | def do_training_screen(self):
4 | self.classifier, self.eval_results = \
5 | do_training(
6 | self.train_x, self.train_y, self.test_x, self.test_y, self.num_classes
7 | )
8 | return """
9 | Training completed successfully
10 |
11 |
12 | Metric
13 | Value
14 |
15 |
16 | {%for key,value in this.eval_results.items()%}
17 |
18 | {{key}}
19 | {{value}}
20 |
21 | {%endfor%}
22 |
23 |
24 | """
25 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode7.py:
--------------------------------------------------------------------------------
1 | models = {
2 | "mobilenet": {
3 | "base_url":"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224",
4 | "model_file_url": "frozen_graph.pb",
5 | "label_file": "labels.txt",
6 | "output_layer": "MobilenetV1/Predictions/Softmax"
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/chapter 6/sampleCode8.py:
--------------------------------------------------------------------------------
1 | # helper method for reading attributes from the model metadata
2 | def get_model_attribute(model, key, default_value = None):
3 | if key not in model:
4 | if default_value is None:
5 | raise Exception("Require model attribute {} not found".format(key))
6 | return default_value
7 | return model[key]
8 |
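Illustrative usage, not in the repository: optional attributes fall back to the supplied default, while a missing attribute with no default raises.

    model = {"output_layer": "MobilenetV1/Predictions/Softmax"}
    print(get_model_attribute(model, "output_layer"))       # MobilenetV1/Predictions/Softmax
    print(get_model_attribute(model, "input_height", 224))  # 224, the default value
    # get_model_attribute(model, "input_mean")  # raises: no default was provided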
--------------------------------------------------------------------------------
/chapter 6/sampleCode9.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import requests
3 | # Helper method for resolving url relative to the selected model
4 | def get_url(model, path):
5 | return model["base_url"] + "/" + path
6 |
7 | # Download the serialized model and create a TensorFlow graph
8 | def load_graph(model):
9 | graph = tf.Graph()
10 | graph_def = tf.GraphDef()
11 | graph_def.ParseFromString(
12 | requests.get( get_url( model, model["model_file_url"] ) ).content
13 | )
14 | with graph.as_default():
15 | tf.import_graph_def(graph_def)
16 | return graph
17 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode1.py:
--------------------------------------------------------------------------------
1 | from six import iteritems
2 | import json
3 | import csv
4 | from tweepy.streaming import StreamListener
5 | class RawTweetsListener(StreamListener):
6 | def __init__(self):
7 | self.buffered_data = []
8 | self.counter = 0
9 |
10 | def flush_buffer_if_needed(self):
11 | "Check the buffer capacity and write to a new file if needed"
12 | length = len(self.buffered_data)
13 | if length > 0 and length % 10 == 0:
14 | with open(os.path.join( output_dir, "tweets{}.csv".format(self.counter)), "w") as fs:
15 | self.counter += 1
16 | csv_writer = csv.DictWriter( fs, fieldnames = fieldnames)
17 | for data in self.buffered_data:
18 | csv_writer.writerow(data)
19 | self.buffered_data = []
20 |
21 | def on_data(self, data):
22 | def transform(key, value):
23 | return transforms[key](value) if key in transforms else value
24 |
25 | self.buffered_data.append(
26 | {key:transform(key,value) \
27 | for key,value in iteritems(json.loads(data)) \
28 | if key in fieldnames}
29 | )
30 | self.flush_buffer_if_needed()
31 | return True
32 |
33 | def on_error(self, status):
34 | print("An error occured while receiving streaming data: {}".format(status))
35 | return False
36 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode10.py:
--------------------------------------------------------------------------------
1 | parquet_batch_df = spark.sql(
2 | "select * from parquet.`{}`".format(
3 | os.path.join(root_dir, "output_parquet")
4 | )
5 | )
6 |
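An illustrative follow-up, not in the repository: the resulting batch DataFrame can be inspected like any other Spark DataFrame.

    parquet_batch_df.printSchema()
    print(parquet_batch_df.count())  # number of enriched tweets written so far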
--------------------------------------------------------------------------------
/chapter 7/sampleCode11.py:
--------------------------------------------------------------------------------
1 | from watson_developer_cloud import NaturalLanguageUnderstandingV1
2 | from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EntitiesOptions
3 |
4 | nlu = NaturalLanguageUnderstandingV1(
5 | version='2017-02-27',
6 | username='XXXX',
7 | password='XXXX'
8 | )
9 |
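A hedged example, not in the repository, of a one-off call with this client, mirroring the response fields read by the enrich() method in sampleCode12.py:

    response = nlu.analyze(
        text="I love this phone",  # hypothetical input text
        features=Features(sentiment=SentimentOptions(), entities=EntitiesOptions())
    )
    print(response["sentiment"]["document"]["label"])  # e.g. "positive"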
--------------------------------------------------------------------------------
/chapter 7/sampleCode12.py:
--------------------------------------------------------------------------------
1 | [[RawTweetsListener]]
2 | def enrich(self, data):
3 | try:
4 | response = nlu.analyze(
5 | text = data['text'],
6 | features = Features(
7 | sentiment=SentimentOptions(),
8 | entities=EntitiesOptions()
9 | )
10 | )
11 | data["sentiment"] = response["sentiment"]["document"]["label"]
12 | top_entity = response["entities"][0] if len(response["entities"]) > 0 else None
13 | data["entity"] = top_entity["text"] if top_entity is not None else ""
14 | data["entity_type"] = top_entity["type"] if top_entity is not None else ""
15 | return data
16 | except Exception as e:
17 | self.warn("Error from Watson service while enriching data: {}".format(e))
18 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode13.py:
--------------------------------------------------------------------------------
1 | field_metadata = [
2 | {"name": "created_at","type": DateType()},
3 | {"name": "text", "type": StringType()},
4 | {"name": "source", "type": StringType(),
5 | "transform": lambda s: BS(s, "html.parser").text.strip()
6 | },
7 | {"name": "sentiment", "type": StringType()},
8 | {"name": "entity", "type": StringType()},
9 | {"name": "entity_type", "type": StringType()}
10 | ]
11 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode14.py:
--------------------------------------------------------------------------------
1 | def on_data(self, data):
2 | def transform(key, value):
3 | return transforms[key](value) if key in transforms else value
4 | data = self.enrich(json.loads(data))
5 | if data is not None:
6 | self.buffered_data.append(
7 | {key:transform(key,value) \
8 | for key,value in iteritems(data) \
9 | if key in fieldnames}
10 | )
11 | self.flush_buffer_if_needed()
12 | return True
13 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode15.py:
--------------------------------------------------------------------------------
1 | schema = StructType(
2 | [StructField(f["name"], f["type"], True) for f in field_metadata]
3 | )
4 | csv_sdf = spark.readStream \
5 | .csv(
6 | output_dir,
7 | schema=schema,
8 | multiLine = True,
9 | dateFormat = 'EEE MMM dd kk:mm:ss Z y',
10 | ignoreTrailingWhiteSpace = True,
11 | ignoreLeadingWhiteSpace = True
12 | )
13 | csv_sdf.printSchema()
14 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode16.py:
--------------------------------------------------------------------------------
1 | def start_stream(queries):
2 | "Asynchronously start a new Twitter stream"
3 | stream = Stream(auth, RawTweetsListener())
4 | stream.filter(track=queries, languages=["en"], async=True)  # note: tweepy 3.7+ renamed this keyword to is_async
5 | return stream
6 |
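Illustrative usage, not in the repository: the returned tweepy stream runs asynchronously and should be disconnected once enough tweets have been collected.

    stream = start_stream(["baseball"])  # hypothetical search query
    # ... later:
    stream.disconnect()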
--------------------------------------------------------------------------------
/chapter 7/sampleCode17.py:
--------------------------------------------------------------------------------
1 | def start_streaming_dataframe(output_dir):
2 | "Start a Spark Streaming DataFrame from a file source"
3 | schema = StructType(
4 | [StructField(f["name"], f["type"], True) for f in field_metadata]
5 | )
6 | return spark.readStream \
7 | .csv(
8 | output_dir,
9 | schema=schema,
10 | multiLine = True,
11 | timestampFormat = 'EEE MMM dd kk:mm:ss Z yyyy',
12 | ignoreTrailingWhiteSpace = True,
13 | ignoreLeadingWhiteSpace = True
14 | )
15 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode18.py:
--------------------------------------------------------------------------------
1 | def start_parquet_streaming_query(csv_sdf):
2 | """
3 | Create and run a streaming query from a Structured DataFrame
4 | outputting the results into a parquet database
5 | """
6 | streaming_query = csv_sdf \
7 | .writeStream \
8 | .format("parquet") \
9 | .option("path", os.path.join(root_dir, "output_parquet")) \
10 | .trigger(processingTime="2 seconds") \
11 | .option("checkpointLocation", os.path.join(root_dir, "output_chkpt")) \
12 | .start()
13 | return streaming_query
14 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode19.py:
--------------------------------------------------------------------------------
1 | class StreamsManager():
2 | def __init__(self):
3 | self.twitter_stream = None
4 | self.csv_sdf = None
5 |
6 | def reset(self, search_query = None):
7 | if self.twitter_stream is not None:
8 | self.twitter_stream.disconnect()
9 | #stop all the active streaming queries and re-initialize the directories
10 | for query in spark.streams.active:
11 | query.stop()
12 | # initialize the directories
13 | self.root_dir, self.output_dir = init_output_dirs()
14 | # start the tweepy stream
15 | self.twitter_stream = start_stream([search_query]) if search_query is not None else None
16 | # start the spark streaming stream
17 | self.csv_sdf = start_streaming_dataframe(output_dir) if search_query is not None else None
18 |
19 | def __del__(self):
20 | # Automatically called when the class is garbage collected
21 | self.reset()
22 |
23 | streams_manager = StreamsManager()
24 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode2.py:
--------------------------------------------------------------------------------
1 | from pyspark.sql.types import StringType, DateType
2 | from bs4 import BeautifulSoup as BS
3 | field_metadata = [
4 | {"name": "created_at","type": DateType()},
5 | {"name": "text", "type": StringType()},
6 | {"name": "source", "type": StringType(),
7 | "transform": lambda s: BS(s, "html.parser").text.strip()
8 | }
9 | ]
10 | # fieldnames and transforms are derived from field_metadata, so they must be computed after it
11 | fieldnames = [f["name"] for f in field_metadata]
12 | transforms = {item['name']:item['transform'] for item in field_metadata if "transform" in item}
12 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode20.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class TweetInsightApp():
4 | @route()
5 | def main_screen(self):
6 | return """
7 |
15 |
16 |
17 |
18 |
19 |
22 |
23 |
26 |
27 |
28 |
29 |
30 | """
31 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode21.py:
--------------------------------------------------------------------------------
1 | import time
2 | [[TweetInsightApp]]
3 | @route(search_query="*")
4 | def do_search_query(self, search_query):
5 | streams_manager.reset(search_query)
6 | start_parquet_streaming_query(streams_manager.csv_sdf)
7 | while True:
8 | try:
9 | parquet_dir = os.path.join(root_dir, "output_parquet")
10 | self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir))
11 | break
12 | except:
13 | time.sleep(5)
14 | return """
15 |
16 |
17 |
18 | print("Number of tweets received: {}".format(streams_manager.twitter_stream.listener.tweet_count))
19 |
20 |
21 |
22 |
23 |
25 |
26 |
27 |
28 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
38 |
39 |
40 |
41 | """
42 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode22.py:
--------------------------------------------------------------------------------
1 | [[TweetInsightApp]]
2 | @route(display_metric1="*")
3 | def do_display_metric1(self, display_metric1):
4 | parquet_dir = os.path.join(root_dir, "output_parquet")
5 | self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir))
6 | return """
7 |
8 |
9 | {
10 | "legend": "true",
11 | "keyFields": "sentiment",
12 | "clusterby": "entity_type",
13 | "handlerId": "barChart",
14 | "rendererId": "bokeh",
15 | "rowCount": "10",
16 | "sortby": "Values DESC",
17 | "noChartCache": "true"
18 | }
19 |
20 |
21 | """
22 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode23.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from wordcloud import WordCloud
3 | [[TweetInsightApp]]
4 | @route(display_wc="*")
5 | @captureOutput
6 | def do_display_wc(self):
7 | text = "\n".join(
8 | [r['entity'] for r in self.parquet_df.select("entity").collect() if r['entity'] is not None]
9 | )
10 | plt.figure( figsize=(13,7) )
11 | plt.axis("off")
12 | plt.imshow(
13 | WordCloud(width=750, height=350).generate(text),
14 | interpolation='bilinear'
15 | )
16 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode24.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class StreamingQueriesApp():
3 | @route()
4 | def main_screen(self):
5 | return """
6 |
7 |
8 | """
9 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode25.py:
--------------------------------------------------------------------------------
1 | @route(show_progress="true")
2 | def do_show_progress(self):
3 | return """
4 | {%for query in this.spark.streams.active%}
5 |
6 |
7 | Progress Report for Spark Stream: {{query.id}}
8 |
9 |
10 |
11 |
12 | metric
13 | value
14 |
15 |
16 |
17 | {%for key, value in query.lastProgress.items()%}
18 |
19 | {{key}}
20 | {{value}}
21 |
22 | {%endfor%}
23 |
24 |
25 | {%endfor%}
26 | """
27 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode26.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | from pixiedust.apps.template import TemplateTabbedApp
3 |
4 | @PixieApp
5 | class TwitterSentimentApp(TemplateTabbedApp):
6 | def setup(self):
7 | self.apps = [
8 | {"title": "Tweets Insights", "app_class": "TweetInsightApp"},
9 | {"title": "Streaming Queries", "app_class": "StreamingQueriesApp"}
10 | ]
11 |
12 | app = TwitterSentimentApp()
13 | app.run()
14 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode27.py:
--------------------------------------------------------------------------------
1 | message_hub_creds = {
2 | "instance_id": "XXXXX",
3 | "mqlight_lookup_url": "https://mqlight-lookup-prod02.messagehub.services.us-south.bluemix.net/Lookup?serviceId=XXXX",
4 | "api_key": "XXXX",
5 | "kafka_admin_url": "https://kafka-admin-prod02.messagehub.services.us-south.bluemix.net:443",
6 | "kafka_rest_url": "https://kafka-rest-prod02.messagehub.services.us-south.bluemix.net:443",
7 | "kafka_brokers_sasl": [
8 | "kafka03-prod02.messagehub.services.us-south.bluemix.net:9093",
9 | "kafka01-prod02.messagehub.services.us-south.bluemix.net:9093",
10 | "kafka02-prod02.messagehub.services.us-south.bluemix.net:9093",
11 | "kafka05-prod02.messagehub.services.us-south.bluemix.net:9093",
12 | "kafka04-prod02.messagehub.services.us-south.bluemix.net:9093"
13 | ],
14 | "user": "XXXX",
15 | "password": "XXXX"
16 | }
17 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode28.py:
--------------------------------------------------------------------------------
1 | [[RawTweetsListener]]
2 | context = ssl.create_default_context()
3 | context.options &= ssl.OP_NO_TLSv1
4 | context.options &= ssl.OP_NO_TLSv1_1
5 | kafka_conf = {
6 | 'sasl_mechanism': 'PLAIN',
7 | 'security_protocol': 'SASL_SSL',
8 | 'ssl_context': context,
9 | "bootstrap_servers": message_hub_creds["kafka_brokers_sasl"],
10 | "sasl_plain_username": message_hub_creds["user"],
11 | "sasl_plain_password": message_hub_creds["password"],
12 | "api_version":(0, 10, 1),
13 | "value_serializer" : lambda v: json.dumps(v).encode('utf-8')
14 | }
15 | self.producer = KafkaProducer(**kafka_conf)
16 |
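17 | # Note: this excerpt assumes ssl, json and kafka.KafkaProducer are imported at the
18 | # module level, and that message_hub_creds is the dictionary from sampleCode27.py.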
--------------------------------------------------------------------------------
/chapter 7/sampleCode29.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 |
4 | def ensure_topic_exists(topic_name):
5 | response = requests.post(
6 | message_hub_creds["kafka_rest_url"] + "/admin/topics",
7 | data = json.dumps({"name": topic_name}),
8 | headers={"X-Auth-Token": message_hub_creds["api_key"]}
9 | )
10 | # tolerate 202 (accepted) and 422/403 (topic already exists / not permitted)
11 | if response.status_code not in (200, 202, 422, 403):
12 | raise Exception(response.json())
13 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode3.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | def ensure_dir(dir, delete_tree = False):
4 | if not os.path.exists(dir):
5 | os.makedirs(dir)
6 | elif delete_tree:
7 | shutil.rmtree(dir)
8 | os.makedirs(dir)
9 | return os.path.abspath(dir)
10 |
11 | root_dir = ensure_dir("output", delete_tree = True)
12 | output_dir = ensure_dir(os.path.join(root_dir, "raw"))
13 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode30.py:
--------------------------------------------------------------------------------
1 | [[RawTweetsListener]]
2 | def on_data(self, data):
3 | self.tweet_count += 1
4 | self.producer.send(
5 | self.topic,
6 | {key:transform(key,value) \
7 | for key,value in iteritems(json.loads(data)) \
8 | if key in fieldnames}
9 | )
10 | return True
11 |
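12 | # fieldnames and transform are assumed to be defined in an earlier cell; only the
13 | # selected tweet fields are transformed and sent to the Kafka topic.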
--------------------------------------------------------------------------------
/chapter 7/sampleCode31.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from watson_developer_cloud import NaturalLanguageUnderstandingV1
3 | from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EntitiesOptions
4 |
5 | # init() function will be called once on pipeline initialization
6 | # @state a Python dictionary object for keeping state. The state object is passed to the process function
7 | def init(state):
8 | # do something once on pipeline initialization and save in the state object
9 | state["nlu"] = NaturalLanguageUnderstandingV1(
10 | version='2017-02-27',
11 | username='XXXX',
12 | password='XXXX'
13 | )
14 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode32.py:
--------------------------------------------------------------------------------
1 | # @event a Python dictionary object representing the input event tuple as defined by the input schema
2 | # @state a Python dictionary object for keeping state over subsequent function calls
3 | # return must be a Python dictionary object. It will be the output of this operator.
4 | # Returning None results in not submitting an output tuple for this invocation.
5 | # You must declare all output attributes in the Edit Schema window.
6 | def process(event, state):
7 | # Enrich the event, such as by:
8 | # event['wordCount'] = len(event['phrase'].split())
9 | try:
10 | event['text'] = event['text'].replace('"', "'")
11 | response = state["nlu"].analyze(
12 | text = event['text'],
13 | features=Features(sentiment=SentimentOptions(), entities=EntitiesOptions())
14 | )
15 | event["sentiment"] = response["sentiment"]["document"]["label"]
16 | top_entity = response["entities"][0] if len(response["entities"]) > 0 else None
17 | event["entity"] = top_entity["text"] if top_entity is not None else ""
18 | event["entity_type"] = top_entity["type"] if top_entity is not None else ""
19 | except Exception as e:
20 | return None
21 | return event
22 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode33.py:
--------------------------------------------------------------------------------
1 | def start_streaming_dataframe():
2 | "Start a Spark Streaming DataFrame from a Kafka Input source"
3 | schema = StructType(
4 | [StructField(f["name"], f["type"], True) for f in field_metadata]
5 | )
6 | kafka_options = {
7 | "kafka.ssl.protocol":"TLSv1.2",
8 | "kafka.ssl.enabled.protocols":"TLSv1.2",
9 | "kafka.ssl.endpoint.identification.algorithm":"HTTPS",
10 | 'kafka.sasl.mechanism': 'PLAIN',
11 | 'kafka.security.protocol': 'SASL_SSL'
12 | }
13 | return spark.readStream \
14 | .format("kafka") \
15 | .option("kafka.bootstrap.servers", ",".join(message_hub_creds["kafka_brokers_sasl"])) \
16 | .option("subscribe", "enriched_tweets") \
17 | .load(**kafka_options)
18 |
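19 | # PySpark's DataStreamReader.load() accepts reader options as keyword arguments,
20 | # so the SASL/SSL settings in kafka_options are applied to the Kafka source.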
--------------------------------------------------------------------------------
/chapter 7/sampleCode34.json:
--------------------------------------------------------------------------------
1 | {
2 | "language": "python",
3 | "env": {
4 | "SCALA_HOME": "/Users/dtaieb/pixiedust/bin/scala/scala-2.11.8",
5 | "PYTHONPATH": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/:/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip",
6 | "SPARK_HOME": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7",
7 | "PYSPARK_SUBMIT_ARGS": "--driver-java-options=-Djava.security.auth.login.config=/Users/dtaieb/pixiedust/jaas.conf --jars /Users/dtaieb/pixiedust/bin/cloudant-spark-v2.0.0-185.jar --driver-class-path /Users/dtaieb/pixiedust/data/libs/* --master local[10] --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0 pyspark-shell",
8 | "PIXIEDUST_HOME": "/Users/dtaieb/pixiedust",
9 | "SPARK_DRIVER_MEMORY": "10G",
10 | "SPARK_LOCAL_IP": "127.0.0.1",
11 | "PYTHONSTARTUP": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/pyspark/shell.py"
12 | },
13 | "display_name": "Python with Pixiedust (Spark 2.3)",
14 | "argv": [
15 | "python",
16 | "-m",
17 | "ipykernel",
18 | "-f",
19 | "{connection_file}"
20 | ]
21 | }
22 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode4.py:
--------------------------------------------------------------------------------
1 | from tweepy import Stream
2 | def start_stream(queries):
3 | "Asynchronously start a new Twitter stream"
4 | stream = Stream(auth, RawTweetsListener())
5 | stream.filter(track=queries, is_async=True) # "async" became a reserved word in Python 3.7; tweepy 3.7+ uses "is_async"
6 | return stream
7 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode5.py:
--------------------------------------------------------------------------------
1 | schema = StructType(
2 | [StructField(f["name"], f["type"], True) for f in field_metadata]
3 | )
4 | csv_sdf = spark.readStream\
5 | .format("csv")\
6 | .option("schema", schema)\
7 | .option("multiline", True)\
8 | .option("dateFormat", 'EEE MMM dd kk:mm:ss Z y')\
9 | .option("ignoreTrailingWhiteSpace", True)\
10 | .option("ignoreLeadingWhiteSpace", True)\
11 | .load(output_dir)
12 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode6.py:
--------------------------------------------------------------------------------
1 | csv_sdf = spark.readStream \
2 | .csv(
3 | output_dir,
4 | schema=schema,
5 | multiLine = True,
6 | dateFormat = 'EEE MMM dd kk:mm:ss Z y',
7 | ignoreTrailingWhiteSpace = True,
8 | ignoreLeadingWhiteSpace = True
9 | )
10 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode7.py:
--------------------------------------------------------------------------------
1 | tweet_streaming_query = csv_sdf \
2 | .writeStream \
3 | .format("parquet") \
4 | .option("path", os.path.join(root_dir, "output_parquet")) \
5 | .trigger(processingTime="2 seconds") \
6 | .option("checkpointLocation", os.path.join(root_dir, "output_chkpt")) \
7 | .start()
8 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode8.py:
--------------------------------------------------------------------------------
1 | tweet_streaming_query = csv_sdf.writeStream\
2 | .outputMode("append")\
3 | .format("console")\
4 | .trigger(processingTime='2 seconds')\
5 | .start()
6 |
--------------------------------------------------------------------------------
/chapter 7/sampleCode9.py:
--------------------------------------------------------------------------------
1 | import json
2 | for query in spark.streams.active:
3 | print("-----------")
4 | print("id: {}".format(query.id))
5 | print(json.dumps(query.lastProgress, indent=2, sort_keys=True))
6 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import statsmodels
3 | np.lookfor("acf", module = statsmodels)
4 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode10.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import statsmodels.tsa.api as smt
3 | smt.graphics.plot_pacf(msft['Adj. Close'], lags=50)
4 | plt.show()
5 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode11.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class StockExplorer():
3 | @route()
4 | def main_screen(self):
5 | return """
6 |
14 |
15 |
16 |
17 |
18 |
21 |
22 |
28 |
29 |
30 |
31 |
32 | """
33 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode12.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | def select_tickers(self, tickers):
3 | self.tickers = {ticker.strip():{} for ticker in tickers}
4 | self.set_active_ticker(tickers[0].strip())
5 |
6 | def set_active_ticker(self, ticker):
7 | self.active_ticker = ticker
8 | if 'df' not in self.tickers[ticker]:
9 | self.tickers[ticker]['df'] = quandl.get('WIKI/{}'.format(ticker))
10 | self.tickers[ticker]['df']['daily_spread'] = self.tickers[ticker]['df']['Adj. Close'] - self.tickers[ticker]['df']['Adj. Open']
11 | self.tickers[ticker]['df'] = self.tickers[ticker]['df'].reset_index()
12 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode13.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | @route(explore="*")
3 | @templateArgs
4 | def stock_explore_screen(self):
5 | tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),
6 | ("ACF and PACF", "AutoCorrelationSubApp")]
7 | return """
8 |
13 |
14 | Stock Explorer PixieApp
15 |
16 |
17 |
18 |
19 | {%for title, subapp in tabs%}
20 |
26 | {%endfor%}
27 |
28 |
29 |
30 |
31 | """
32 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode14.py:
--------------------------------------------------------------------------------
1 | @route(show_analytic="*")
2 | def show_analytic_screen(self, show_analytic):
3 | return """
4 |
5 | """
6 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode15.py:
--------------------------------------------------------------------------------
1 | [[BaseSubApp]]
2 | def add_ticker_selection_markup(refresh_ids):
3 | def deco(fn):
4 | def wrap(self, *args, **kwargs):
5 | return """
6 |
7 |
8 | {%for ticker, state in this.parent_pixieapp.tickers.items()%}
9 |
14 | {%endfor%}
15 |
16 |
17 | """ + fn(self, *args, **kwargs)
18 | return wrap
19 | return deco
20 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode16.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class StockExploreSubApp(BaseSubApp):
3 | @route()
4 | @BaseSubApp.add_ticker_selection_markup(['chart{{prefix}}', 'daily_spread{{prefix}}'])
5 | def main_screen(self):
6 | return """
7 |
8 |
9 |
10 |
11 |
12 |
13 | """
14 |
15 | @route(show_chart="*")
16 | def show_chart_screen(self, show_chart):
17 | return """
18 |
19 |
20 | {
21 | "handlerId": "lineChart",
22 | "valueFields": "{{show_chart}}",
23 | "rendererId": "bokeh",
24 | "keyFields": "Date",
25 | "noChartCache": "true",
26 | "rowCount": "10000"
27 | }
28 |
29 |
30 | """
31 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode17.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | def get_active_df(self):
3 | return self.tickers[self.active_ticker]['df']
4 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode18.py:
--------------------------------------------------------------------------------
1 | tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),("ACF and PACF", "AutoCorrelationSubApp")]
--------------------------------------------------------------------------------
/chapter 8/sampleCode19.py:
--------------------------------------------------------------------------------
1 | [[BaseSubApp]]
2 | @route(widget="lag_slider")
3 | def slider_screen(self):
4 | return """
5 |
6 |
7 |
9 |
11 |
12 |
13 |
33 | """
34 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode2.py:
--------------------------------------------------------------------------------
1 | ar = np.arange(20)
2 | print(ar)
3 | print(ar.reshape(4,5))
4 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode20.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class MovingAverageSubApp(BaseSubApp):
3 | @route()
4 | @BaseSubApp.add_ticker_selection_markup(['chart{{prefix}}'])
5 | def main_screen(self):
6 | return """
7 |
8 |
9 | Moving Average for {{this.parent_pixieapp.active_ticker}}
10 |
11 |
12 |
13 | {
14 | "valueFields": "Adj. Close",
15 | "keyFields": "x",
16 | "rendererId": "bokeh",
17 | "handlerId": "lineChart",
18 | "rowCount": "10000"
19 | }
20 |
21 |
22 |
23 |
24 |
25 |
29 |
30 |
31 |
32 | """
33 | def get_moving_average_df(self):
34 | ma = self.parent_pixieapp.get_active_df()['Adj. Close'].rolling(window=self.lag).mean()
35 | ma_df = pd.DataFrame(ma)
36 | ma_df["x"] = ma_df.index
37 | return ma_df
38 |
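39 | # self.lag is expected to be set by the lag_slider widget defined in sampleCode19.py
40 | # before get_moving_average_df computes the rolling window mean.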
--------------------------------------------------------------------------------
/chapter 8/sampleCode21.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as smt
2 | @PixieApp
3 | class AutoCorrelationSubApp(BaseSubApp):
4 | @route()
5 | @BaseSubApp.add_ticker_selection_markup(['chart_acf{{prefix}}', 'chart_pacf{{prefix}}'])
6 | def main_screen(self):
7 | return """
8 |
9 |
10 |
11 | Auto-correlation Function
12 |
13 |
14 |
15 |
16 |
17 |
18 | Partial Auto-correlation Function
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
31 |
32 |
33 |
34 | """
35 | @route(show_acf='*')
36 | @captureOutput
37 | def show_acf_screen(self):
38 | smt.graphics.plot_acf(self.parent_pixieapp.get_active_df()['Adj. Close'], lags=self.lag)
39 |
40 | @route(show_pacf='*')
41 | @captureOutput
42 | def show_pacf_screen(self):
43 | smt.graphics.plot_pacf(self.parent_pixieapp.get_active_df()['Adj. Close'], lags=self.lag)
44 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode22.py:
--------------------------------------------------------------------------------
1 | logmsft = np.log(train_set['Adj. Close'])
2 | logmsft.index = train_set['Date']
3 | logmsft_diff = pd.DataFrame(logmsft - logmsft.shift()).reset_index()
4 | logmsft_diff.dropna(inplace=True)
5 | display(logmsft_diff)
6 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode23.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.stattools import adfuller
2 | import pprint
3 |
4 | ad_fuller_results = adfuller(
5 | logmsft_diff['Adj. Close'], autolag = 'AIC', regression = 'c'
6 | )
7 | labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
8 | pp = pprint.PrettyPrinter(indent=4)
9 | pp.pprint({labels[i]: ad_fuller_results[i] for i in range(4)})
10 |
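11 | # Interpretation: a p-value below the chosen significance level (e.g. 0.05) rejects
12 | # the unit-root null hypothesis, i.e. the differenced series can be considered stationary.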
--------------------------------------------------------------------------------
/chapter 8/sampleCode24.json:
--------------------------------------------------------------------------------
1 | {
2 | 'Number of lags used': 3,
3 | 'Number of Observations Used': 8057,
4 | 'Test statistic': -48.071592138591136,
5 | 'MacKinnon's approximate p-value': 0.0
6 | }
7 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode25.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import statsmodels.tsa.api as smt
3 | smt.graphics.plot_acf(logmsft_diff['Adj. Close'], lags=100)
4 | plt.show()
5 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode26.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_pacf(logmsft_diff['Adj. Close'], lags=100)
2 | plt.show()
3 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode27.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 |
3 | import warnings
4 | with warnings.catch_warnings():
5 | warnings.simplefilter("ignore")
6 | arima_model_class = ARIMA(train_set['Adj. Close'], dates=train_set['Date'], order=(1,1,1))
7 | arima_model = arima_model_class.fit(disp=0)
8 |
9 | print(arima_model.resid.describe())
10 |
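11 | # order=(1,1,1) stands for ARIMA(p=1, d=1, q=1): one autoregressive term,
12 | # first-order differencing and one moving-average term.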
--------------------------------------------------------------------------------
/chapter 8/sampleCode28.py:
--------------------------------------------------------------------------------
1 | def plot_predict(model, dates_series, num_observations):
2 | fig = plt.figure(figsize = (12,5))
3 | model.plot_predict(
4 | start = str(dates_series[len(dates_series)-num_observations]),
5 | end = str(dates_series[len(dates_series)-1])
6 | )
7 | plt.show()
8 |
9 | plot_predict(arima_model, train_set['Date'], 100)
10 | plot_predict(arima_model, train_set['Date'], 10)
11 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode29.py:
--------------------------------------------------------------------------------
1 | def compute_test_set_predictions(train_set, test_set):
2 | with warnings.catch_warnings():
3 | warnings.simplefilter("ignore")
4 | history = train_set['Adj. Close'].values
5 | forecast = np.array([])
6 | for t in range(len(test_set)):
7 | prediction = ARIMA(history, order=(1,1,0)).fit(disp=0).forecast()
8 | history = np.append(history, test_set['Adj. Close'].iloc[t])
9 | forecast = np.append(forecast, prediction[0])
10 | return pd.DataFrame(
11 | {"forecast": forecast,
12 | "test": test_set['Adj. Close'],
13 | "Date": pd.date_range(start=test_set['Date'].iloc[len(test_set)-1], periods = len(test_set))
14 | }
15 | )
16 |
17 | results = compute_test_set_predictions(train_set, test_set)
18 | display(results)
19 |
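20 | # This is a rolling one-step-ahead forecast: after each prediction, the actual test
21 | # value is appended to the history before the ARIMA model is fitted again.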
--------------------------------------------------------------------------------
/chapter 8/sampleCode3.py:
--------------------------------------------------------------------------------
1 | sample = np.arange(10)
2 | print("Sample:", sample)
3 | print("Access by index: ", sample[2])
4 | print("First 5 elements: ", sample[:5])
5 | print("From 8 to the end: ", sample[8:])
6 | print("Last 3 elements: ", sample[-3:])
7 | print("Every 2 elements: ", sample[::2])
8 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode30.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import mean_squared_error
2 | def compute_mean_squared_error(test_series, forecast_series):
3 | return mean_squared_error(test_series, forecast_series)
4 |
5 | print('Mean Squared Error: {}'.format(
6 | compute_mean_squared_error( test_set['Adj. Close'], results.forecast))
7 | )
8 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode31.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | @route(explore="*")
3 | @templateArgs
4 | def stock_explore_screen(self):
5 | tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),
6 | ("ACF and PACF", "AutoCorrelationSubApp"), ("Forecast with ARIMA", "ForecastArimaSubApp")]
7 | …
8 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode32.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 |
3 | @PixieApp
4 | class ForecastArimaSubApp(BaseSubApp):
5 | def setup(self):
6 | self.entity_dataframe = self.parent_pixieapp.get_active_df().copy()
7 | self.differencing = False
8 |
9 | def set_active_ticker(self, ticker):
10 | BaseSubApp.set_active_ticker(self, ticker)
11 | self.setup()
12 |
13 | @route()
14 | @BaseSubApp.add_ticker_selection_markup([])
15 | def main_screen(self):
16 | return """
17 |
18 | 1. Data Exploration to test for Stationarity
19 |
22 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | Auto-correlation Function
37 |
38 |
39 |
40 |
41 |
42 |
43 | Partial Auto-correlation Function
44 |
45 |
46 |
47 |
48 |
49 | """
50 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode33.py:
--------------------------------------------------------------------------------
1 | [[BaseSubApp]]
2 | def add_ticker_selection_markup(refresh_ids):
3 | def deco(fn):
4 | def wrap(self, *args, **kwargs):
5 | return """
6 |
7 |
8 | {%for ticker, state in this.parent_pixieapp.tickers.items()%}
9 |
14 | {%endfor%}
15 |
16 |
17 | """ + fn(self, *args, **kwargs)
18 | return wrap
19 | return deco
20 |
21 | def set_active_ticker(self, ticker):
22 | self.parent_pixieapp.set_active_ticker(ticker)
23 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode34.py:
--------------------------------------------------------------------------------
1 | [[ForecastArimaSubApp]]
2 | def set_active_ticker(self, ticker):
3 | BaseSubApp.set_active_ticker(self, ticker)
4 | self.setup()
5 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode35.py:
--------------------------------------------------------------------------------
1 | @route()
2 | @BaseSubApp.add_ticker_selection_markup([])
3 | def main_screen(self):
4 | return """
5 |
6 | 1. Data Exploration to test for Stationarity
7 |
10 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | Auto-correlation Function
25 |
26 |
27 |
28 |
29 |
30 |
31 | Partial Auto-correlation Function
32 |
33 |
34 |
35 |
36 |
37 | """
38 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode36.py:
--------------------------------------------------------------------------------
1 | def toggle_differencing(self):
2 | if self.differencing:
3 | self.entity_dataframe = self.parent_pixieapp.get_active_df().copy()
4 | self.differencing = False
5 | else:
6 | log_df = np.log(self.entity_dataframe['Adj. Close'])
7 | log_df.index = self.entity_dataframe['Date']
8 | self.entity_dataframe = pd.DataFrame(log_df - log_df.shift()).reset_index()
9 | self.entity_dataframe.dropna(inplace=True)
10 | self.differencing = True
11 |
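12 | # Toggling on applies the same log-difference transformation used in sampleCode22.py
13 | # to make the series stationary; toggling off restores the raw DataFrame.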
--------------------------------------------------------------------------------
/chapter 8/sampleCode37.py:
--------------------------------------------------------------------------------
1 | @route(show_acf='*')
2 | @captureOutput
3 | def show_acf_screen(self):
4 | smt.graphics.plot_acf(self.entity_dataframe['Adj. Close'], lags=50)
5 |
6 | @route(show_pacf='*')
7 | @captureOutput
8 | def show_pacf_screen(self):
9 | smt.graphics.plot_pacf(self.entity_dataframe['Adj. Close'], lags=50)
10 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode38.py:
--------------------------------------------------------------------------------
1 | @route(do_forecast="true")
2 | @BaseSubApp.add_ticker_selection_markup([])
3 | def do_forecast_screen(self):
4 | return """
5 |
6 | 2. Build Arima model
7 |
10 |
11 |
12 |
13 | Enter the p,d,q order for the ARIMA model you want to build
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
31 |
32 |
33 | """
34 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode39.py:
--------------------------------------------------------------------------------
1 | @route(plot_predict="true")
2 | @captureOutput
3 | def plot_predict(self):
4 | plot_predict(self.arima_model, self.train_set['Date'], 100)
5 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode4.json:
--------------------------------------------------------------------------------
1 | {
2 | "databases": [{
3 | "id": 231,
4 | "name": "Deutsche Bundesbank Data Repository",
5 | "database_code": "BUNDESBANK",
6 | "description": "Data on the German economy, …",
7 | "datasets_count": 49358,
8 | "downloads": 43209922,
9 | "premium": false,
10 | "image": "https://quandl--upload.s3.amazonaws/...thumb_bundesbank.png",
11 | "favorite": false,
12 | "url_name": "Deutsche-Bundesbank-Data-Repository"
13 | },…
14 | ],
15 | "meta": {
16 | "query": "",
17 | "per_page": 100,
18 | "current_page": 1,
19 | "prev_page": null,
20 | "total_pages": 3,
21 | "total_count": 274,
22 | "next_page": 2,
23 | "current_first_item": 1,
24 | "current_last_item": 100
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode40.py:
--------------------------------------------------------------------------------
1 | @route(p_order="*",d_order="*",q_order="*")
2 | def build_arima_model_screen(self, p_order, d_order, q_order):
3 | #Build the arima model
4 | self.train_set = self.parent_pixieapp.get_active_df()[:-14]
5 | self.test_set = self.parent_pixieapp.get_active_df()[-14:]
6 | self.arima_model = ARIMA(
7 | self.train_set['Adj. Close'], dates=self.train_set['Date'],
8 | order=(int(p_order),int(d_order),int(q_order))
9 | ).fit(disp=0)
10 | self.residuals = self.arima_model.resid.describe().to_frame().reset_index()
11 | return """
12 |
13 | ARIMA Model successfully created
14 |
15 |
16 |
17 |
18 |
19 | Predicted values against the train set
20 |
21 |
22 |
23 |
24 |
25 | {
26 | "handlerId": "tableView",
27 | "table_noschema": "true",
28 | "table_nosearch": "true",
29 | "table_nocount": "true"
30 | }
31 |
32 |
33 | Residual errors statistics
34 |
35 | """
36 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode41.py:
--------------------------------------------------------------------------------
1 | def compute_test_set_predictions(self):
2 | return compute_test_set_predictions(self.train_set, self.test_set)
3 |
4 | @route(do_diagnose="true")
5 | @BaseSubApp.add_ticker_selection_markup([])
6 | def do_diagnose_screen(self):
7 | return """
8 | 3. Diagnose the model against the test set
9 |
10 |
11 |
12 | {
13 | "keyFields": "Date",
14 | "valueFields": "forecast,test",
15 | "handlerId": "lineChart",
16 | "rendererId": "bokeh",
17 | "noChartCache": "true"
18 | }
19 |
20 |
21 |
22 | """
23 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode5.py:
--------------------------------------------------------------------------------
1 | import requests
2 | databases = []
3 | page = 1
4 | while page is not None:
5 | payload = requests.get("https://www.quandl.com/api/v3/databases?api_key={}&page={}"\
6 | .format(quandl.ApiConfig.api_key, page)).json()
7 | databases += payload['databases']
8 | page = payload['meta']['next_page']
9 |
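10 | # meta.next_page is null on the last page (see sampleCode4.json), which ends the loop.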
--------------------------------------------------------------------------------
/chapter 8/sampleCode6.py:
--------------------------------------------------------------------------------
1 | codes = pixiedust.sampleData( "https://www.quandl.com/api/v3/databases/WIKI/codes?api_key=" + quandl.ApiConfig.api_key)
2 | display(codes)
3 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode7.py:
--------------------------------------------------------------------------------
1 | msft = quandl.get('WIKI/MSFT')
2 | msft['daily_spread'] = msft['Adj. Close'].diff()
3 | msft = msft.reset_index()
4 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode8.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | tail = msft[msft['Date'] > '2016-05-16']
4 | investment = np.cumsum((10000 / tail['Adj. Close'].values[0]) * tail['daily_spread']) + 10000
5 | investment = investment.astype(int)
6 | investment.index = tail['Date']
7 | investment = investment.resample('M').mean()
8 | investment = pd.DataFrame(investment).reset_index()
9 | display(investment)
10 |
--------------------------------------------------------------------------------
/chapter 8/sampleCode9.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_acf(np.cos(np.linspace(0, 1000, 100)), lags=50)
2 | plt.show()
3 |
--------------------------------------------------------------------------------
/chapter 9/USFlightsAnalysis/airlines.csv:
--------------------------------------------------------------------------------
1 | IATA_CODE,AIRLINE
2 | UA,United Air Lines Inc.
3 | AA,American Airlines Inc.
4 | US,US Airways Inc.
5 | F9,Frontier Airlines Inc.
6 | B6,JetBlue Airways
7 | OO,Skywest Airlines Inc.
8 | AS,Alaska Airlines Inc.
9 | NK,Spirit Air Lines
10 | WN,Southwest Airlines Co.
11 | DL,Delta Air Lines Inc.
12 | EV,Atlantic Southeast Airlines
13 | HA,Hawaiian Airlines Inc.
14 | MQ,American Eagle Airlines Inc.
15 | VX,Virgin America
16 |
--------------------------------------------------------------------------------
/chapter 9/USFlightsAnalysis/flights.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 9/USFlightsAnalysis/flights.zip
--------------------------------------------------------------------------------
/chapter 9/sampleCode1.py:
--------------------------------------------------------------------------------
1 | G = nx.DiGraph()
2 | G.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
3 | G.add_edge('A', 'B')
4 | G.add_edge('B', 'B')
5 | G.add_edges_from([('A', 'E'),('A', 'D'),('B', 'C'),('C', 'E'),('D', 'C')])
6 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode10.py:
--------------------------------------------------------------------------------
1 | import matplotlib.cm as cm
2 | fig = plt.figure(figsize = (12,12))
3 | nx.draw(flight_graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
4 | node_color=range(len(flight_graph)), cmap=cm.get_cmap(name="cool"),
5 | edge_color=range(len(flight_graph.edges)), edge_cmap=cm.get_cmap(name="spring"),
6 | pos = nx.random_layout(flight_graph)
7 | )
8 | plt.show()
9 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode11.py:
--------------------------------------------------------------------------------
1 | degree_df = pd.DataFrame([{"IATA_CODE":k, "DEGREE":v} for k,v in flight_graph.degree], columns=["IATA_CODE", "DEGREE"])
2 | airports_centrality = pd.merge(airports, degree_df, on='IATA_CODE')
3 | airports_centrality
4 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode12.py:
--------------------------------------------------------------------------------
1 | from six import iteritems
2 | def compute_centrality(g, centrality_df, compute_fn, col_name, *args, **kwargs):
3 | # create a temporary DataFrame that contains the computed centrality values
4 | temp_df = pd.DataFrame(
5 | [{"IATA_CODE":k, col_name:v} for k,v in iteritems(compute_fn(g, *args, **kwargs))],
6 | columns=["IATA_CODE", col_name]
7 | )
9 | # remove col_name from centrality_df if it is already there
9 | if col_name in centrality_df.columns:
10 | centrality_df.drop([col_name], axis=1, inplace=True)
12 | # merge the two DataFrames on the IATA_CODE column
12 | centrality_df = pd.merge(centrality_df, temp_df, on='IATA_CODE')
13 | return centrality_df
14 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode13.py:
--------------------------------------------------------------------------------
1 | airports_centrality = compute_centrality(flight_graph, airports_centrality, nx.pagerank, "PAGE_RANK")
2 | airports_centrality = compute_centrality(flight_graph, airports_centrality, nx.closeness_centrality, "CLOSENESS")
3 | airports_centrality = compute_centrality(
4 | flight_graph, airports_centrality, nx.betweenness_centrality, "BETWEENNESS", k=len(flight_graph))
5 | airports_centrality
6 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode14.py:
--------------------------------------------------------------------------------
1 | for col_name in ["DEGREE", "PAGE_RANK", "CLOSENESS", "BETWEENNESS"]:
2 | print("{} : {}".format(
3 | col_name,
4 | airports_centrality.nlargest(10, col_name)["IATA_CODE"].values)
5 | )
6 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode15.py:
--------------------------------------------------------------------------------
1 | import matplotlib.cm as cm
2 | def visualize_neighbors(parent_node):
3 | fig = plt.figure(figsize = (12,12))
4 | # Create a subgraph and add an edge from the parent node to all its neighbors
5 | graph = nx.DiGraph()
6 | for neighbor in flight_graph.neighbors(parent_node):
7 | graph.add_edge(parent_node, neighbor)
8 | # draw the subgraph
9 | nx.draw(graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
10 | node_color=range(len(graph)), cmap=cm.get_cmap(name="cool"),
11 | edge_color=range(len(graph.edges)), edge_cmap=cm.get_cmap(name="spring"),
12 | )
13 | plt.show()
14 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode16.py:
--------------------------------------------------------------------------------
1 | # use a cache so we don't recompute the weight for the same airport every time
2 | cache = {}
3 | def compute_weight(centrality_indice_col):
4 | # wrapper function that conforms to the Dijkstra weight argument signature
5 | def wrapper(source, target, attribute):
6 | # try the cache first and compute the weight if not there
7 | source_weight = cache.get(source, None)
8 | if source_weight is None:
9 | # look up the airports_centrality for the value
10 | source_weight = airports_centrality.loc[airports_centrality["IATA_CODE"] == source][centrality_indice_col].values[0]
11 | cache[source] = source_weight
12 | target_weight = cache.get(target, None)
13 | if target_weight is None:
14 | target_weight = airports_centrality.loc[airports_centrality["IATA_CODE"] == target][centrality_indice_col].values[0]
15 | cache[target] = target_weight
16 | # the returned weight is inversely proportional to the computed centrality, since
17 | # the Dijkstra algorithm gives precedence to shorter distances
18 | return float(1/source_weight) + float(1/target_weight)
19 | return wrapper
20 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode17.py:
--------------------------------------------------------------------------------
1 | for col_name in ["DEGREE", "PAGE_RANK", "CLOSENESS"]:
2 | #clear the cache
3 | cache.clear()
4 | print("{} : {}".format(
5 | col_name,
6 | nx.dijkstra_path(flight_graph, "BOS", "PSC", weight=compute_weight(col_name))
7 | ))
8 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode18.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | from pixiedust.display.app import *
3 | from pixiedust.apps.mapboxBase import MapboxBase
4 | from collections import OrderedDict
5 |
6 | @PixieApp
7 | class USFlightsAnalysis(MapboxBase):
8 | …
9 | @route()
10 | def main_screen(self):
11 | return """
12 |
20 |
21 |
22 |
23 |
24 | Select origin airport:
25 |
26 |
27 |
33 |
34 |
35 |
36 |
37 |
38 | Select destination airport:
39 |
40 |
41 |
47 |
48 |
50 |
51 |
52 |
53 |
54 |
55 |
66 |
67 | """
68 |
69 | def get_airports(self):
70 | return [tuple(l) for l in airports_centrality[["IATA_CODE", "AIRPORT"]].values.tolist()]
71 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode19.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | @route(visualize_graph="*")
3 | @captureOutput
4 | def visualize_graph_screen(self, visualize_graph):
5 | visualize_neighbors(visualize_graph, (5,5))
6 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode2.py:
--------------------------------------------------------------------------------
1 | %matplotlib inline
2 | import matplotlib.pyplot as plt
3 | nx.draw(G_complete, with_labels=True)
4 | plt.show()
5 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode20.html:
--------------------------------------------------------------------------------
1 |
11 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode21.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | @route(org_airport="*", dest_airport="*")
3 | def compute_path_screen(self, org_airport, dest_airport):
4 | return """
5 |
6 |
7 | Centrality Indices
8 | {% for centrality in this.centrality_indices.keys() %}
9 |
10 |
12 |
13 |
14 | {%endfor%}
15 |
16 |
17 | Select a centrality index to show the shortest flight path
18 |
19 |
20 |
21 | {
22 | "keyFields": "LATITUDE,LONGITUDE",
23 | "valueFields": "AIRPORT,DEGREE,PAGE_RANK,ELAPSED_TIME,CLOSENESS",
24 | "custombasecolorsecondary": "#fffb00",
25 | "colorrampname": "Light to Dark Red",
26 | "handlerId": "mapView",
27 | "quantiles": "0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0",
28 | "kind": "choropleth",
29 | "rowCount": "1000",
30 | "numbins": "5",
31 | "mapboxtoken": "pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4M29iazA2Z2gycXA4N2pmbDZmangifQ.-g_vE53SD2WrJ6tFX7QHmA",
32 | "custombasecolor": "#ffffff"
33 | }
34 |
35 |
36 |
37 |
38 | """
39 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode22.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | def setup(self):
3 | self.centrality_indices = OrderedDict([
4 | ("ELAPSED_TIME","rgba(256,0,0,0.65)"),
5 | ("DEGREE", "rgba(0,256,0,0.65)"),
6 | ("PAGE_RANK", "rgba(0,0,256,0.65)"),
7 | ("CLOSENESS", "rgba(128,0,128,0.65)")
8 | ])
9 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode23.json:
--------------------------------------------------------------------------------
1 | {
2 | "geometry": {
3 | "type": "LineString",
4 | "coordinates": [
5 | [-93.21692, 44.88055],
6 | [-119.11903000000001, 46.26468]
7 | ]
8 | },
9 | "type": "Feature",
10 | "properties": {}
11 | }
12 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode24.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | def compute_toggle_centrality_layer(self, org_airport, dest_airport, centrality):
3 | cache.clear()
4 | cities = nx.dijkstra_path(flight_graph, org_airport, dest_airport, weight=compute_weight(centrality))
5 | layer_index = self.get_layer_index(centrality, {
6 | "name": centrality,
7 | "geojson": {
8 | "type": "FeatureCollection",
9 | "features":[
10 | {"type":"Feature",
11 | "properties":{"route":"{} to {}".format(cities[i], cities[i+1])},
12 | "geometry":{
13 | "type":"LineString",
14 | "coordinates":[
15 | self.get_airport_location(cities[i]),
16 | self.get_airport_location(cities[i+1])
17 | ]
18 | }
19 | } for i in range(len(cities) - 1)
20 | ]
21 | },
22 | "paint":{
23 | "line-width": 8,
24 | "line-color": self.centrality_indices[centrality]
25 | }
26 | })
27 | self.toggleLayer(layer_index)
28 |
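29 | # Each leg of the Dijkstra shortest path becomes a GeoJSON LineString feature, so the
30 | # Mapbox layer draws the full route as a sequence of connected segments.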
--------------------------------------------------------------------------------
/chapter 9/sampleCode25.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | def get_airport_location(self, airport_code):
3 | row = airports_centrality.loc[airports_centrality["IATA_CODE"] == airport_code]
4 | if not row.empty:
5 | return [row["LONGITUDE"].values[0], row["LATITUDE"].values[0]]
6 | return None
7 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode26.py:
--------------------------------------------------------------------------------
1 | def compute_delay_airline_df(airline, org_airport=None):
2 | # create a mask for selecting the data
3 | mask = (flights["AIRLINE"] == airline)
4 | if org_airport is not None:
5 | # Add the org_airport to the mask
6 | mask = mask & (flights["ORIGIN_AIRPORT"] == org_airport)
7 | # Apply the mask to the Pandas dataframe (copy to avoid SettingWithCopyWarning)
8 | df = flights[mask].copy()
9 | # Convert the YEAR, MONTH and DAY columns into a DateTime
10 | df["DATE"] = pd.to_datetime(df[['YEAR','MONTH', 'DAY']])
11 | # Select only the columns that we need
12 | return df[["DATE", "ARRIVAL_DELAY"]]
13 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode27.py:
--------------------------------------------------------------------------------
1 | from pixiedust.apps.template import TemplateTabbedApp
2 |
3 | @PixieApp
4 | class RouteAnalysisApp(TemplateTabbedApp):
5 | def setup(self):
6 | self.apps = [
7 | {"title": "Search Shortest Route", "app_class": "SearchShortestRouteApp"},
8 | {"title": "Explore Airlines", "app_class": "AirlinesApp"}
9 | ]
10 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode28.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | @route(org_airport="*", dest_airport="*")
3 | def analyze_route(self, org_airport, dest_airport):
4 | return """
5 |
8 |
9 | """
10 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode29.py:
--------------------------------------------------------------------------------
1 | [[SearchShortestRouteApp]]
2 | from pixiedust.display.app import *
3 | from pixiedust.apps.mapboxBase import MapboxBase
4 | from collections import OrderedDict
5 |
6 | @PixieApp
7 | class SearchShortestRouteApp(MapboxBase):
8 | def setup(self):
9 | self.org_airport = self.parent_pixieapp.options.get("org_airport")
10 | self.dest_airport = self.parent_pixieapp.options.get("dest_airport")
11 | self.centrality_indices = OrderedDict([
12 | ("ELAPSED_TIME","rgba(256,0,0,0.65)"),
13 | ("DEGREE", "rgba(0,256,0,0.65)"),
14 | ("PAGE_RANK", "rgba(0,0,256,0.65)"),
15 | ("CLOSENESS", "rgba(128,0,128,0.65)")
16 | ])
17 | …
18 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode3.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import datetime
3 | import numpy as np
4 |
5 | # clean up the flights data in flights.csv
6 | flights = pd.read_csv('flights.raw.csv', low_memory=False)
7 |
8 | # select only the rows that have a 3-letter IATA code for the ORIGIN and DESTINATION airports
9 | mask = (flights["ORIGIN_AIRPORT"].str.len() == 3) & (flights["DESTINATION_AIRPORT"].str.len() == 3)
10 | flights = flights[ mask ]
11 |
12 | # remove the unwanted columns
13 | dropped_columns=["SCHEDULED_DEPARTURE","SCHEDULED_TIME",
14 | "CANCELLATION_REASON","DIVERTED","TAIL_NUMBER","TAXI_OUT",
15 | "WHEELS_OFF","WHEELS_ON",
16 | "TAXI_IN","SCHEDULED_ARRIVAL", "ARRIVAL_TIME", "AIR_SYSTEM_DELAY","SECURITY_DELAY",
17 | "AIRLINE_DELAY","LATE_AIRCRAFT_DELAY", "WEATHER_DELAY"]
18 | flights.drop(dropped_columns, axis=1, inplace=True)
19 |
20 | # remove the rows that have NA in the ELAPSED_TIME column
21 | flights.dropna(subset=["ELAPSED_TIME"], inplace=True)
22 |
23 | # remove the rows that have NA in the DEPARTURE_TIME column
24 | flights.dropna(subset=["DEPARTURE_TIME"], inplace=True)
25 |
26 | # Create a new DEPARTURE_TIME column that holds the actual datetime
27 | def to_datetime(row):
28 | departure_time = str(int(row["DEPARTURE_TIME"])).zfill(4)
29 | hour = int(departure_time[0:2])
30 | return datetime.datetime(year=row["YEAR"], month=row["MONTH"], day=row["DAY"],
31 | hour = 0 if hour >= 24 else hour,
32 | minute=int(departure_time[2:4])
33 | )
34 | flights["DEPARTURE_TIME"] = flights.apply(to_datetime, axis=1)
35 |
36 | # write the data back to file without the index
37 | flights.to_csv('flights.csv', index=False)
38 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode30.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 | @PixieApp
3 | class AirlinesApp():
4 | def setup(self):
5 | self.org_airport = self.parent_pixieapp.options.get("org_airport")
6 | self.dest_airport = self.parent_pixieapp.options.get("dest_airport")
7 | self.airlines = flights[flights["ORIGIN_AIRPORT"] == self.org_airport].groupby("AIRLINE").size().index.values.tolist()
8 | self.airlines = [(a, airlines.loc[airlines["IATA_CODE"] == a]["AIRLINE"].values[0]) for a in self.airlines]
9 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode31.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 | @route()
3 | def main_screen(self):
4 | return """
5 |
6 | {%for airline_code, airline_name in this.airlines%}
7 |
8 | {{airline_name}}
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | {%endfor%}
17 |
18 | """
19 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode32.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 | @route(delay_org_airport="*",airline_code="*", airline_name="*")
3 | @templateArgs
4 | def delay_airline_screen(self, delay_org_airport, airline_code, airline_name):
5 | mask = (flights["AIRLINE"] == airline_code)
6 | if delay_org_airport == "true":
7 | mask = mask & (flights["ORIGIN_AIRPORT"] == self.org_airport)
8 | average_delay = round(flights[mask]["ARRIVAL_DELAY"].mean(), 2)
9 | return """
10 | {%if delay_org_airport == "true" %}
11 | Delay chart for all flights out of {{this.org_airport}}
12 | {%else%}
13 | Delay chart for all flights
14 | {%endif%}
15 | Average delay: {{average_delay}} minutes
16 |
17 |
18 | {
19 | "keyFields": "DATE",
20 | "handlerId": "lineChart",
21 | "valueFields": "ARRIVAL_DELAY",
22 | "noChartCache": "true"
23 | }
24 |
25 |
26 | """
27 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode33.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 | def compute_delay_airline_df(self, airline, delay_org_airport):
3 | mask = (flights["AIRLINE"] == airline)
4 | if delay_org_airport == "true":
5 | mask = mask & (flights["ORIGIN_AIRPORT"] == self.org_airport)
6 | df = flights[mask].copy()
7 | df["DATE"] = pd.to_datetime(df[['YEAR','MONTH', 'DAY']])
8 | return df[["DATE", "ARRIVAL_DELAY"]]
9 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode34.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import statsmodels.tsa.api as smt
3 | smt.graphics.plot_acf(df['ARRIVAL_DELAY'], lags=100)
4 | plt.show()
5 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode35.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import statsmodels.tsa.api as smt
3 | smt.graphics.plot_pacf(df['ARRIVAL_DELAY'], lags=50)
4 | plt.show()
5 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode36.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | train_set, test_set = df[:-14], df[-14:]
4 | train_set.index = train_set["DEPARTURE_TIME"]
5 | test_set.index = test_set["DEPARTURE_TIME"]
6 | logdf = np.log(train_set['ARRIVAL_DELAY'])
7 | logdf.index = train_set['DEPARTURE_TIME']
8 | logdf_diff = pd.DataFrame(logdf - logdf.shift()).reset_index()
9 | logdf_diff.replace([np.inf, -np.inf], np.nan, inplace=True)
10 | logdf_diff.dropna(inplace=True)
11 | display(logdf_diff)
12 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode37.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_acf(logdf_diff["ARRIVAL_DELAY"], lags=100)
2 | plt.show()
3 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode38.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_pacf(logdf_diff["ARRIVAL_DELAY"], lags=100)
2 | plt.show()
3 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode39.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 |
3 | import warnings
4 | with warnings.catch_warnings():
5 | warnings.simplefilter("ignore")
6 | arima_model_class = ARIMA(train_set['ARRIVAL_DELAY'], dates=train_set['DEPARTURE_TIME'], order=(1,1,1))
7 | arima_model = arima_model_class.fit(disp=0)
8 | print(arima_model.resid.describe())
9 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode4.py:
--------------------------------------------------------------------------------
1 | airports = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/airports.csv")
2 | airlines = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/airlines.csv")
3 | flights = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/flights.zip")
4 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode40.py:
--------------------------------------------------------------------------------
1 | def plot_predict(model, dates_series, num_observations):
2 | fig,ax = plt.subplots(figsize = (12,8))
3 | model.plot_predict(
4 | start = dates_series[len(dates_series)-num_observations],
5 | end = dates_series[len(dates_series)-1],
6 | ax = ax
7 | )
8 | plt.show()
9 | plot_predict(arima_model, train_set['DEPARTURE_TIME'], 100)
10 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode41.py:
--------------------------------------------------------------------------------
1 | def compute_test_set_predictions(train_set, test_set):
2 | with warnings.catch_warnings():
3 | warnings.simplefilter("ignore")
4 | history = train_set['ARRIVAL_DELAY'].values
5 | forecast = np.array([])
6 | for t in range(len(test_set)):
7 | prediction = ARIMA(history, order=(1,1,0)).fit(disp=0).forecast()
8 | history = np.append(history, test_set['ARRIVAL_DELAY'].iloc[t])
9 | forecast = np.append(forecast, prediction[0])
10 | return pd.DataFrame(
11 | {"forecast": forecast,
12 | "test": test_set['ARRIVAL_DELAY'],
13 | "Date": pd.date_range(start=test_set['DEPARTURE_TIME'].iloc[len(test_set)-1], periods = len(test_set))
14 | }
15 | )
16 |
17 | results = compute_test_set_predictions(train_set, test_set)
18 | display(results)
19 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode42.py:
--------------------------------------------------------------------------------
1 | [[PredictDelayApp]]
2 | @route()
3 | def main_screen(self):
4 | return """
5 |
6 |
7 |
8 |
9 | Select a flight segment:
10 |
11 |
12 |
18 |
19 |
20 |
21 |
22 | Select an airline:
23 |
24 |
25 |
31 |
32 |
33 |
34 |
35 |
36 |
38 |
39 |
40 |
41 |
42 | """
43 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode43.py:
--------------------------------------------------------------------------------
1 | [[PredictDelayApp]]
2 | @route(flight_segment="*", airline="*")
3 | @captureOutput
4 | def predict_screen(self, flight_segment, airline):
5 | if flight_segment is None or flight_segment == "":
6 | return "Please select a flight segment"
7 | airport = flight_segment.split(":")[1]
8 | mask = (flights["DESTINATION_AIRPORT"] == airport)
9 | if airline is not None and airline != "":
10 | mask = mask & (flights["AIRLINE"] == airline)
11 | df = flights[mask].copy()
12 | df.index = df["DEPARTURE_TIME"]
13 | df = df.tail(50000)
14 | df = df[~df.index.duplicated(keep='first')]
15 | with warnings.catch_warnings():
16 | warnings.simplefilter("ignore")
17 | arima_model_class = ARIMA(df["ARRIVAL_DELAY"], dates=df['DEPARTURE_TIME'], order=(1,1,1))
18 | arima_model = arima_model_class.fit(disp=0)
19 | fig, ax = plt.subplots(figsize = (12,8))
20 | num_observations = 100
21 | date_series = df["DEPARTURE_TIME"]
22 | arima_model.plot_predict(
23 | start = str(date_series[len(date_series)-num_observations]),
24 | end = str(date_series[len(date_series)-1]),
25 | ax = ax
26 | )
27 | plt.show()
28 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode44.py:
--------------------------------------------------------------------------------
1 | from pixiedust.apps.template import TemplateTabbedApp
2 |
3 | @PixieApp
4 | class RouteAnalysisApp(TemplateTabbedApp):
5 | def setup(self):
6 | self.apps = [
7 | {"title": "Search Shortest Route", "app_class": "SearchShortestRouteApp"},
8 | {"title": "Explore Airlines", "app_class": "AirlinesApp"},
9 | {"title": "Flight Delay Prediction", "app_class": "PredictDelayApp"}
10 | ]
11 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode5.py:
--------------------------------------------------------------------------------
1 | edges = flights.groupby(["ORIGIN_AIRPORT","DESTINATION_AIRPORT"]) [["ELAPSED_TIME"]].mean()
2 | edges
3 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode6.py:
--------------------------------------------------------------------------------
1 | edges = edges.reset_index()
2 | edges
3 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode7.py:
--------------------------------------------------------------------------------
1 | flight_graph = nx.from_pandas_edgelist(
2 | flights, "ORIGIN_AIRPORT","DESTINATION_AIRPORT", "ELAPSED_TIME",
3 | create_using = nx.DiGraph() )
4 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode8.py:
--------------------------------------------------------------------------------
1 | print("Nodes: {}".format(flight_graph.nodes))
2 | print("Edges: {}".format(flight_graph.edges))
3 |
--------------------------------------------------------------------------------
/chapter 9/sampleCode9.py:
--------------------------------------------------------------------------------
1 | import matplotlib.cm as cm
2 | fig = plt.figure(figsize = (12,12))
3 | nx.draw(flight_graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
4 | node_color=range(len(flight_graph)), cmap=cm.get_cmap(name="cool"),
5 | edge_color=range(len(flight_graph.edges)), edge_cmap=cm.get_cmap(name="spring")
6 | )
7 | plt.show()
8 |
--------------------------------------------------------------------------------