├── .gitignore
├── LICENSE
├── chapter 2
    ├── sampleCode1.py
    ├── sampleCode2.py
    ├── sampleCode3.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 3
    ├── GitHub Tracking Application
    │   ├── GitHub Sample Application - Part 1.ipynb
    │   ├── GitHub Sample Application - Part 2.ipynb
    │   ├── GitHub Sample Application - Part 3.ipynb
    │   └── GitHub Sample Application - Part 4.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode2.py
    ├── sampleCode3.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 4
    ├── sampleCode1.py
    ├── sampleCode2.py
    └── sampleCode3.html
├── chapter 5
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.html
    ├── sampleCode12.html
    ├── sampleCode13.html
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.html
    ├── sampleCode3.py
    ├── sampleCode30.html
    ├── sampleCode31.html
    ├── sampleCode32.html
    ├── sampleCode33.html
    ├── sampleCode34.html
    ├── sampleCode35.html
    ├── sampleCode36.html
    ├── sampleCode37.html
    ├── sampleCode38.html
    ├── sampleCode39.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.json
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 6
    ├── TensorFlow classification.ipynb
    ├── Tensorflow VR Part 1.ipynb
    ├── Tensorflow VR Part 2.ipynb
    ├── Tensorflow VR Part 3.ipynb
    ├── Tensorflow VR Part 4.ipynb
    ├── Visual Recognition
    │   └── mobilenet_v1_0.50_224
    │   │   ├── frozen_graph.pb
    │   │   ├── labels.txt
    │   │   └── quantized_graph.pb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 7
    ├── Twitter Sentiment Analysis - Part 1.ipynb
    ├── Twitter Sentiment Analysis - Part 2.ipynb
    ├── Twitter Sentiment Analysis - Part 3.ipynb
    ├── Twitter Sentiment Analysis - Part 4.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.json
    ├── sampleCode4.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
├── chapter 8
    ├── StockExplorer - Part 1.ipynb
    ├── StockExplorer - Part 2.ipynb
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.py
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.py
    ├── sampleCode24.json
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.py
    ├── sampleCode35.py
    ├── sampleCode36.py
    ├── sampleCode37.py
    ├── sampleCode38.py
    ├── sampleCode39.py
    ├── sampleCode4.json
    ├── sampleCode40.py
    ├── sampleCode41.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py
└── chapter 9
    ├── USFlightsAnalysis
        ├── US Flight data analysis - Part 1.ipynb
        ├── US Flight data analysis - Part 2.ipynb
        ├── US Flight data analysis - Part 3.ipynb
        ├── US Flight data analysis - Part 4.ipynb
        ├── airlines.csv
        ├── airports.csv
        └── flights.zip
    ├── sampleCode1.py
    ├── sampleCode10.py
    ├── sampleCode11.py
    ├── sampleCode12.py
    ├── sampleCode13.py
    ├── sampleCode14.py
    ├── sampleCode15.py
    ├── sampleCode16.py
    ├── sampleCode17.py
    ├── sampleCode18.py
    ├── sampleCode19.py
    ├── sampleCode2.py
    ├── sampleCode20.html
    ├── sampleCode21.py
    ├── sampleCode22.py
    ├── sampleCode23.json
    ├── sampleCode24.py
    ├── sampleCode25.py
    ├── sampleCode26.py
    ├── sampleCode27.py
    ├── sampleCode28.py
    ├── sampleCode29.py
    ├── sampleCode3.py
    ├── sampleCode30.py
    ├── sampleCode31.py
    ├── sampleCode32.py
    ├── sampleCode33.py
    ├── sampleCode34.py
    ├── sampleCode35.py
    ├── sampleCode36.py
    ├── sampleCode37.py
    ├── sampleCode38.py
    ├── sampleCode39.py
    ├── sampleCode4.py
    ├── sampleCode40.py
    ├── sampleCode41.py
    ├── sampleCode42.py
    ├── sampleCode43.py
    ├── sampleCode44.py
    ├── sampleCode5.py
    ├── sampleCode6.py
    ├── sampleCode7.py
    ├── sampleCode8.py
    └── sampleCode9.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode1.py:
--------------------------------------------------------------------------------
1 | import pandas
2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
3 | building_df = pandas.read_csv(data_url)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode2.py:
--------------------------------------------------------------------------------
 1 | #Spark CSV Loading
 2 | from pyspark.sql import SparkSession
 3 | try:
 4 |     from urllib import urlretrieve
 5 | except ImportError:
 6 |     #urlretrieve package has been refactored in Python 3 
 7 |     from urllib.request import urlretrieve
 8 | 
 9 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
10 | urlretrieve (data_url, "building.csv")
11 | 
12 | spark = SparkSession.builder.getOrCreate()
13 | building_df = spark.read\
14 |   .format('org.apache.spark.sql.execution.datasources.csv.CSVFileFormat')\
15 |   .load("building.csv")
16 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode3.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1)
3 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode4.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | data_url = "https://data.cityofnewyork.us/api/views/e98g-f8hy/rows.csv?accessType=DOWNLOAD"
3 | building_dataframe = pixiedust.sampleData(data_url, forcePandas=True)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode5.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | london_info = pixiedust.sampleData("https://files.datapress.com/london/dataset/london-borough-profiles/2015-09-24T15:50:01/London-borough-profiles.zip")
3 | display(london_info)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode6.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | data_url = "https://server/path"
3 | pixiedust.wrangleData(data_url)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode7.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data
3 | display(cars)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode8.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | homes = pixiedust.sampleData(6, forcePandas=True) #Million dollar home sales in NE Mass 
3 | display(homes)
4 | 


--------------------------------------------------------------------------------
/chapter 2/sampleCode9.py:
--------------------------------------------------------------------------------
 1 | #import the pixieapp decorators
 2 | from pixiedust.display.app import *
 3 | 
 4 | #Load the cars dataframe into the Notebook
 5 | cars = pixiedust.sampleData(1)
 6 | 
 7 | @PixieApp   #decorator for making the class a PixieApp
 8 | class HelloWorldApp():
 9 |     @route()  #decorator for making a method a route (no arguments means default route)
10 |     def main_screen(self):
11 |         return """
12 |         <button type="submit" pd_options="show_chart=true" pd_target="chart">Show Chart</button> 
13 |         <!--Placeholder div to display the chart-->
14 |         <div id="chart"></div>
15 |         """
16 |     
17 |     @route(show_chart="true")
18 |     def chart(self):
19 |         #Return a div bound to the cars dataframe using the pd_entity attribute
20 |         #pd_entity can refer a class variable or a global variable scoped to the notebook
21 |         return """
22 |         <div pd_render_onload pd_entity="cars">
23 |             <pd_options>
24 |                 {
25 |                   "title": "Average Mileage by Horsepower",
26 |                   "aggregation": "AVG",
27 |                   "clusterby": "origin",
28 |                   "handlerId": "barChart",
29 |                   "valueFields": "mpg",
30 |                   "rendererId": "bokeh",
31 |                   "keyFields": "horsepower"
32 |                 }
33 |             </pd_options>
34 |         </div>
35 |         """
36 | 
37 | #Instantiate the application and run it
38 | app = HelloWorldApp()
39 | app.run()
40 | 


--------------------------------------------------------------------------------
/chapter 3/GitHub Tracking Application/GitHub Sample Application - Part 1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# GitHub Tracking Application Part 1"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "pixiedust": {
 15 |      "displayParams": {}
 16 |     }
 17 |    },
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/html": [
 22 |        "<style type=\"text/css\">.pd_warning{display:none;}</style><div class=\"pd_warning\"><em>Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter</em></div>"
 23 |       ],
 24 |       "text/plain": [
 25 |        "<IPython.core.display.HTML object>"
 26 |       ]
 27 |      },
 28 |      "metadata": {},
 29 |      "output_type": "display_data"
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "from pixiedust.display.app import *\n",
 34 |     "import requests\n",
 35 |     "import pandas\n",
 36 |     "\n",
 37 |     "@PixieApp\n",
 38 |     "class GitHubTracking():\n",
 39 |     "    @route()\n",
 40 |     "    def main_screen(self):\n",
 41 |     "        return \"\"\"\n",
 42 |     "<style>\n",
 43 |     "    div.outer-wrapper {\n",
 44 |     "        display: table;width:100%;height:300px;\n",
 45 |     "    }\n",
 46 |     "    div.inner-wrapper {\n",
 47 |     "        display: table-cell;vertical-align: middle;height: 100%;width: 100%;\n",
 48 |     "    }\n",
 49 |     "</style>\n",
 50 |     "<div class=\"outer-wrapper\">\n",
 51 |     "    <div class=\"inner-wrapper\">\n",
 52 |     "        <div class=\"col-sm-3\"></div>\n",
 53 |     "        <div class=\"input-group col-sm-6\">\n",
 54 |     "          <input id=\"query{{prefix}}\" type=\"text\" class=\"form-control\" placeholder=\"Search projects on GitHub\">\n",
 55 |     "          <span class=\"input-group-btn\">\n",
 56 |     "            <button class=\"btn btn-default\" type=\"button\" pd_options=\"query=$val(query{{prefix}})\">Submit Query</button>\n",
 57 |     "          </span>\n",
 58 |     "        </div>\n",
 59 |     "    </div>\n",
 60 |     "</div>   \n",
 61 |     "\"\"\"\n",
 62 |     "    @route(query=\"*\")\n",
 63 |     "    @templateArgs\n",
 64 |     "    def do_search(self, query):\n",
 65 |     "        self.first_url = \"https://api.github.com/search/repositories?q={}\".format(query)\n",
 66 |     "        self.prev_url = None\n",
 67 |     "        self.next_url = None\n",
 68 |     "        self.last_url = None\n",
 69 |     "        \n",
 70 |     "        response = requests.get(self.first_url)\n",
 71 |     "        if not response.ok:\n",
 72 |     "            return \"<div>An Error occurred: {{response.text}}</div>\"\n",
 73 |     "\n",
 74 |     "        total_count = response.json()['total_count']\n",
 75 |     "        self.next_url = response.links.get('next', {}).get('url', None)\n",
 76 |     "        self.last_url = response.links.get('last', {}).get('url', None)\n",
 77 |     "        return \"\"\"\n",
 78 |     "<h1><center>{{total_count}} repositories were found</center></h1>\n",
 79 |     "<ul class=\"pagination\">\n",
 80 |     "  <li><a href=\"#\" pd_options=\"page=first_url\" pd_target=\"body{{prefix}}\">First</a></li>\n",
 81 |     "  <li><a href=\"#\" pd_options=\"page=prev_url\" pd_target=\"body{{prefix}}\">Prev</a></li>\n",
 82 |     "  <li><a href=\"#\" pd_options=\"page=next_url\" pd_target=\"body{{prefix}}\">Next</a></li>\n",
 83 |     "  <li><a href=\"#\" pd_options=\"page=last_url\" pd_target=\"body{{prefix}}\">Last</a></li>\n",
 84 |     "</ul>\n",
 85 |     "<table class=\"table\">\n",
 86 |     "    <thead>\n",
 87 |     "        <tr>\n",
 88 |     "            <th>Repo Name</th>\n",
 89 |     "            <th>Lastname</th>\n",
 90 |     "            <th>URL</th>\n",
 91 |     "            <th>Stars</th>\n",
 92 |     "        </tr>\n",
 93 |     "    </thead>\n",
 94 |     "    <tbody id=\"body{{prefix}}\">\n",
 95 |     "        {{this.invoke_route(this.do_retrieve_page, page='first_url')}}\n",
 96 |     "    </tbody>\n",
 97 |     "</table>\n",
 98 |     "\"\"\"\n",
 99 |     "    @route(page=\"*\")\n",
100 |     "    @templateArgs\n",
101 |     "    def do_retrieve_page(self, page):\n",
102 |     "        url = getattr(self, page)\n",
103 |     "        if url is None:\n",
104 |     "            return \"<div>No more rows</div>\"\n",
105 |     "        response = requests.get(url)\n",
106 |     "        self.prev_url = response.links.get('prev', {}).get('url', None)\n",
107 |     "        self.next_url = response.links.get('next', {}).get('url', None)\n",
108 |     "        items = response.json()['items']\n",
109 |     "        return \"\"\"\n",
110 |     "{%for row in items%}\n",
111 |     "<tr>\n",
112 |     "    <td>{{row['name']}}</td>\n",
113 |     "    <td>{{row.get('owner',{}).get('login', 'N/A')}}</td>\n",
114 |     "    <td><a href=\"{{row['html_url']}}\" target=\"_blank\">{{row['html_url']}}</a></td>\n",
115 |     "    <td>{{row['stargazers_count']}}</td>\n",
116 |     "</tr>\n",
117 |     "{%endfor%}\n",
118 |     "        \"\"\"\n",
119 |     "\n",
120 |     "app = GitHubTracking()\n",
121 |     "app.run()"
122 |    ]
123 |   }
124 |  ],
125 |  "metadata": {
126 |   "kernelspec": {
127 |    "display_name": "Python 3",
128 |    "language": "python",
129 |    "name": "python3"
130 |   },
131 |   "language_info": {
132 |    "codemirror_mode": {
133 |     "name": "ipython",
134 |     "version": 3
135 |    },
136 |    "file_extension": ".py",
137 |    "mimetype": "text/x-python",
138 |    "name": "python",
139 |    "nbconvert_exporter": "python",
140 |    "pygments_lexer": "ipython3",
141 |    "version": "3.5.4"
142 |   }
143 |  },
144 |  "nbformat": 4,
145 |  "nbformat_minor": 2
146 | }
147 | 


--------------------------------------------------------------------------------
/chapter 3/GitHub Tracking Application/GitHub Sample Application - Part 4.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# GitHub Tracking Application Part 4\n",
  8 |     "Include a Repo Analysis Page  \n",
  9 |     "Include a checkbox for switching between line chart and statistical summary  \n",
 10 |     "Make the checkbox responsible by directly updating the statistics table  "
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Setup the GitHub Credentials"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "github_user = \"dtaieb\"\n",
 27 |     "github_token = \"XXXXX\""
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "## Create the loader function for the commit activity GitHub API"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": null,
 40 |    "metadata": {
 41 |     "pixiedust": {
 42 |      "displayParams": {
 43 |       "aggregation": "SUM",
 44 |       "handlerId": "lineChart",
 45 |       "keyFields": "week",
 46 |       "rendererId": "bokeh",
 47 |       "rowCount": "500",
 48 |       "timeseries": "false",
 49 |       "valueFields": "total"
 50 |      }
 51 |     }
 52 |    },
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "from datetime import datetime\n",
 56 |     "import requests\n",
 57 |     "import pixiedust\n",
 58 |     "import pandas\n",
 59 |     "def load_commit_activity(owner, repo_name):\n",
 60 |     "    response = requests.get(\n",
 61 |     "        \"https://api.github.com/repos/{}/{}/stats/commit_activity\".format(owner, repo_name),\n",
 62 |     "        auth=(github_user, github_token)\n",
 63 |     "    ).json()\n",
 64 |     "    pdf = pandas.DataFrame([\n",
 65 |     "        {\"total\": item[\"total\"], \"week\":datetime.fromtimestamp(item[\"week\"])} for item in response\n",
 66 |     "    ])\n",
 67 |     "    \n",
 68 |     "    return {\n",
 69 |     "        \"pdf\":pdf,\n",
 70 |     "        \"chart_options\": {\n",
 71 |     "          \"handlerId\": \"lineChart\",\n",
 72 |     "          \"keyFields\": \"week\",\n",
 73 |     "          \"valueFields\": \"total\",\n",
 74 |     "          \"aggregation\": \"SUM\",\n",
 75 |     "          \"rendererId\": \"bokeh\"\n",
 76 |     "        }\n",
 77 |     "    }"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "## Use display to get the chart options JSON payload"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {
 91 |     "pixiedust": {
 92 |      "displayParams": {
 93 |       "aggregation": "SUM",
 94 |       "chartsize": "100",
 95 |       "handlerId": "lineChart",
 96 |       "keyFields": "week",
 97 |       "logx": "false",
 98 |       "logy": "false",
 99 |       "rendererId": "bokeh",
100 |       "rowCount": "500",
101 |       "valueFields": "total"
102 |      }
103 |     }
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "display(load_commit_activity(\"ibm-watson-data-lab\",\"pixiedust\")[\"pdf\"])"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "## Create the array that controls the type of data being analyzed"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": null,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "analyses = [(\"Commit Activity\", load_commit_activity)]"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {},
129 |    "source": [
130 |     "## Implement the RepoAnalysis PixieApp that is responsible for visualizing the data"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "metadata": {
137 |     "pixiedust": {
138 |      "displayParams": {}
139 |     }
140 |    },
141 |    "outputs": [],
142 |    "source": [
143 |     "from pixiedust.display.app import *\n",
144 |     "import requests\n",
145 |     "import pandas\n",
146 |     "\n",
147 |     "class RepoAnalysis():\n",
148 |     "    def setup(self):\n",
149 |     "        self.show_stats = False\n",
150 |     "        self.analyse_type = None\n",
151 |     "\n",
152 |     "    @route(analyse_repo_owner=\"*\", analyse_repo_name=\"*\")\n",
153 |     "    @templateArgs\n",
154 |     "    def do_analyse_repo(self, analyse_repo_owner, analyse_repo_name):\n",
155 |     "        self._analyse_repo_owner = analyse_repo_owner\n",
156 |     "        self._analyse_repo_name = analyse_repo_name\n",
157 |     "        return \"\"\"\n",
158 |     "<div class=\"container-fluid\">\n",
159 |     "    <div class=\"col-sm-2\">\n",
160 |     "        <div class=\"dropdown center-block\">\n",
161 |     "          <button class=\"btn btn-primary dropdown-toggle\" type=\"button\" data-toggle=\"dropdown\">\n",
162 |     "              Select Repo Data Set\n",
163 |     "              <span class=\"caret\"></span>\n",
164 |     "          </button>\n",
165 |     "          <ul class=\"dropdown-menu\" style=\"list-style:none;margin:0px;padding:0px\">\n",
166 |     "              {%for analysis,_ in this.analyses%}\n",
167 |     "                <li>\n",
168 |     "                    <a href=\"#\" pd_script=\"self.analyse_type='{{analysis}}'\" pd_refresh=\"analyse_vis{{prefix}}\"\n",
169 |     "                        style=\"text-decoration: none;background-color:transparent\">\n",
170 |     "                        {{analysis}}\n",
171 |     "                    </a>\n",
172 |     "                </li>\n",
173 |     "              {%endfor%}\n",
174 |     "          </ul>\n",
175 |     "        </div>\n",
176 |     "        <div class=\"checkbox\">\n",
177 |     "          <label>\n",
178 |     "              <input type=\"checkbox\"\n",
179 |     "                     id=\"show_stats{{prefix}}\"\n",
180 |     "                     pd_script=\"self.show_stats='$val(show_stats{{prefix}})'=='true'\"\n",
181 |     "                     pd_refresh=\"analyse_vis{{prefix}}\">\n",
182 |     "                     Show Statistics\n",
183 |     "          </label>\n",
184 |     "        </div>\n",
185 |     "    </div>\n",
186 |     "    <div id=\"analyse_vis{{prefix}}\" class=\"col-sm-10\" \n",
187 |     "        pd_options=\"display_analysis=true\" pd_target=\"analyse_vis{{prefix}}\"></div>\n",
188 |     "</div>\n",
189 |     "\"\"\"\n",
190 |     "    def get_pdf(self):\n",
191 |     "        if self.show_stats:\n",
192 |     "            summary = self.pdf.describe()\n",
193 |     "            summary.insert(0, \"Stat\", summary.index)\n",
194 |     "            return summary\n",
195 |     "        return self.pdf\n",
196 |     "    \n",
197 |     "    @route(display_analysis=\"*\")\n",
198 |     "    @templateArgs\n",
199 |     "    def do_analyse_type(self):\n",
200 |     "        fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == self.analyse_type]\n",
201 |     "        if len(fn) == 0:\n",
202 |     "            return \"No loader function found for {{analyse_type}}\"\n",
203 |     "        vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name)\n",
204 |     "        self.pdf = vis_info[\"pdf\"]\n",
205 |     "        chart_options = {\"handlerId\":\"dataframe\"} if self.show_stats else vis_info[\"chart_options\"]\n",
206 |     "        return \"\"\"\n",
207 |     "        <div pd_entity=\"get_pdf()\" pd_render_onload>\n",
208 |     "            <pd_options>{{chart_options | tojson}}</pd_options>\n",
209 |     "        </div>\n",
210 |     "        \"\"\""
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "metadata": {},
216 |    "source": [
217 |     "## GitHubTracking PixieApp class\n",
218 |     "Main PixieApp class that inherits from RepoAnalysis"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {
225 |     "pixiedust": {
226 |      "displayParams": {},
227 |      "pixieapp": {
228 |       "query": "pixiedust"
229 |      }
230 |     }
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "@PixieApp\n",
235 |     "class GitHubTracking(RepoAnalysis):\n",
236 |     "    \"\"\"\n",
237 |     "    GitHub Tracking Sample Application\n",
238 |     "    \"\"\"\n",
239 |     "    @route()\n",
240 |     "    def main_screen(self):\n",
241 |     "        return \"\"\"\n",
242 |     "<style>\n",
243 |     "    div.outer-wrapper {\n",
244 |     "        display: table;width:100%;height:300px;\n",
245 |     "    }\n",
246 |     "    div.inner-wrapper {\n",
247 |     "        display: table-cell;vertical-align: middle;height: 100%;width: 100%;\n",
248 |     "    }\n",
249 |     "</style>\n",
250 |     "<div class=\"outer-wrapper\">\n",
251 |     "    <div class=\"inner-wrapper\">\n",
252 |     "        <div class=\"col-sm-3\"></div>\n",
253 |     "        <div class=\"input-group col-sm-6\">\n",
254 |     "          <input id=\"query{{prefix}}\" type=\"text\" class=\"form-control\" placeholder=\"Search projects on GitHub\">\n",
255 |     "          <span class=\"input-group-btn\">\n",
256 |     "            <button class=\"btn btn-default\" type=\"button\" pd_options=\"query=$val(query{{prefix}})\">Submit Query</button>\n",
257 |     "          </span>\n",
258 |     "        </div>\n",
259 |     "    </div>\n",
260 |     "</div>   \n",
261 |     "\"\"\"\n",
262 |     "    @route(query=\"*\", persist_args='true')\n",
263 |     "    @templateArgs\n",
264 |     "    def do_search(self, query):\n",
265 |     "        self.first_url = \"https://api.github.com/search/repositories?q={}\".format(query)\n",
266 |     "        self.prev_url = None\n",
267 |     "        self.next_url = None\n",
268 |     "        self.last_url = None\n",
269 |     "        \n",
270 |     "        response = requests.get(self.first_url)\n",
271 |     "        if not response.ok:\n",
272 |     "            return \"<div>An Error occurred: {{response.text}}</div>\"\n",
273 |     "\n",
274 |     "        total_count = response.json()['total_count']\n",
275 |     "        self.next_url = response.links.get('next', {}).get('url', None)\n",
276 |     "        self.last_url = response.links.get('last', {}).get('url', None)\n",
277 |     "        return \"\"\"\n",
278 |     "<h1><center>{{total_count}} repositories were found</center></h1>\n",
279 |     "<ul class=\"pagination\">\n",
280 |     "  <li><a href=\"#\" pd_options=\"page=first_url\" pd_target=\"body{{prefix}}\">First</a></li>\n",
281 |     "  <li><a href=\"#\" pd_options=\"page=prev_url\" pd_target=\"body{{prefix}}\">Prev</a></li>\n",
282 |     "  <li><a href=\"#\" pd_options=\"page=next_url\" pd_target=\"body{{prefix}}\">Next</a></li>\n",
283 |     "  <li><a href=\"#\" pd_options=\"page=last_url\" pd_target=\"body{{prefix}}\">Last</a></li>\n",
284 |     "</ul>\n",
285 |     "<table class=\"table\">\n",
286 |     "    <thead>\n",
287 |     "        <tr>\n",
288 |     "            <th>Repo Name</th>\n",
289 |     "            <th>Lastname</th>\n",
290 |     "            <th>URL</th>\n",
291 |     "            <th>Stars</th>\n",
292 |     "            <th>Actions</th>\n",
293 |     "        </tr>\n",
294 |     "    </thead>\n",
295 |     "    <tbody id=\"body{{prefix}}\">\n",
296 |     "        {{this.invoke_route(this.do_retrieve_page, page='first_url')}}\n",
297 |     "    </tbody>\n",
298 |     "</table>\n",
299 |     "\"\"\"\n",
300 |     "    @route(page=\"*\")\n",
301 |     "    @templateArgs\n",
302 |     "    def do_retrieve_page(self, page):\n",
303 |     "        url = getattr(self, page)\n",
304 |     "        if url is None:\n",
305 |     "            return \"<div>No more rows</div>\"\n",
306 |     "        response = requests.get(url)\n",
307 |     "        self.prev_url = response.links.get('prev', {}).get('url', None)\n",
308 |     "        self.next_url = response.links.get('next', {}).get('url', None)\n",
309 |     "        items = response.json()['items']\n",
310 |     "        return \"\"\"\n",
311 |     "{%for row in items%}\n",
312 |     "<tr>\n",
313 |     "    <td>{{row['name']}}</td>\n",
314 |     "    <td>{{row.get('owner',{}).get('login', 'N/A')}}</td>\n",
315 |     "    <td><a href=\"{{row['html_url']}}\" target=\"_blank\">{{row['html_url']}}</a></td>\n",
316 |     "    <td>{{row['stargazers_count']}}</td>\n",
317 |     "    <td>\n",
318 |     "        <button pd_options=\"analyse_repo_owner={{row[\"owner\"][\"login\"]}};analyse_repo_name={{row['name']}}\" \n",
319 |     "            class=\"btn btn-default btn-sm\" title=\"Analyze Repo\">\n",
320 |     "            <i class=\"fa fa-line-chart\"></i>\n",
321 |     "        </button>\n",
322 |     "    </td>\n",
323 |     "</tr>\n",
324 |     "{%endfor%}\n",
325 |     "        \"\"\"\n",
326 |     "\n",
327 |     "app = GitHubTracking()\n",
328 |     "app.run()"
329 |    ]
330 |   }
331 |  ],
332 |  "metadata": {
333 |   "celltoolbar": "Edit Metadata",
334 |   "kernelspec": {
335 |    "display_name": "Python 3",
336 |    "language": "python",
337 |    "name": "python3"
338 |   },
339 |   "language_info": {
340 |    "codemirror_mode": {
341 |     "name": "ipython",
342 |     "version": 3
343 |    },
344 |    "file_extension": ".py",
345 |    "mimetype": "text/x-python",
346 |    "name": "python",
347 |    "nbconvert_exporter": "python",
348 |    "pygments_lexer": "ipython3",
349 |    "version": "3.5.4"
350 |   }
351 |  },
352 |  "nbformat": 4,
353 |  "nbformat_minor": 2
354 | }
355 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode1.py:
--------------------------------------------------------------------------------
 1 | #import the pixieapp decorators
 2 | from pixiedust.display.app import *
 3 | 
 4 | @PixieApp   #decorator for making the class a PixieApp
 5 | class HelloWorldApp():
 6 |     @route()  #decorator for making a method a route (no arguments means default route)
 7 |     def main_screen(self):
 8 |         return """<div>Hello World</div>"""
 9 | 
10 | #Instantiate the application and run it
11 | app = HelloWorldApp()
12 | app.run()
13 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode10.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | @PixieApp
 3 | class TestEntity():
 4 |     @route()
 5 |     def main_screen(self):
 6 |         return """
 7 |         <h1><center>Simple PixieApp with dynamically computed dataframe</center></h1>
 8 |         <div pd_entity="compute_pdf('prefix')" pd_options="handlerId=dataframe" pd_render_onload></div>
 9 |         """
10 | test = TestEntity()
11 | test.run()
12 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode11.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | import requests
 3 | import pandas
 4 | def load_commit_activity(owner, repo_name):
 5 |     response = requests.get(
 6 |         "https://api.github.com/repos/{}/{}/stats/commit_activity".format(owner, repo_name),
 7 |         auth=(github_user, github_token)
 8 |     ).json()
 9 |     pdf = pandas.DataFrame([
10 |         {"total": item["total"], "week":datetime.fromtimestamp(item["week"])} for item in response
11 |     ])
12 |     
13 |     return {
14 |         "pdf":pdf,
15 |         "chart_options": {
16 |           "handlerId": "lineChart",
17 |           "keyFields": "week",
18 |           "valueFields": "total",
19 |           "aggregation": "SUM",
20 |           "rendererId": "bokeh"
21 |         }
22 |     }
23 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode12.py:
--------------------------------------------------------------------------------
 1 | [[RepoAnalysis]]
 2 | @route(analyse_type="*")
 3 | @templateArgs
 4 | def do_analyse_type(self, analyse_type):
 5 |     fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == analyse_type]
 6 |     if len(fn) == 0:
 7 |         return "No loader function found for {{analyse_type}}"
 8 |     vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name)
 9 |     self.pdf = vis_info["pdf"]
10 |     return """
11 |     <div pd_entity="pdf" pd_render_onload>
12 |         <pd_options>{{vis_info["chart_options"] | tojson}}</pd_options>
13 |     </div>
14 |     """
15 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode13.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class GitHubTracking(RepoAnalysis):
 3 |     @route()
 4 |     def main_screen(self):
 5 |         <<Code omitted here>>
 6 | 
 7 |     @route(query="*")
 8 |     @templateArgs
 9 |     def do_search(self, query):
10 |         <<Code omitted here>>
11 | 
12 |     @route(page="*")
13 |     @templateArgs
14 |     def do_retrieve_page(self, page):
15 |         <<Code omitted here>>
16 | 
17 | app = GitHubTracking()
18 | app.run()
19 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode14.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | 
 3 | def call_me():
 4 |     print("Hello from call_me")
 5 | 
 6 | @PixieApp
 7 | class Test():
 8 |     @route()
 9 |     def main_screen(self):
10 |         return """
11 |         <button type="submit" pd_script="call_me()" pd_target="target{{prefix}}">Click me</button>
12 |         
13 |         <div id="target{{prefix}}"></div>
14 |         """
15 | Test().run()
16 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode15.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class Test():
 3 |     @route()
 4 |     def main_screen(self):
 5 |         return """
 6 |         <button type="submit" 
 7 | pd_script="call_me()" 
 8 | pd_target="target{{prefix}}">
 9 |             <pd_script>
10 | self.name="some value"
11 | print("This is a multi-line pd_script")
12 |             </pd_script>
13 |             Click me
14 |         </button>
15 |         
16 |         <div id="target{{prefix}}"></div>
17 |         """
18 | Test().run()
19 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode16.py:
--------------------------------------------------------------------------------
 1 | [[RepoAnalysis]]
 2 | @route(analyse_type="*")
 3 | @templateArgs
 4 | def do_analyse_type(self, analyse_type):
 5 |     fn = [analysis_fn for a_type,analysis_fn in analyses if a_type == analyse_type]
 6 |     if len(fn) == 0:
 7 |         return "No loader function found for {{analyse_type}}"
 8 |     vis_info = fn[0](self._analyse_repo_owner, self._analyse_repo_name)
 9 |     self.pdf = vis_info["pdf"]
10 |     chart_options = {"handlerId":"dataframe"} if self.show_stats else vis_info["chart_options"]
11 |     return """
12 |     <div pd_entity="get_pdf()" pd_render_onload>
13 |         <pd_options>{{chart_options | tojson}}</pd_options>
14 |     </div>
15 |     """
16 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode17.py:
--------------------------------------------------------------------------------
1 | def get_pdf(self):
2 |     if self.show_stats:
3 |         summary = self.pdf.describe()
4 |         summary.insert(0, "Stat", summary.index)
5 |         return summary
6 |     return self.pdf
7 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode18.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | 
 3 | @PixieApp
 4 | class WidgetApp():
 5 |     @route(widget="my_widget")
 6 |     def widget_main_screen(self):
 7 |         return "<div>Hello World Widget</div>"
 8 |     
 9 | @PixieApp
10 | class ConsumerApp(WidgetApp):
11 |     @route()
12 |     def main_screen(self):
13 |         return """<div pd_widget="my_widget"></div>"""
14 |     
15 | ConsumerApp().run()
16 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode2.py:
--------------------------------------------------------------------------------
1 | @route(state1="*", state2="*")
2 | def my_method(self, state1, state2):
3 |     return "<div>State1 is {{state1}}. State2 is {{state2}}</div>"
4 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode3.py:
--------------------------------------------------------------------------------
1 | @route()
2 | @templateArgs
3 | def main_screen(self):
4 |     var1 = self.compute_something()
5 |     var2 = self.compute_something_else()
6 |     return "<div>var1 is {{var1}}. var2 is {{var2}}</div>"
7 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode4.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | 
 3 | @PixieApp
 4 | class GitHubTracking():
 5 |     @route()
 6 |     def main_screen(self):
 7 |         return """
 8 | <style>
 9 |     div.outer-wrapper {
10 |         display: table;width:100%;height:300px;
11 |     }
12 |     div.inner-wrapper {
13 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
14 |     }
15 | </style>
16 | <div class="outer-wrapper">
17 |     <div class="inner-wrapper">
18 |         <div class="col-sm-3"></div>
19 |         <div class="input-group col-sm-6">
20 |           <input id="query{{prefix}}" type="text" class="form-control" placeholder="Search projects on GitHub">
21 |           <span class="input-group-btn">
22 |             <button class="btn btn-default" type="button">Submit Query</button>
23 |           </span>
24 |         </div>
25 |     </div>
26 | </div>   
27 | """
28 |     
29 | app = GitHubTracking()
30 | app.run()
31 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode5.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import pandas
 3 | [[GitHubTracking]]
 4 | @route(query="*")
 5 | @templateArgs
 6 | def do_search(self, query):
 7 |     response = requests.get( "https://api.github.com/search/repositories?q={}".format(query))
 8 |     frames = [pandas.DataFrame(response.json()['items'])]
 9 |     while response.ok and "next" in response.links:
10 |         response = requests.get(response.links['next']['url'])
11 |         frames.append(pandas.DataFrame(response.json()['items']))
12 | 
13 |     pdf = pandas.concat(frames)
14 |     response = requests.get( "https://api.github.com/search/repositories?q={}".format(query))
15 |     if not response.ok:
16 |         return "<div>An Error occurred: {{response.text}}</div>"
17 |     return """<h1><center>{{pdf|length}} repositories were found</center></h1>"""
18 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode6.py:
--------------------------------------------------------------------------------
 1 | [[GitHubTracking]]
 2 | @route(query="*")
 3 | @templateArgs
 4 | def do_search(self, query):
 5 |     self.first_url = "https://api.github.com/search/repositories?q={}".format(query)
 6 |     self.prev_url = None
 7 |     self.next_url = None
 8 |     self.last_url = None
 9 |     
10 |     response = requests.get(self.first_url)
11 |     if not response.ok:
12 |         return "<div>An Error occurred: {{response.text}}</div>"
13 | 
14 |     total_count = response.json()['total_count']
15 |     self.next_url = response.links.get('next', {}).get('url', None)
16 |     self.last_url = response.links.get('last', {}).get('url', None)
17 |     return """
18 | <h1><center>{{total_count}} repositories were found</center></h1>
19 | <ul class="pagination">
20 |   <li><a href="#" pd_options="page=first_url" pd_target="body{{prefix}}">First</a></li>
21 |   <li><a href="#" pd_options="page=prev_url" pd_target="body{{prefix}}">Prev</a></li>
22 |   <li><a href="#" pd_options="page=next_url" pd_target="body{{prefix}}">Next</a></li>
23 |   <li><a href="#" pd_options="page=last_url" pd_target="body{{prefix}}">Last</a></li>
24 | </ul>
25 | <table class="table">
26 |     <thead>
27 |         <tr>
28 |             <th>Repo Name</th>
29 |             <th>Lastname</th>
30 |             <th>URL</th>
31 |             <th>Stars</th>
32 |         </tr>
33 |     </thead>
34 |     <tbody id="body{{prefix}}">
35 |         {{this.invoke_route(this.do_retrieve_page, page='first_url')}}
36 |     </tbody>
37 | </table>
38 | """
39 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode7.py:
--------------------------------------------------------------------------------
 1 | [[GitHubTracking]]
 2 | @route(page="*")
 3 | @templateArgs
 4 | def do_retrieve_page(self, page):
 5 |     url = getattr(self, page)
 6 |     if url is None:
 7 |         return "<div>No more rows</div>"
 8 |     response = requests.get(url)
 9 |     self.prev_url = response.links.get('prev', {}).get('url', None)
10 |     self.next_url = response.links.get('next', {}).get('url', None)
11 |     items = response.json()['items']
12 |     return """
13 | {%for row in items%}
14 | <tr>
15 |     <td>{{row['name']}}</td>
16 |     <td>{{row.get('owner',{}).get('login', 'N/A')}}</td>
17 |     <td><a href="{{row['html_url']}}" target="_blank">{{row['html_url']}}</a></td>
18 |     <td>{{row['stargazers_count']}}</td>
19 | </tr>
20 | {%endfor%}
21 |         """
22 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode8.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class RepoAnalysis():
 3 |     @route(analyse_repo_owner="*", analyse_repo_name="*")
 4 |     @templateArgs
 5 |     def do_analyse_repo(self, analyse_repo_owner, analyse_repo_name):
 6 |         self._analyse_repo_owner = analyse_repo_owner
 7 |         self._analyse_repo_name = analyse_repo_name
 8 |         return """
 9 | <div class="container-fluid">
10 |     <div class="dropdown center-block col-sm-2">
11 |       <button class="btn btn-primary dropdown-toggle" type="button" data-toggle="dropdown">
12 |           Select Repo Data Set
13 |           <span class="caret"></span>
14 |       </button>
15 |       <ul class="dropdown-menu" style="list-style:none;margin:0px;padding:0px">
16 |           {%for analysis,_ in this.analyses%}
17 |             <li>
18 |                 <a href="#" pd_options="analyse_type={{analysis}}" pd_target="analyse_vis{{prefix}}"
19 |                     style="text-decoration: none;background-color:transparent">
20 |                     {{analysis}}
21 |                 </a>
22 |             </li>
23 |           {%endfor%}
24 |       </ul>
25 |     </div> 
26 |     <div id="analyse_vis{{prefix}}" class="col-sm-10"></div>
27 | </div>
28 | """
29 | 


--------------------------------------------------------------------------------
/chapter 3/sampleCode9.py:
--------------------------------------------------------------------------------
1 | def compute_pdf(key):
2 |     return pandas.DataFrame([
3 |         {"col{}".format(i): "{}{}-{}".format(key,i,j) for i in range(4)} for j in range(10)
4 |     ])
5 | 


--------------------------------------------------------------------------------
/chapter 4/sampleCode1.py:
--------------------------------------------------------------------------------
1 | @route(query="*", persist_args='true')
2 | @templateArgs
3 | def do_search(self, query):
4 |     self.first_url = "https://api.github.com/search/repositories?q={}".format(query)
5 |     self.prev_url = None
6 |     self.next_url = None
7 |     self.last_url = None
8 |     ...
9 | 


--------------------------------------------------------------------------------
/chapter 4/sampleCode2.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | cars = pixiedust.sampleData(1, forcePandas=True) #car performance data
3 | display(cars)
4 | 


--------------------------------------------------------------------------------
/chapter 4/sampleCode3.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <title>Example page with embedded chart</title>
 6 |     </head>
 7 |     <body>
 8 |         <h1> Embedded a PixieDust Chart in a custom HTML Page</h1>
 9 |         <div>
10 |             <object type="text/html" width="600" height="400"
11 |                 data="http://localhost:8899/embed/04089782-7543-42a6-8dd1-e4d1cb06596a/600/400"> 
12 |         <a href="http://localhost:8899/embed/04089782-7543-42a6-8dd1-e4d1cb06596a">View Chart</a>
13 |             </object>
14 |         </div>
15 |     </body>
16 | </html>
17 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode1.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | import requests
 3 | from wordcloud import WordCloud
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | @PixieApp
 7 | class WordCloudApp():
 8 |     @route()
 9 |     def main_screen(self):
10 |         return """
11 |         <div style="text-align:center">
12 |             <label>Enter a url: </label>
13 |             <input type="text" size="80" id="url{{prefix}}">
14 |             <button type="submit" pd_options="url=$val(url{{prefix}})" 
15 |                 pd_target="wordcloud{{prefix}}">
16 |                 Go
17 |             </button>
18 |         </div>
19 |         <center><div id="wordcloud{{prefix}}"></div></center>
20 |         """
21 |     
22 |     @route(url="*")
23 |     @captureOutput
24 |     def generate_word_cloud(self, url):
25 |         text = requests.get(url).text
26 |         plt.axis("off")
27 |         plt.imshow(
28 |             WordCloud(max_font_size=40).generate(text), 
29 |             interpolation='bilinear'
30 |         )
31 |     
32 | app = WordCloudApp()
33 | app.run()
34 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode10.py:
--------------------------------------------------------------------------------
1 | def newDisplayHandler(self, options, entity):
2 |     if self.streamingDisplay is None:
3 |         self.streamingDisplay = LineChartStreamingDisplay(options, entity)
4 |     else:
5 |         self.streamingDisplay.options = options
6 |     return self.streamingDisplay
7 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode11.html:
--------------------------------------------------------------------------------
 1 | <button type="submit" pd_event_payload="type=topicA;message=Button clicked">
 2 |     Send event A
 3 | </button>
 4 | 
 5 | or
 6 | 
 7 | <button type="submit">
 8 |     <pd_event_payload>
 9 |     {
10 |         "type":"topicA",
11 |         "message":"Button Clicked"
12 |     }
13 |     </pd_event_payload>
14 |     Send event A
15 | </button>
16 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode12.html:
--------------------------------------------------------------------------------
1 | <table
2 | onclick="pixiedust.sendEvent({type:'topicB',text:event.srcElement.innerText})">
3 |     <tr><td>Row 1</td></tr>
4 |     <tr><td>Row 2</td></tr>
5 |     <tr><td>Row 3</td></tr>
6 | </table>
7 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode13.html:
--------------------------------------------------------------------------------
1 | <div class="col-sm-6" id="listenerA{{prefix}}">
2 |     Listening to button event
3 |     <pd_event_handler 
4 |         pd_source="topicA" 
5 |         pd_script="print(eventInfo)" 
6 |         pd_target="listenerA{{prefix}}">
7 |     </pd_event_handler>
8 | </div>
9 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode14.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | @PixieApp
 3 | class TestEvents():
 4 |     @route()
 5 |     def main_screen(self):
 6 |         return """
 7 | <div>
 8 |     <button type="submit">
 9 |         <pd_event_payload>
10 |         {
11 |             "type":"topicA",
12 |             "message":"Button Clicked"
13 |         }
14 |         </pd_event_payload>
15 |         Send event A
16 |     </button>
17 |     <table onclick="pixiedust.sendEvent({type:'topicB',text:event.srcElement.innerText})">
18 |         <tr><td>Row 1</td></tr>
19 |         <tr><td>Row 2</td></tr>
20 |         <tr><td>Row 3</td></tr>
21 |     </table>
22 | </div>
23 | <div class="container" style="margin-top:30px">
24 |     <div class="row">
25 |         <div class="col-sm-6" id="listenerA{{prefix}}">
26 |             Listening to button event
27 |             <pd_event_handler pd_source="topicA" pd_script="print(eventInfo)" pd_target="listenerA{{prefix}}">
28 |             </pd_event_handler>
29 |         </div>
30 |         <div class="col-sm-6" id="listenerB{{prefix}}">
31 |             Listening to table event
32 |             <pd_event_handler pd_source="topicB" pd_script="print(eventInfo)" pd_target="listenerB{{prefix}}">
33 |             </pd_event_handler>
34 |         </div>
35 |     </div>
36 | </div>
37 |         """ 
38 | app = TestEvents()
39 | app.run()
40 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode15.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.display import *
 2 | import pandas
 3 | @PixiedustDisplay()
 4 | class SimpleDisplayMeta(DisplayHandlerMeta):
 5 |     @addId
 6 |     def getMenuInfo(self,entity,dataHandler):
 7 |         if type(entity) is pandas.core.frame.DataFrame:
 8 |             return [
 9 |                {"categoryId": "Table", "title": "Simple Table", "icon": "fa-table", "id": "simpleTest"}
10 |             ]
11 |         return []
12 |     def newDisplayHandler(self,options,entity):
13 |         return SimpleDisplay(options,entity)
14 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode16.py:
--------------------------------------------------------------------------------
 1 | class SimpleDisplay(Display):
 2 |     def doRender(self, handlerId):
 3 |         self._addHTMLTemplateString("""
 4 | <table class="table table-striped">
 5 |    <thead>
 6 |        {%for column in entity.columns.tolist()%}
 7 |        <th>{{column}}</th>
 8 |        {%endfor%}
 9 |    </thead>
10 |    <tbody>
11 |        {%for _, row in entity.iterrows()%}
12 |        <tr>
13 |            {%for value in row.tolist()%}
14 |            <td>{{value}}</td>
15 |            {%endfor%}
16 |        </tr>
17 |        {%endfor%}
18 |    </tbody>
19 | </table>
20 |         """)
21 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode17.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.chart.renderers import PixiedustRenderer
 2 | from pixiedust.display.chart.renderers.baseChartDisplay import BaseChartDisplay
 3 | 
 4 | @PixiedustRenderer(rendererId="simpletable", id="tableView")
 5 | class SimpleDisplayWithRenderer(BaseChartDisplay):
 6 |     def get_options_dialog_pixieapp(self):
 7 |         return None #No options needed
 8 |     
 9 |     def doRenderChart(self):
10 |         return self.renderTemplateString("""
11 | <table class="table table-striped">
12 |    <thead>
13 |        {%for column in entity.columns.tolist()%}
14 |        <th>{{column}}</th>
15 |        {%endfor%}
16 |    </thead>
17 |    <tbody>
18 |        {%for _, row in entity.iterrows()%}
19 |        <tr>
20 |            {%for value in row.tolist()%}
21 |            <td>{{value}}</td>
22 |            {%endfor%}
23 |        </tr>
24 |        {%endfor%}
25 |    </tbody>
26 | </table>
27 |         """)
28 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode18.py:
--------------------------------------------------------------------------------
1 | import pdb
2 | def my_function(arg1, arg2):
3 |     pdb.set_trace()
4 |     do_something_here()
5 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode19.py:
--------------------------------------------------------------------------------
 1 | %%pixie_debugger
 2 | import pixiedust
 3 | cars = pixiedust.sampleData(1, forcePandas=True)
 4 | 
 5 | def count_cars(name):
 6 |     count = 0
 7 |     for row in cars.itertuples():
 8 |         if name in row.name:
 9 |             count += 1
10 |     return count
11 | 
12 | count_cars('chevrolet')
13 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode2.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | import requests
 3 | from wordcloud import WordCloud
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | @PixieApp
 7 | class WCChildApp():
 8 |     @route(url='*')
 9 |     @captureOutput
10 |     def generate_word_cloud(self, url):
11 |         text = requests.get(url).text
12 |         plt.axis("off")
13 |         plt.imshow(
14 |             WordCloud(max_font_size=40).generate(text), 
15 |             interpolation='bilinear'
16 |         )
17 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode20.py:
--------------------------------------------------------------------------------
 1 | %%pixie_debugger -b count_cars 11
 2 | import pixiedust
 3 | cars = pixiedust.sampleData(1, forcePandas=True)
 4 | 
 5 | def count_cars(name):
 6 |     count = 0
 7 |     for row in cars.itertuples():
 8 |         if name in row.name:
 9 |             count += 1
10 |     return count
11 | 
12 | count_cars('chevrolet')
13 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode21.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | @PixieApp
 3 | class DisplayCars():
 4 |     @route()
 5 |     def main_screen(self):
 6 |         return """
 7 |         <div>
 8 |             <label>Column to search</label>
 9 |             <input id="column{{prefix}}" value="name">
10 |             <label>Query</label>
11 |             <input id="search{{prefix}}">
12 |             <button type="submit" pd_options="col=$val(column{{prefix}});query=$val(search{{prefix}})"
13 |                 pd_target="target{{prefix}}">
14 |                 Search
15 |             </button>
16 |         </div>
17 |         <div id="target{{prefix}}"></div>
18 |         """
19 |     @route(col="*", query="*")
20 |     def display_screen(self, col, query):
21 |         self.pdf = cars.loc[cars[col].str.contains(query)]
22 |         return """
23 |         <div pd_render_onload pd_entity="pdf">
24 |             <pd_options>
25 |             {
26 |               "handlerId": "tableView",
27 |               "table_noschema": "true",
28 |               "table_nosearch": "true",
29 |               "table_nocount": "true"
30 |             }
31 |             </pd_options>
32 |         </div>
33 |         """
34 | app = DisplayCars()
35 | app.run()
36 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode22.py:
--------------------------------------------------------------------------------
1 | import pixiedust
2 | my_logger = pixiedust.getLogger(__name__)
3 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode23.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | from pixiedust.utils import Logger
 3 | 
 4 | @PixieApp
 5 | @Logger()
 6 | class AppWithLogger():
 7 |     @route()
 8 |     def main_screen(self):
 9 |         self.info("Calling default route")
10 |         return "<div>hello world</div>"
11 |     
12 | app = AppWithLogger()
13 | app.run()
14 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode24.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | 
 3 | @PixieApp
 4 | class TestJSDebugger():
 5 |     @route()
 6 |     def main_screen(self):
 7 |         return """
 8 | <script>
 9 | function FooJS(){
10 |     debugger;
11 |     return "value"
12 | }
13 | </script>
14 | <button type="submit" pd_options="state=$val(FooJS)">Call route</button>
15 |         """
16 |     
17 |     @route(state="*")
18 |     def my_route(self, state):
19 |         return "<div>Route called with state <b>{{state}}</b></div>"
20 |     
21 | app = TestJSDebugger()
22 | app.run()
23 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode25.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class MyApp():
4 |     @route(key1="value1", key2="*")
5 |     def myroute_screen(self, key1, key2):
6 |         return "<div>fragment: Key1 = {{key1}} - Key2 = {{key2}}"
7 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode26.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | @PixieApp
3 | class MyApp():
4 |     @route(key1="value1", key2="*")
5 |     @templateArgs
6 |     def myroute_screen(self, key1, key2):
7 |         local_var = "some value"
8 |         return "<div>fragment: local_var = {{local_var}}"
9 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode27.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | import matplotlib.pyplot as plt
 3 | @PixieApp
 4 | class MyApp():
 5 |     @route()
 6 |     @captureOutput
 7 |     def main_screen(self):
 8 |         plt.plot([1,2,3,4])
 9 |         plt.show()
10 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode28.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | from pixiedust.utils import Logger
 3 | @PixieApp
 4 | @Logger()
 5 | class MyApp():
 6 |     @route()
 7 |     def main_screen(self):
 8 |         self.debug("In main_screen")
 9 |         return "<div>Hello World</div>"
10 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode29.html:
--------------------------------------------------------------------------------
 1 | <div pd_entity>
 2 |     <pd_options>
 3 |         {
 4 |             "mapboxtoken": "XXXXX",
 5 |             "chartsize": "90",
 6 |             "aggregation": "SUM",
 7 |             "rowCount": "500",
 8 |             "handlerId": "mapView",
 9 |             "rendererId": "mapbox",
10 |             "valueFields": "IncidntNum",
11 |             "keyFields": "X,Y",
12 |             "basemap": "light-v9"
13 |         }
14 |     </pd_options>
15 | </div>
16 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode3.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class WordCloudApp():
 3 |     @route()
 4 |     def main_screen(self):
 5 |         return """
 6 |         <div style="text-align:center">
 7 |             <label>Enter a url: </label>
 8 |             <input type="text" size="80" id="url{{prefix}}">
 9 |             <button type="submit" 
10 |                 pd_options="url=$val(url{{prefix}})" 
11 |                 pd_app="WCChildApp"
12 |                 pd_target="wordcloud{{prefix}}">
13 |                 Go
14 |             </button>
15 |         </div>
16 |         <center><div id="wordcloud{{prefix}}"></div></center>
17 |         """
18 |     
19 | app = WordCloudApp()
20 | app.run()
21 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode30.html:
--------------------------------------------------------------------------------
1 | <!-- Invoke a route that displays a chart -->
2 | <button type="submit" pd_options="showChart=true" pd_target="chart{{prefix}}">
3 |     Show Chart
4 | </button>
5 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode31.html:
--------------------------------------------------------------------------------
1 | <div id="chart{{prefix}}">
2 |     <button type="submit" pd_options="showChart=true" pd_target="chart{{prefix}}">
3 |         Show Chart
4 |     </button>
5 |     


--------------------------------------------------------------------------------
/chapter 5/sampleCode32.html:
--------------------------------------------------------------------------------
 1 | <!-- Invoke a method to load a dataframe before visualizing it -->
 2 | <div id="chart{{prefix}}">
 3 |     <button type="submit" 
 4 |         pd_entity="df" 
 5 |         pd_script="self.df = self.load_df()" 
 6 |         pd_options="handlerId=dataframe" 
 7 |         pd_target="chart{{prefix}}">
 8 |         Show Chart
 9 |     </button>
10 |     


--------------------------------------------------------------------------------
/chapter 5/sampleCode33.html:
--------------------------------------------------------------------------------
1 | <div pd_render_onload 
2 |      pd_option="show_route_X=true"
3 |      pd_app="some.package.RemoteApp">
4 | </div>
5 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode34.html:
--------------------------------------------------------------------------------
1 | <div pd_render_onload>
2 |     <pd_script>
3 | print('hello world rendered on load')
4 |     </pd_script>
5 | </div>
6 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode35.html:
--------------------------------------------------------------------------------
1 | <!-- Update state before refreshing a chart -->
2 | <button type="submit" 
3 |     pd_script="self.show_line_chart()" 
4 |     pd_refresh="chart{{prefix}}">
5 |     Show line chart
6 | </button>
7 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode36.html:
--------------------------------------------------------------------------------
 1 | <button type="submit" pd_event_payload="type=topicA;message=Button clicked">
 2 |     Send event A
 3 | </button>
 4 | <button type="submit">
 5 |     <pd_event_payload>
 6 |     {
 7 |         "type":"topicA",
 8 |         "message":"Button Clicked"
 9 |     }
10 |     </pd_event_payload>
11 |     Send event A
12 | </button>
13 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode37.html:
--------------------------------------------------------------------------------
1 | <div class="col-sm-6" id="listenerA{{prefix}}">
2 |     Listening to button event
3 |     <pd_event_handler 
4 |         pd_source="topicA" 
5 |         pd_script="print(eventInfo)" 
6 |         pd_target="listenerA{{prefix}}">
7 |     </pd_event_handler>
8 | </div>
9 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode38.html:
--------------------------------------------------------------------------------
1 | <div pd_refresh_rate="3000" 
2 |     pd_script="print(self.get_status())">
3 | </div>
4 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode39.py:
--------------------------------------------------------------------------------
1 | def setup(self):
2 |     self.var1 = "some initial value"
3 |     self.pandas_dataframe = pandas.DataFrame(data)
4 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode4.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | import requests
 3 | from wordcloud import WordCloud
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | @PixieApp
 7 | class WCChildApp():
 8 |     @route(widget='wordcloud')
 9 |     @captureOutput
10 |     def generate_word_cloud(self):
11 |         text = requests.get(self.url).text if self.url else ""
12 |         plt.axis("off")
13 |         plt.imshow(
14 |             WordCloud(max_font_size=40).generate(text), 
15 |             interpolation='bilinear'
16 |         )
17 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode5.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class WordCloudApp(WCChildApp):
 3 |     @route()
 4 |     def main_screen(self):
 5 |         self.url=None
 6 |         return """
 7 |         <div style="text-align:center">
 8 |             <label>Enter a url: </label>
 9 |             <input type="text" size="80" id="url{{prefix}}">
10 |             <button type="submit"
11 |                 pd_script="self.url = '$val(url{{prefix}})'"
12 |                 pd_refresh="wordcloud{{prefix}}">
13 |                 Go
14 |             </button>
15 |         </div>
16 |         <center><div pd_widget="wordcloud" id="wordcloud{{prefix}}"></div></center>
17 |         """
18 |     
19 | app = WordCloudApp()
20 | app.run()
21 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode6.py:
--------------------------------------------------------------------------------
 1 | @abstractmethod
 2 | def doGetNextData(self):
 3 |     """Return the next batch of data from the underlying stream. 
 4 |     Accepted return values are:
 5 |     1. (x,y): tuple of list/numpy arrays representing the x and y axis
 6 |     2. pandas dataframe
 7 |     3. y: list/numpy array representing the y axis. In this case, the x axis is automatically created
 8 |     4. pandas serie: similar to #3
 9 |     5. json
10 |     6. geojson
11 |     7. url with supported payload (json/geojson)
12 |     """
13 |     Pass
14 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode7.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "geometry": {
 3 |         "type": "Point", 
 4 |         "coordinates": [
 5 |             -93.824908715741202, 10.875051131034805
 6 |         ]
 7 |     }, 
 8 |     "type": "Feature", 
 9 |     "properties": {}
10 | }
11 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode8.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.streaming import *
 2 | 
 3 | class DroneStreamingAdapter(StreamingDataAdapter):
 4 |     def getMetadata(self):
 5 |         iconImage = "rocket-15"
 6 |         return {
 7 |             "layout": {"icon-image": iconImage, "icon-size": 1.5},
 8 |             "type": "symbol"
 9 |         }
10 |     def doGetNextData(self):
11 |         return "https://wanderdrone.appspot.com/"
12 | adapter = DroneStreamingAdapter()
13 | display(adapter)
14 | 


--------------------------------------------------------------------------------
/chapter 5/sampleCode9.py:
--------------------------------------------------------------------------------
 1 | @route(topic="*",streampreview="*",schemaX="*")
 2 |     def showChart(self, schemaX):
 3 |         self.schemaX = schemaX
 4 |         self.avgChannelData = self.streamingData.getStreamingChannel(self.computeAverages)
 5 |         return """
 6 | <div class="well" style="text-align:center">
 7 |     <div style="font-size:x-large">Real-time chart for {{this.schemaX}}(average).</div>
 8 | </div>
 9 | …
10 | <div pd_refresh_rate="1000" pd_entity="avgChannelData"></div>
11 |         """
12 | 


--------------------------------------------------------------------------------
/chapter 6/Tensorflow VR Part 1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# TensorFlow Visual Recognition Sample Application Part 1\n",
  8 |     "## Define the model metadata"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 42,
 14 |    "metadata": {
 15 |     "pixiedust": {
 16 |      "displayParams": {}
 17 |     }
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import tensorflow as tf\n",
 22 |     "import requests\n",
 23 |     "models = {\n",
 24 |     "    \"mobilenet\": {\n",
 25 |     "        \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n",
 26 |     "        \"model_file_url\": \"frozen_graph.pb\",\n",
 27 |     "        \"label_file\": \"labels.txt\",\n",
 28 |     "        \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n",
 29 |     "    }\n",
 30 |     "}\n",
 31 |     "\n",
 32 |     "# helper method for reading attributes from the model metadata\n",
 33 |     "def get_model_attribute(model, key, default_value = None):\n",
 34 |     "    if key not in model:\n",
 35 |     "        if default_value is None:\n",
 36 |     "            raise Exception(\"Require model attribute {} not found\".format(key))\n",
 37 |     "        return default_value\n",
 38 |     "    return model[key]"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "## Helper methods for loading the graph and labels for a given model"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 33,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "# Helper method for resolving url relative to the selected model\n",
 55 |     "def get_url(model, path):\n",
 56 |     "    return model[\"base_url\"] + \"/\" + path\n",
 57 |     "    \n",
 58 |     "# Download the serialized model and create a TensorFlow graph\n",
 59 |     "def load_graph(model):\n",
 60 |     "    graph = tf.Graph()\n",
 61 |     "    graph_def = tf.GraphDef()\n",
 62 |     "    graph_def.ParseFromString(\n",
 63 |     "        requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n",
 64 |     "    )\n",
 65 |     "    with graph.as_default():\n",
 66 |     "        tf.import_graph_def(graph_def)\n",
 67 |     "    return graph\n",
 68 |     "\n",
 69 |     "# Load the labels\n",
 70 |     "def load_labels(model, as_json = False):\n",
 71 |     "    labels = [line.rstrip() \\\n",
 72 |     "        for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n",
 73 |     "        if line != \"\"]\n",
 74 |     "    if as_json:\n",
 75 |     "        return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n",
 76 |     "    return labels"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "## Use BeautifulSoup to scrape the images from a given url"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 34,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "from bs4 import BeautifulSoup as BS\n",
 93 |     "import re\n",
 94 |     "\n",
 95 |     "# return an array of all the images scraped from an html page\n",
 96 |     "def get_image_urls(url):\n",
 97 |     "    # Instantiate a BeautifulSoup parser\n",
 98 |     "    soup = BS(requests.get(url).text, \"html.parser\")\n",
 99 |     "    \n",
100 |     "    # Local helper method for extracting url\n",
101 |     "    def extract_url(val):\n",
102 |     "        m = re.match(r\"url\\((.*)\\)\", val)\n",
103 |     "        val = m.group(1) if m is not None else val\n",
104 |     "        return \"http:\" + val if val.startswith(\"//\") else val\n",
105 |     "    \n",
106 |     "    # List comprehension that look for <img> elements and backgroud-image styles\n",
107 |     "    return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n",
108 |     "        extract_url(val.strip()) for key,val in \\\n",
109 |     "        [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n",
110 |     "            for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n",
111 |     "            if key.strip().lower()=='background-image' \\\n",
112 |     "        ]"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "## Helper method for downloading an image into a temp file"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 35,
125 |    "metadata": {},
126 |    "outputs": [],
127 |    "source": [
128 |     "import tempfile\n",
129 |     "def download_image(url):\n",
130 |     "    response = requests.get(url, stream=True)\n",
131 |     "    if response.status_code == 200:\n",
132 |     "        with tempfile.NamedTemporaryFile(delete=False) as f:\n",
133 |     "            for chunk in response.iter_content(2048):\n",
134 |     "                f.write(chunk)\n",
135 |     "            return f.name\n",
136 |     "    else:\n",
137 |     "        raise Exception(\"Unable to download image: {}\".format(response.status_code))"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Decode an image into a tensor"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 36,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "# decode a given image into a tensor\n",
154 |     "def read_tensor_from_image_file(model, file_name):\n",
155 |     "    file_reader = tf.read_file(file_name, \"file_reader\")\n",
156 |     "    if file_name.endswith(\".png\"):\n",
157 |     "        image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n",
158 |     "    elif file_name.endswith(\".gif\"):\n",
159 |     "        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n",
160 |     "    elif file_name.endswith(\".bmp\"):\n",
161 |     "        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n",
162 |     "    else:\n",
163 |     "        image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n",
164 |     "    float_caster = tf.cast(image_reader, tf.float32)\n",
165 |     "    dims_expander = tf.expand_dims(float_caster, 0);\n",
166 |     "    \n",
167 |     "    # Read some info from the model metadata, providing default values\n",
168 |     "    input_height = get_model_attribute(model, \"input_height\", 224)\n",
169 |     "    input_width = get_model_attribute(model, \"input_width\", 224)\n",
170 |     "    input_mean = get_model_attribute(model, \"input_mean\", 0)\n",
171 |     "    input_std = get_model_attribute(model, \"input_std\", 255)\n",
172 |     "        \n",
173 |     "    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n",
174 |     "    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n",
175 |     "    sess = tf.Session()\n",
176 |     "    result = sess.run(normalized)\n",
177 |     "    return result"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "## Score_image method that run the model and return the top 5 candidate answers"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 37,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "import numpy as np\n",
194 |     "\n",
195 |     "# classify an image given its url\n",
196 |     "def score_image(graph, model, url):\n",
197 |     "    # Get the input and output layer from the model\n",
198 |     "    input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n",
199 |     "    output_layer = get_model_attribute(model, \"output_layer\")\n",
200 |     "    \n",
201 |     "    # Download the image and build a tensor from its data\n",
202 |     "    t = read_tensor_from_image_file(model, download_image(url))\n",
203 |     "    \n",
204 |     "    # Retrieve the tensors corresponding to the input and output layers\n",
205 |     "    input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n",
206 |     "    output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n",
207 |     "\n",
208 |     "    with tf.Session(graph=graph) as sess:\n",
209 |     "        # Execute the output, overriding the input tensor with the one corresponding\n",
210 |     "        # to the image in the feed_dict argument\n",
211 |     "        results = sess.run(output_tensor, {input_tensor: t})\n",
212 |     "    results = np.squeeze(results)\n",
213 |     "    # select the top 5 candidate and match them to the labels\n",
214 |     "    top_k = results.argsort()[-5:][::-1]\n",
215 |     "    labels = load_labels(model)\n",
216 |     "    return [(labels[i].split(\":\")[1], results[i]) for i in top_k]"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "## Test the model using a Flickr page"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 41,
229 |    "metadata": {},
230 |    "outputs": [
231 |     {
232 |      "name": "stdout",
233 |      "output_type": "stream",
234 |      "text": [
235 |       "Results for https://geo.yahoo.com/b?s=792600534: \n",
236 |       "\t[('nail', 0.034935154), ('screw', 0.03144558), ('puck, hockey puck', 0.03032596), ('envelope', 0.0285034), ('Band Aid', 0.027891463)]\n",
237 |       "Results for http://c1.staticflickr.com/6/5598/14934282524_344c84246b_n.jpg: \n",
238 |       "\t[('Egyptian cat', 0.4644194), ('tiger cat', 0.1485573), ('tabby, tabby cat', 0.09759513), ('plastic bag', 0.03814263), ('Siamese cat, Siamese', 0.033892646)]\n",
239 |       "Results for http://c1.staticflickr.com/4/3677/13545844805_170ec3746b_n.jpg: \n",
240 |       "\t[('tabby, tabby cat', 0.7330132), ('Egyptian cat', 0.14256532), ('tiger cat', 0.11719289), ('plastic bag', 0.0028653105), ('bow tie, bow-tie, bowtie', 0.00082955)]\n",
241 |       "Results for http://c1.staticflickr.com/6/5170/5372754294_db6acaa1e5_n.jpg: \n",
242 |       "\t[('Persian cat', 0.607673), ('Angora, Angora rabbit', 0.20204937), ('hamster', 0.02988311), ('Egyptian cat', 0.027227053), ('lynx, catamount', 0.018035706)]\n",
243 |       "Results for http://c1.staticflickr.com/6/5589/14818641818_b0058c0cfc_m.jpg: \n",
244 |       "\t[('Egyptian cat', 0.5786173), ('tabby, tabby cat', 0.27942237), ('tiger cat', 0.11966114), ('lynx, catamount', 0.016066141), ('plastic bag', 0.002206809)]\n",
245 |       "Results for http://c1.staticflickr.com/6/5036/5881933297_7974eaff82_n.jpg: \n",
246 |       "\t[('tiger cat', 0.26617262), ('tabby, tabby cat', 0.2417825), ('Persian cat', 0.18471399), ('lynx, catamount', 0.11543496), ('Egyptian cat', 0.025188642)]\n",
247 |       "Results for http://c1.staticflickr.com/3/2602/3977203168_b9d02a0233.jpg: \n",
248 |       "\t[('tabby, tabby cat', 0.75482476), ('tiger cat', 0.13780454), ('Egyptian cat', 0.05675489), ('Siamese cat, Siamese', 0.02073992), ('lynx, catamount', 0.010187127)]\n",
249 |       "Results for http://c1.staticflickr.com/8/7401/16393044637_72e93d96b6_n.jpg: \n",
250 |       "\t[('Egyptian cat', 0.67294717), ('tiger cat', 0.18149199), ('tabby, tabby cat', 0.0952419), ('lynx, catamount', 0.025225954), ('candle, taper, wax light', 0.003860443)]\n",
251 |       "Results for http://c1.staticflickr.com/9/8110/8594699278_dd256c10fd_m.jpg: \n",
252 |       "\t[('tabby, tabby cat', 0.5829553), ('Egyptian cat', 0.15930973), ('tiger cat', 0.12964381), ('lynx, catamount', 0.11114485), ('plastic bag', 0.006467772)]\n",
253 |       "Results for http://c1.staticflickr.com/8/7023/6581178955_7e23af8bf9_m.jpg: \n",
254 |       "\t[('tabby, tabby cat', 0.28574014), ('Egyptian cat', 0.190615), ('plastic bag', 0.17165014), ('lynx, catamount', 0.101593874), ('tiger cat', 0.040527806)]\n",
255 |       "Results for http://c1.staticflickr.com/8/7313/9775005856_9b5e0ebe16_n.jpg: \n",
256 |       "\t[('tiger cat', 0.40977326), ('tabby, tabby cat', 0.31697693), ('Egyptian cat', 0.16972947), ('lynx, catamount', 0.059500016), ('washer, automatic washer, washing machine', 0.0046033794)]\n",
257 |       "Results for http://c1.staticflickr.com/8/7496/16236770082_205f4e358f_n.jpg: \n",
258 |       "\t[('Egyptian cat', 0.40310237), ('Siamese cat, Siamese', 0.23720524), ('tiger cat', 0.100198396), ('tabby, tabby cat', 0.08537914), ('plastic bag', 0.0352822)]\n",
259 |       "Results for http://c1.staticflickr.com/8/7049/13244364473_7b71bc5a4f_n.jpg: \n",
260 |       "\t[('Egyptian cat', 0.59387493), ('candle, taper, wax light', 0.057717346), ('paper towel', 0.046397187), ('plastic bag', 0.035106137), ('tabby, tabby cat', 0.018382242)]\n",
261 |       "Results for http://c1.staticflickr.com/4/3753/9837176706_9ecc1cddac_n.jpg: \n",
262 |       "\t[('tabby, tabby cat', 0.55699265), ('Egyptian cat', 0.19758604), ('tiger cat', 0.12088148), ('lynx, catamount', 0.057880934), ('plastic bag', 0.01653284)]\n",
263 |       "Results for http://c1.staticflickr.com/4/3488/4051998735_5b4863ac11_m.jpg: \n",
264 |       "\t[('Egyptian cat', 0.5310361), ('tabby, tabby cat', 0.26919606), ('tiger cat', 0.13531871), ('lynx, catamount', 0.050503224), ('washer, automatic washer, washing machine', 0.0053878534)]\n",
265 |       "Results for http://c1.staticflickr.com/9/8335/8086459588_46aae939c8.jpg: \n",
266 |       "\t[('Siamese cat, Siamese', 0.827261), ('mouse, computer mouse', 0.046974737), ('screen, CRT screen', 0.029382586), ('carton', 0.0076049017), ('lynx, catamount', 0.0067297667)]\n",
267 |       "Results for http://c1.staticflickr.com/8/7472/16230028882_c03cd6f2cc_n.jpg: \n",
268 |       "\t[('tiger cat', 0.5394526), ('lynx, catamount', 0.14366476), ('Egyptian cat', 0.10943988), ('red fox, Vulpes vulpes', 0.07641454), ('tabby, tabby cat', 0.034076575)]\n",
269 |       "Results for http://c1.staticflickr.com/4/3940/15504684310_f555c88915_n.jpg: \n",
270 |       "\t[('tabby, tabby cat', 0.49280357), ('Egyptian cat', 0.31668788), ('tiger cat', 0.12977621), ('lynx, catamount', 0.022205332), ('plastic bag', 0.008769177)]\n",
271 |       "Results for http://c1.staticflickr.com/9/8630/16556634997_ef0f9dd5f1_n.jpg: \n",
272 |       "\t[('West Highland white terrier', 0.8534684), ('Angora, Angora rabbit', 0.038167812), ('Samoyed, Samoyede', 0.024762549), ('Scotch terrier, Scottish terrier, Scottie', 0.01685713), ('Persian cat', 0.01484343)]\n",
273 |       "Results for http://c1.staticflickr.com/6/5226/5674849391_824822628c_n.jpg: \n",
274 |       "\t[('tiger cat', 0.45084468), ('tabby, tabby cat', 0.40245533), ('Egyptian cat', 0.11048719), ('lynx, catamount', 0.024745336), ('tiger, Panthera tigris', 0.0064596836)]\n",
275 |       "Results for http://c1.staticflickr.com/3/2234/1704658865_3b982b56cf_m.jpg: \n",
276 |       "\t[('Angora, Angora rabbit', 0.21852449), ('Egyptian cat', 0.19025268), ('tabby, tabby cat', 0.14283349), ('Persian cat', 0.085699804), ('tiger cat', 0.06147669)]\n",
277 |       "Results for http://c1.staticflickr.com/2/1361/5110233061_aa3b1c47ef_n.jpg: \n",
278 |       "\t[('tabby, tabby cat', 0.6095775), ('tiger cat', 0.24819912), ('Egyptian cat', 0.13453156), ('lynx, catamount', 0.0021140918), ('carton', 0.0015312452)]\n",
279 |       "Results for http://c1.staticflickr.com/4/3294/2434900370_17c1221ccf_n.jpg: \n",
280 |       "\t[('Egyptian cat', 0.4372107), ('tabby, tabby cat', 0.26445335), ('tiger cat', 0.13057052), ('bow tie, bow-tie, bowtie', 0.06754344), ('lynx, catamount', 0.037636597)]\n",
281 |       "Results for http://c1.staticflickr.com/3/2858/12174748174_27491cde33_n.jpg: \n",
282 |       "\t[('tiger cat', 0.4069278), ('tabby, tabby cat', 0.23834446), ('Egyptian cat', 0.23789576), ('lynx, catamount', 0.11284405), ('tiger, Panthera tigris', 0.0008611009)]\n",
283 |       "Results for http://c1.staticflickr.com/4/3674/13336301695_1cab4f5c85_n.jpg: \n",
284 |       "\t[('weasel', 0.25950897), ('black-footed ferret, ferret, Mustela nigripes', 0.1795659), ('polecat, fitch, foulmart, foumart, Mustela putorius', 0.15248777), ('mink', 0.07626065), ('Egyptian cat', 0.04768039)]\n"
285 |      ]
286 |     }
287 |    ],
288 |    "source": [
289 |     "model = models['mobilenet']\n",
290 |     "graph = load_graph(model)\n",
291 |     "image_urls = get_image_urls(\"https://www.flickr.com/search/?text=cats\")\n",
292 |     "for url in image_urls:\n",
293 |     "    results = score_image(graph, model, url)\n",
294 |     "    print(\"Results for {}: \\n\\t{}\".format(url, results))"
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "code",
299 |    "execution_count": null,
300 |    "metadata": {},
301 |    "outputs": [],
302 |    "source": []
303 |   }
304 |  ],
305 |  "metadata": {
306 |   "celltoolbar": "Edit Metadata",
307 |   "kernelspec": {
308 |    "display_name": "Python 3",
309 |    "language": "python",
310 |    "name": "python3"
311 |   },
312 |   "language_info": {
313 |    "codemirror_mode": {
314 |     "name": "ipython",
315 |     "version": 3
316 |    },
317 |    "file_extension": ".py",
318 |    "mimetype": "text/x-python",
319 |    "name": "python",
320 |    "nbconvert_exporter": "python",
321 |    "pygments_lexer": "ipython3",
322 |    "version": "3.5.4"
323 |   }
324 |  },
325 |  "nbformat": 4,
326 |  "nbformat_minor": 2
327 | }
328 | 


--------------------------------------------------------------------------------
/chapter 6/Tensorflow VR Part 2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# TensorFlow Visual Recognition Sample Application Part 2\n",
  8 |     "\n",
  9 |     "## Provide a User Interface with a PixieApp\n",
 10 |     "\n",
 11 |     "## Define the model metadata"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 1,
 17 |    "metadata": {
 18 |     "pixiedust": {
 19 |      "displayParams": {}
 20 |     }
 21 |    },
 22 |    "outputs": [
 23 |     {
 24 |      "name": "stderr",
 25 |      "output_type": "stream",
 26 |      "text": [
 27 |       "/Users/dtaieb/anaconda/envs/dashboard/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5\n",
 28 |       "  return f(*args, **kwds)\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "import tensorflow as tf\n",
 34 |     "import requests\n",
 35 |     "models = {\n",
 36 |     "    \"mobilenet\": {\n",
 37 |     "        \"base_url\":\"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224\",\n",
 38 |     "        \"model_file_url\": \"frozen_graph.pb\",\n",
 39 |     "        \"label_file\": \"labels.txt\",\n",
 40 |     "        \"output_layer\": \"MobilenetV1/Predictions/Softmax\"\n",
 41 |     "    }\n",
 42 |     "}\n",
 43 |     "\n",
 44 |     "# helper method for reading attributes from the model metadata\n",
 45 |     "def get_model_attribute(model, key, default_value = None):\n",
 46 |     "    if key not in model:\n",
 47 |     "        if default_value is None:\n",
 48 |     "            raise Exception(\"Require model attribute {} not found\".format(key))\n",
 49 |     "        return default_value\n",
 50 |     "    return model[key]"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "## Helper methods for loading the graph and labels for a given model"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 2,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Helper method for resolving url relative to the selected model\n",
 67 |     "def get_url(model, path):\n",
 68 |     "    return model[\"base_url\"] + \"/\" + path\n",
 69 |     "    \n",
 70 |     "# Download the serialized model and create a TensorFlow graph\n",
 71 |     "def load_graph(model):\n",
 72 |     "    graph = tf.Graph()\n",
 73 |     "    graph_def = tf.GraphDef()\n",
 74 |     "    graph_def.ParseFromString(\n",
 75 |     "        requests.get( get_url( model, model[\"model_file_url\"] ) ).content\n",
 76 |     "    )\n",
 77 |     "    with graph.as_default():\n",
 78 |     "        tf.import_graph_def(graph_def)\n",
 79 |     "    return graph\n",
 80 |     "\n",
 81 |     "# Load the labels\n",
 82 |     "def load_labels(model, as_json = False):\n",
 83 |     "    labels = [line.rstrip() \\\n",
 84 |     "        for line in requests.get( get_url( model, model[\"label_file\"] ) ).text.split(\"\\n\") \\\n",
 85 |     "        if line != \"\"]\n",
 86 |     "    if as_json:\n",
 87 |     "        return [{\"index\": item.split(\":\")[0], \"label\" : item.split(\":\")[1]} for item in labels]\n",
 88 |     "    return labels"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## Use BeautifulSoup to scrape the images from a given url"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 3,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": [
104 |     "from bs4 import BeautifulSoup as BS\n",
105 |     "import re\n",
106 |     "\n",
107 |     "# return an array of all the images scraped from an html page\n",
108 |     "def get_image_urls(url):\n",
109 |     "    # Instantiate a BeautifulSoup parser\n",
110 |     "    soup = BS(requests.get(url).text, \"html.parser\")\n",
111 |     "    \n",
112 |     "    # Local helper method for extracting url\n",
113 |     "    def extract_url(val):\n",
114 |     "        m = re.match(r\"url\\((.*)\\)\", val)\n",
115 |     "        val = m.group(1) if m is not None else val\n",
116 |     "        return \"http:\" + val if val.startswith(\"//\") else val\n",
117 |     "    \n",
118 |     "    # List comprehension that look for <img> elements and backgroud-image styles\n",
119 |     "    return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \\\n",
120 |     "        extract_url(val.strip()) for key,val in \\\n",
121 |     "        [tuple(selector.split(\":\")) for elt in soup.select(\"[style]\") \\\n",
122 |     "            for selector in elt[\"style\"].strip(\" ;\").split(\";\")] \\\n",
123 |     "            if key.strip().lower()=='background-image' \\\n",
124 |     "        ]"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "## Helper method for downloading an image into a temp file"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 4,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "import tempfile\n",
141 |     "def download_image(url):\n",
142 |     "    response = requests.get(url, stream=True)\n",
143 |     "    if response.status_code == 200:\n",
144 |     "        with tempfile.NamedTemporaryFile(delete=False) as f:\n",
145 |     "            for chunk in response.iter_content(2048):\n",
146 |     "                f.write(chunk)\n",
147 |     "            return f.name\n",
148 |     "    else:\n",
149 |     "        raise Exception(\"Unable to download image: {}\".format(response.status_code))"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "## Decode an image into a tensor"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 5,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "# decode a given image into a tensor\n",
166 |     "def read_tensor_from_image_file(model, file_name):\n",
167 |     "    file_reader = tf.read_file(file_name, \"file_reader\")\n",
168 |     "    if file_name.endswith(\".png\"):\n",
169 |     "        image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')\n",
170 |     "    elif file_name.endswith(\".gif\"):\n",
171 |     "        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))\n",
172 |     "    elif file_name.endswith(\".bmp\"):\n",
173 |     "        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')\n",
174 |     "    else:\n",
175 |     "        image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')\n",
176 |     "    float_caster = tf.cast(image_reader, tf.float32)\n",
177 |     "    dims_expander = tf.expand_dims(float_caster, 0);\n",
178 |     "    \n",
179 |     "    # Read some info from the model metadata, providing default values\n",
180 |     "    input_height = get_model_attribute(model, \"input_height\", 224)\n",
181 |     "    input_width = get_model_attribute(model, \"input_width\", 224)\n",
182 |     "    input_mean = get_model_attribute(model, \"input_mean\", 0)\n",
183 |     "    input_std = get_model_attribute(model, \"input_std\", 255)\n",
184 |     "        \n",
185 |     "    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])\n",
186 |     "    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])\n",
187 |     "    sess = tf.Session()\n",
188 |     "    result = sess.run(normalized)\n",
189 |     "    return result"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "metadata": {},
195 |    "source": [
196 |     "## Score_image method that run the model and return the top 5 candidate answers"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 6,
202 |    "metadata": {},
203 |    "outputs": [],
204 |    "source": [
205 |     "import numpy as np\n",
206 |     "\n",
207 |     "# classify an image given its url\n",
208 |     "def score_image(graph, model, url):\n",
209 |     "    # Get the input and output layer from the model\n",
210 |     "    input_layer = get_model_attribute(model, \"input_layer\", \"input\")\n",
211 |     "    output_layer = get_model_attribute(model, \"output_layer\")\n",
212 |     "    \n",
213 |     "    # Download the image and build a tensor from its data\n",
214 |     "    t = read_tensor_from_image_file(model, download_image(url))\n",
215 |     "    \n",
216 |     "    # Retrieve the tensors corresponding to the input and output layers\n",
217 |     "    input_tensor = graph.get_tensor_by_name(\"import/\" + input_layer + \":0\");\n",
218 |     "    output_tensor = graph.get_tensor_by_name(\"import/\" + output_layer + \":0\");\n",
219 |     "\n",
220 |     "    with tf.Session(graph=graph) as sess:\n",
221 |     "        # Execute the output, overriding the input tensor with the one corresponding\n",
222 |     "        # to the image in the feed_dict argument\n",
223 |     "        results = sess.run(output_tensor, {input_tensor: t})\n",
224 |     "    results = np.squeeze(results)\n",
225 |     "    # select the top 5 candidate and match them to the labels\n",
226 |     "    top_k = results.argsort()[-5:][::-1]\n",
227 |     "    labels = load_labels(model)\n",
228 |     "    return [(labels[i].split(\":\")[1], results[i]) for i in top_k]"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "metadata": {},
234 |    "source": [
235 |     "## PixieApp with the following screens:\n",
236 |     "1. Ask the user for a url to a web page\n",
237 |     "2. Display the images with top 5 candidate classifications"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": 9,
243 |    "metadata": {
244 |     "pixiedust": {
245 |      "displayParams": {}
246 |     }
247 |    },
248 |    "outputs": [
249 |     {
250 |      "data": {
251 |       "text/html": [
252 |        "<style type=\"text/css\">.pd_warning{display:none;}</style><div class=\"pd_warning\"><em>Hey, there's something awesome here! To see it, open this notebook outside GitHub, in a viewer like Jupyter</em></div>"
253 |       ],
254 |       "text/plain": [
255 |        "<IPython.core.display.HTML object>"
256 |       ]
257 |      },
258 |      "metadata": {
259 |       "pixieapp_metadata": null
260 |      },
261 |      "output_type": "display_data"
262 |     }
263 |    ],
264 |    "source": [
265 |     "from pixiedust.display.app import *\n",
266 |     "\n",
267 |     "@PixieApp\n",
268 |     "class ScoreImageApp():\n",
269 |     "    def setup(self):\n",
270 |     "        self.model = models[\"mobilenet\"]\n",
271 |     "        self.graph = load_graph( self.model )\n",
272 |     "\n",
273 |     "    @route()\n",
274 |     "    def main_screen(self):\n",
275 |     "        return \"\"\"\n",
276 |     "<style>\n",
277 |     "    div.outer-wrapper {\n",
278 |     "        display: table;width:100%;height:300px;\n",
279 |     "    }\n",
280 |     "    div.inner-wrapper {\n",
281 |     "        display: table-cell;vertical-align: middle;height: 100%;width: 100%;\n",
282 |     "    }\n",
283 |     "</style>\n",
284 |     "<div class=\"outer-wrapper\">\n",
285 |     "    <div class=\"inner-wrapper\">\n",
286 |     "        <div class=\"col-sm-3\"></div>\n",
287 |     "        <div class=\"input-group col-sm-6\">\n",
288 |     "          <input id=\"url{{prefix}}\" type=\"text\" class=\"form-control\"\n",
289 |     "              value=\"https://www.flickr.com/search/?text=cats\"\n",
290 |     "              placeholder=\"Enter a url that contains images\">\n",
291 |     "          <span class=\"input-group-btn\">\n",
292 |     "            <button class=\"btn btn-default\" type=\"button\" pd_options=\"image_url=$val(url{{prefix}})\">Go</button>\n",
293 |     "          </span>\n",
294 |     "        </div>\n",
295 |     "    </div>\n",
296 |     "</div>        \n",
297 |     "\"\"\"\n",
298 |     "    \n",
299 |     "    @route(image_url=\"*\")\n",
300 |     "    @templateArgs\n",
301 |     "    def do_process_url(self, image_url):\n",
302 |     "        image_urls = get_image_urls(image_url)\n",
303 |     "        return \"\"\"\n",
304 |     "<div>\n",
305 |     "{%for url in image_urls%}\n",
306 |     "<div style=\"float: left; font-size: 9pt; text-align: center; width: 30%; margin-right: 1%; margin-bottom: 0.5em;\">\n",
307 |     "<img src=\"{{url}}\" style=\"width: 100%\">\n",
308 |     "<div style=\"display:inline-block\" pd_render_onload pd_options=\"score_url={{url}}\"></div>\n",
309 |     "</div>\n",
310 |     "{%endfor%}\n",
311 |     "<p style=\"clear: both;\">\n",
312 |     "</div>\n",
313 |     "        \"\"\"\n",
314 |     "    \n",
315 |     "    @route(score_url=\"*\")\n",
316 |     "    @templateArgs\n",
317 |     "    def do_score_url(self, score_url):\n",
318 |     "        results = score_image(self.graph, self.model, score_url)\n",
319 |     "        return \"\"\"\n",
320 |     "<ul style=\"text-align:left\">\n",
321 |     "{%for label, confidence in results%}\n",
322 |     "<li><b>{{label}}</b>: {{confidence}}</li>\n",
323 |     "{%endfor%}\n",
324 |     "</ul>\n",
325 |     "\"\"\"\n",
326 |     "    \n",
327 |     "app = ScoreImageApp()\n",
328 |     "app.run()"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "code",
333 |    "execution_count": null,
334 |    "metadata": {},
335 |    "outputs": [],
336 |    "source": []
337 |   }
338 |  ],
339 |  "metadata": {
340 |   "celltoolbar": "Edit Metadata",
341 |   "kernelspec": {
342 |    "display_name": "Python 3",
343 |    "language": "python",
344 |    "name": "python3"
345 |   },
346 |   "language_info": {
347 |    "codemirror_mode": {
348 |     "name": "ipython",
349 |     "version": 3
350 |    },
351 |    "file_extension": ".py",
352 |    "mimetype": "text/x-python",
353 |    "name": "python",
354 |    "nbconvert_exporter": "python",
355 |    "pygments_lexer": "ipython3",
356 |    "version": "3.5.4"
357 |   }
358 |  },
359 |  "nbformat": 4,
360 |  "nbformat_minor": 2
361 | }
362 | 


--------------------------------------------------------------------------------
/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/frozen_graph.pb


--------------------------------------------------------------------------------
/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 6/Visual Recognition/mobilenet_v1_0.50_224/quantized_graph.pb


--------------------------------------------------------------------------------
/chapter 6/sampleCode1.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | x_input = tf.placeholder(tf.float32)
 3 | y_output = tf.placeholder(tf.float32)
 4 | eps = 0.01
 5 | W1 = tf.Variable(tf.random_uniform([2,2], -eps, eps))
 6 | W2 = tf.Variable(tf.random_uniform([2,1], -eps, eps))
 7 | layer1 = tf.sigmoid(tf.matmul(x_input, W1))
 8 | output_layer = tf.sigmoid(tf.matmul(layer1, W2))
 9 | cost = tf.reduce_mean(tf.square(y_output - output_layer))
10 | train = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
11 | training_data = ([[0,0],[0,1],[1,0],[1,1]], [[0],[1],[1],[0]])
12 | with tf.Session() as sess:
13 |     sess.run(tf.global_variables_initializer())
14 |     for i in range(5000):
15 |         sess.run(train, feed_dict={x_input: training_data[0], y_output: training_data[1]}) 
16 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode10.py:
--------------------------------------------------------------------------------
1 | # Load the labels
2 | def load_labels(model, as_json = False):
3 |     labels = [line.rstrip() \
4 |       for line in requests.get(get_url(model, model["label_file"]) ).text.split("\n") if line != ""]
5 |     if as_json:
6 |         return [{"index": item.split(":")[0],"label":item.split(":")[1]} for item in labels]
7 |     return labels
8 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode11.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup as BS
 2 | import re
 3 | 
 4 | # return an array of all the images scraped from an html page
 5 | def get_image_urls(url):
 6 |     # Instantiate a BeautifulSoup parser
 7 |     soup = BS(requests.get(url).text, "html.parser")
 8 |     
 9 |     # Local helper method for extracting url
10 |     def extract_url(val):
11 |         m = re.match(r"url\((.*)\)", val)
12 |         val = m.group(1) if m is not None else val
13 |         return "http:" + val if val.startswith("//") else val
14 |     
15 |     # List comprehension that look for <img> elements and backgroud-image styles
16 |     return [extract_url(imgtag['src']) for imgtag in soup.find_all('img')] + [ \
17 |         extract_url(val.strip()) for key,val in \
18 |         [tuple(selector.split(":")) for elt in soup.select("[style]") \
19 |             for selector in elt["style"].strip(" ;").split(";")] \
20 |             if key.strip().lower()=='background-image' \
21 |         ] 
22 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode12.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | def download_image(url):
 3 |    response = requests.get(url, stream=True)
 4 |    if response.status_code == 200:
 5 |       with tempfile.NamedTemporaryFile(delete=False) as f:
 6 |          for chunk in response.iter_content(2048):
 7 |             f.write(chunk)
 8 |          return f.name
 9 |    else:
10 |       raise Exception("Unable to download image: {}".format(response.status_code))
11 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode13.py:
--------------------------------------------------------------------------------
 1 | # decode a given image into a tensor
 2 | def read_tensor_from_image_file(model, file_name):
 3 |     file_reader = tf.read_file(file_name, "file_reader")
 4 |     if file_name.endswith(".png"):
 5 |         image_reader = tf.image.decode_png(file_reader, channels = 3,name='png_reader')
 6 |     elif file_name.endswith(".gif"):
 7 |         image_reader = tf.squeeze(tf.image.decode_gif(file_reader,name='gif_reader'))
 8 |     elif file_name.endswith(".bmp"):
 9 |         image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
10 |     else:
11 |         image_reader = tf.image.decode_jpeg(file_reader, channels = 3, name='jpeg_reader')
12 |     float_caster = tf.cast(image_reader, tf.float32)
13 |     dims_expander = tf.expand_dims(float_caster, 0);
14 |     
15 |     # Read some info from the model metadata, providing default values
16 |     input_height = get_model_attribute(model, "input_height", 224)
17 |     input_width = get_model_attribute(model, "input_width", 224)
18 |     input_mean = get_model_attribute(model, "input_mean", 0)
19 |     input_std = get_model_attribute(model, "input_std", 255)
20 |         
21 |     resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
22 |     normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
23 |     sess = tf.Session()
24 |     result = sess.run(normalized)
25 |     return result 
26 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode14.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | # classify an image given its url
 4 | def score_image(graph, model, url):
 5 |     # Get the input and output layer from the model
 6 |     input_layer = get_model_attribute(model, "input_layer", "input")
 7 |     output_layer = get_model_attribute(model, "output_layer")
 8 |     
 9 |     # Download the image and build a tensor from its data
10 |     t = read_tensor_from_image_file(model, download_image(url))
11 |     
12 |     # Retrieve the tensors corresponding to the input and output layers
13 |     input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0");
14 |     output_tensor = graph.get_tensor_by_name("import/" + output_layer + ":0");
15 | 
16 |     with tf.Session(graph=graph) as sess:
17 |         results = sess.run(output_tensor, {input_tensor: t})
18 |     results = np.squeeze(results)
19 |     # select the top 5 candidate and match them to the labels
20 |     top_k = results.argsort()[-5:][::-1]
21 |     labels = load_labels(model)
22 |     return [(labels[i].split(":")[1], results[i]) for i in top_k]
23 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode15.py:
--------------------------------------------------------------------------------
1 | model = models['mobilenet']
2 | graph = load_graph(model)
3 | image_urls = get_image_urls("https://www.flickr.com/search/?text=cats")
4 | for url in image_urls:
5 |     results = score_image(graph, model, url)
6 |     print("Result for {}: \n\t{}".format(url, results))
7 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode16.py:
--------------------------------------------------------------------------------
1 | from pixiedust.display.app import *
2 | 
3 | @PixieApp
4 | class ScoreImageApp():
5 |     def setup(self):
6 |         self.model = models["mobilenet"]
7 |         self.graph = load_graph( self.model )
8 |     …
9 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode17.py:
--------------------------------------------------------------------------------
 1 | [[ScoreImageApp]]
 2 | @route()
 3 | def main_screen(self):
 4 |    return """
 5 | <style>
 6 |     div.outer-wrapper {
 7 |         display: table;width:100%;height:300px;
 8 |     }
 9 |     div.inner-wrapper {
10 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
11 |     }
12 | </style>
13 | <div class="outer-wrapper">
14 |     <div class="inner-wrapper">
15 |         <div class="col-sm-3"></div>
16 |         <div class="input-group col-sm-6">
17 |           <input id="url{{prefix}}" type="text" class="form-control"
18 |               value="https://www.flickr.com/search/?text=cats"
19 |               placeholder="Enter a url that contains images">
20 |           <span class="input-group-btn">
21 |             <button class="btn btn-default" type="button" pd_options="image_url=$val(url{{prefix}})">Go</button>
22 |           </span>
23 |         </div>
24 |     </div>
25 | </div>        
26 | """
27 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode18.py:
--------------------------------------------------------------------------------
 1 | [[ScoreImageApp]]
 2 | @route(image_url="*")
 3 | @templateArgs
 4 | def do_process_url(self, image_url):
 5 |    image_urls = get_image_urls(image_url)
 6 |    return """
 7 | <div>
 8 | {%for url in image_urls%}
 9 | <div style="float: left; font-size: 9pt; text-align: center; width: 30%; margin-right: 1%; margin-bottom: 0.5em;">
10 | <img src="{{url}}" style="width: 100%">
11 |   <div style="display:inline-block" pd_render_onload pd_options="score_url={{url}}">
12 |   </div>
13 | </div>
14 | {%endfor%}
15 | <p style="clear: both;">
16 | </div>
17 |         """
18 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode19.py:
--------------------------------------------------------------------------------
 1 | [[ScoreImageApp]]
 2 | @route(score_url="*")
 3 | @templateArgs
 4 | def do_score_url(self, score_url):
 5 |    results = score_image(self.graph, self.model, score_url)
 6 |    return """
 7 | <ul style="text-align:left">
 8 | {%for label, confidence in results%}
 9 | <li><b>{{label}}</b>: {{confidence}}</li>
10 | {%endfor%}
11 | </ul>
12 | """
13 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode2.py:
--------------------------------------------------------------------------------
 1 | def do_training(train, train_labels, test, test_labels, num_classes):
 2 |     #set TensorFlow logging level to INFO
 3 |     tf.logging.set_verbosity(tf.logging.INFO)
 4 | 
 5 |     # Build 2 hidden layer DNN with 10, 10 units respectively.
 6 |     classifier = tf.estimator.DNNClassifier(
 7 |         # Compute feature_columns from dataframe keys using list comprehension
 8 |         feature_columns =
 9 |             [tf.feature_column.numeric_column(key=key) for key in train.keys()],
10 |         hidden_units=[10, 10],
11 |         n_classes=num_classes)
12 | 
13 |     # Train the Model
14 |     classifier.train(
15 |         input_fn=lambda:train_input_fn(train, train_labels,100),
16 |         steps=1000
17 |     )
18 | 
19 |     # Evaluate the model
20 |     eval_result = classifier.evaluate(
21 |         input_fn=lambda:eval_input_fn(test, test_labels,100)
22 |     )
23 | 
24 |     return (classifier, eval_result)
25 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode20.py:
--------------------------------------------------------------------------------
 1 | [[ImageRecoApp]]
 2 | from pixiedust.apps.template import TemplateTabbedApp
 3 | @PixieApp
 4 | class ImageRecoApp(TemplateTabbedApp):
 5 |     def setup(self):
 6 |         self.apps = [
 7 |             {"title": "Score", "app_class": "ScoreImageApp"},
 8 |             {"title": "Model", "app_class": "TensorGraphApp"},
 9 |             {"title": "Labels", "app_class": "LabelsApp"}
10 |         ]
11 |         self.model = models["mobilenet"]
12 |         self.graph = self.load_graph(self.model)
13 |         
14 | app = ImageRecoApp()
15 | app.run()
16 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode21.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class TensorGraphApp():
 3 |     """Visualize TensorFlow graph."""
 4 |     def setup(self):
 5 |         self.graph = self.parent_pixieapp.graph
 6 | 
 7 |     @route()
 8 |     @templateArgs
 9 |     def main_screen(self):
10 |         strip_def = self.strip_consts(self.graph.as_graph_def())
11 |         code = """
12 |             <script>
13 |               function load() {{
14 |                 document.getElementById("{id}").pbtxt = {data};
15 |               }}
16 |             </script>
17 |             <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
18 |             <div style="height:600px">
19 |               <tf-graph-basic id="{id}"></tf-graph-basic>
20 |             </div>
21 |         """.format(data=repr(str(strip_def)), id='graph'+ self.getPrefix()).replace('"', '&quot;')
22 | 
23 |         return """
24 | <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{{code}}"></iframe>
25 | """
26 | 
27 |     def strip_consts(self, graph_def, max_const_size=32):
28 |         """Strip large constant values from graph_def."""
29 |         strip_def = tf.GraphDef()
30 |         for n0 in graph_def.node:
31 |             n = strip_def.node.add() 
32 |             n.MergeFrom(n0)
33 |             if n.op == 'Const':
34 |                 tensor = n.attr['value'].tensor
35 |                 size = len(tensor.tensor_content)
36 |                 if size > max_const_size:
37 |                     tensor.tensor_content = "<stripped {} bytes>".format(size).encode("UTF-8")
38 |         return strip_def
39 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode22.py:
--------------------------------------------------------------------------------
 1 | [[LabelsApp]]
 2 | @PixieApp
 3 | class LabelsApp():
 4 |     def setup(self):
 5 |         self.labels = self.parent_pixieapp.load_labels(
 6 |             self.parent_pixieapp.model, as_json=True
 7 |         )
 8 |     
 9 |     @route()
10 |     def main_screen(self):
11 |         return """
12 | <div pd_render_onload pd_entity="labels">
13 |     <pd_options>
14 |     {
15 |         "table_noschema": "true",
16 |         "handlerId": "tableView",
17 |         "rowCount": "10000"
18 |     }
19 |     </pd_options>
20 | </div>
21 |         """
22 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode23.py:
--------------------------------------------------------------------------------
 1 | import pandas
 2 | wnid_to_urls = pandas.read_csv('/Users/dtaieb/Downloads/fall11_urls.txt', sep='\t', names=["wnid", "url"],
 3 |                      header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1")
 4 | wnid_to_urls['wnid'] = wnid_to_urls['wnid'].apply(lambda x: x.split("_")[0])
 5 | wnid_to_urls = wnid_to_urls.dropna()
 6 | 
 7 | wnid_to_words = pandas.read_csv('/Users/dtaieb/Downloads/words.txt', sep='\t', names=["wnid", "description"],
 8 |                      header=0, error_bad_lines=False, warn_bad_lines=False, encoding="ISO-8859-1")
 9 | wnid_to_words = wnid_to_words.dropna()
10 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode24.py:
--------------------------------------------------------------------------------
 1 | def get_url_for_keywords(keywords):
 2 |     results = {}
 3 |     for keyword in keywords:
 4 |         df = wnid_to_words.loc[wnid_to_words['description'] == keyword]
 5 |         row_list = df['wnid'].values.tolist()
 6 |         descriptions = df['description'].values.tolist()
 7 |         if len(row_list) > 0:
 8 |             results[descriptions[0]] = wnid_to_urls.loc[wnid_to_urls['wnid'] == row_list[0]]["url"].values.tolist()
 9 |     return results
10 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode25.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.utils.environment import Environment
 2 | root_dir = ensure_dir_exists(os.path.join(Environment.pixiedustHome, "imageRecoApp")
 3 | image_dir = root_dir
 4 | image_dict = get_url_for_keywords(["apple", "orange", "pear", "banana"])
 5 | with open(os.path.join(image_dir, "retrained_label.txt"), "w") as f_label: 
 6 |     for key in image_dict:
 7 |         f_label.write(key + "\n")
 8 |         path = ensure_dir_exists(os.path.join(image_dir, key))
 9 |         count = 0
10 |         for url in image_dict[key]:
11 |             download_image_into_dir(url, path)
12 |             count += 1
13 |             if count > 500:
14 |                 break;
15 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode26.py:
--------------------------------------------------------------------------------
 1 | def add_jpeg_decoding(model):
 2 |     input_height = get_model_attribute(model, "input_height")
 3 |     input_width = get_model_attribute(model, "input_width")
 4 |     input_depth = get_model_attribute(model, "input_depth")
 5 |     input_mean = get_model_attribute(model, "input_mean", 0)
 6 |     input_std = get_model_attribute(model, "input_std", 255)
 7 |     
 8 |     jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
 9 |     decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
10 |     decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32)
11 |     decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
12 |     resize_shape = tf.stack([input_height, input_width])
13 |     resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
14 |     resized_image = tf.image.resize_bilinear(decoded_image_4d,
15 |                                            resize_shape_as_int)
16 |     offset_image = tf.subtract(resized_image, input_mean)
17 |     mul_image = tf.multiply(offset_image, 1.0 / input_std)
18 |     return jpeg_data, mul_image
19 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode27.py:
--------------------------------------------------------------------------------
1 | def run_bottleneck_on_image(sess, image_data, image_data_tensor,decoded_image_tensor, 
2 |                             resized_input_tensor,bottleneck_tensor):
3 |     # First decode the JPEG image, resize it, and rescale the pixel values.
4 |     resized_input_values = sess.run(decoded_image_tensor,{image_data_tensor: image_data})
5 |     # Then run it through the recognition network.
6 |     bottleneck_values = sess.run(bottleneck_tensor,{resized_input_tensor: resized_input_values})
7 |     bottleneck_values = np.squeeze(bottleneck_values)
8 |     return bottleneck_values
9 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode28.py:
--------------------------------------------------------------------------------
 1 | [[TensorGraphApp]]
 2 | return """
 3 | {%if this.custom_graph%}
 4 | <div style="margin-top:10px" pd_refresh>
 5 |     <pd_script>
 6 | self.graph = self.custom_graph if self.graph is not self.custom_graph else self.parent_pixieapp.graph
 7 |     </pd_script>
 8 |     <span style="font-weight:bold">Select a model to display:</span>
 9 |     <select>
10 |         <option {%if this.graph!=this.custom_graph%}selected{%endif%} value="main">MobileNet</option>
11 |         <option {%if this.graph==this.custom_graph%}selected{%endif%} value="custom">Custom</options>
12 |     </select>
13 | {%endif%}
14 | <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{{code}}"></iframe>
15 | """
16 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode29.py:
--------------------------------------------------------------------------------
1 | with tf.name_scope('cross_entropy'):
2 |     cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=ground_truth_input, logits=logits)
3 |     with tf.name_scope('total'):
4 |         cross_entropy_mean = tf.reduce_mean(cross_entropy)
5 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode3.py:
--------------------------------------------------------------------------------
 1 | def input_fn(features, labels, batch_size, train):
 2 |     # Convert the inputs to a Dataset and shuffle.
 3 |     dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels)).shuffle(1000)
 4 |     if train:
 5 |         #repeat only for training
 6 |         dataset = dataset.repeat()
 7 |     # Return the dataset in batch
 8 |     return dataset.batch(batch_size)
 9 | 
10 | def train_input_fn(features, labels, batch_size):
11 |     return input_fn(features, labels, batch_size, train=True)
12 | 
13 | def eval_input_fn(features, labels, batch_size):
14 |     return input_fn(features, labels, batch_size, train=False)
15 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode30.py:
--------------------------------------------------------------------------------
 1 | [[LabelsApp]]
 2 | @PixieApp
 3 | class LabelsApp():
 4 |     def setup(self):
 5 |         …
 6 |     
 7 |     @route()
 8 |     def main_screen(self):
 9 |         return """
10 | {%if this.custom_labels%}
11 | <div style="margin-top:10px" pd_refresh>
12 |     <pd_script>
13 | self.current_labels = self.custom_labels if self.current_labels is not self.custom_labels else self.labels
14 |     </pd_script>
15 |     <span style="font-weight:bold">Select a model to display:</span>
16 |     <select>
17 |         <option {%if this.current_labels!=this.labels%}selected{%endif%} value="main">MobileNet</option>
18 |         <option {%if this.current_labels==this.custom_labels%}selected{%endif%} value="custom">Custom</options>
19 |     </select>
20 | {%endif%}
21 | <div pd_render_onload pd_entity="current_labels">
22 |     <pd_options>
23 |     {
24 |         "table_noschema": "true",
25 |         "handlerId": "tableView",
26 |         "rowCount": "10000",
27 |         "noChartCache": "true"
28 |         
29 |     }
30 |     </pd_options>
31 | </div>
32 |         """
33 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode31.py:
--------------------------------------------------------------------------------
 1 | # classify an image given its url
 2 | def score_image(graph, model, url):
 3 |     # Download the image and build a tensor from its data
 4 |     t = read_tensor_from_image_file(model, download_image(url))
 5 |     
 6 |     def do_score_image(graph, output_layer, labels):        
 7 |         # Retrieve the tensors corresponding to the input and output layers
 8 |         input_tensor = graph.get_tensor_by_name("import/" + input_layer + ":0");
 9 |         output_tensor = graph.get_tensor_by_name( output_layer + ":0");
10 | 
11 |         with tf.Session(graph=graph) as sess:
12 |          	  # Initialize the variables
13 |             sess.run(tf.global_variables_initializer())
14 |             results = sess.run(output_tensor, {input_tensor: t})
15 |         results = np.squeeze(results)
16 |         # select the top 5 candidates and match them to the labels
17 |         top_k = results.argsort()[-5:][::-1]
18 |         return [(labels[i].split(":")[1], results[i]) for i in top_k]
19 |     
20 |     results = {}
21 |     input_layer = get_model_attribute(model, "input_layer", "input")
22 |     labels = load_labels(model)
23 |     results["mobilenet"] = do_score_image(graph, "import/" + get_model_attribute(model, "output_layer"), labels)
24 |     if "custom_graph" in model and "custom_labels" in model:
25 |         with open(model["custom_labels"]) as f:
26 |             labels = [line.rstrip() for line in f.readlines() if line != ""]
27 |             custom_labels = ["{}:{}".format(i, label) for i,label in zip(range(len(labels)), labels)]
28 |         results["custom"] = do_score_image(model["custom_graph"], "final_result", custom_labels)
29 |     return results
30 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode32.py:
--------------------------------------------------------------------------------
 1 | @route(score_url="*")
 2 | @templateArgs
 3 | def do_score_url(self, score_url):
 4 |    scores_dict = score_image(self.graph, self.model, score_url)
 5 |    return """
 6 | {%for model, results in scores_dict.items()%}
 7 | <div style="font-weight:bold">{{model}}</div>
 8 | <ul style="text-align:left">
 9 | {%for label, confidence in results%}
10 | <li><b>{{label}}</b>: {{confidence}}</li>
11 | {%endfor%}
12 | </ul>
13 | {%endfor%}
14 |    """
15 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode4.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | @PixieApp
 3 | class SimpleClassificationDNN():
 4 |     @route()
 5 |     def main_screen(self):
 6 |         return """
 7 | <h1 style="margin:40px">
 8 |     <center>The classificiation model will be trained on all the numeric columns of the dataset</center>
 9 | </h1>
10 | <style>
11 |     div.outer-wrapper {
12 |         display: table;width:100%;height:300px;
13 |     }
14 |     div.inner-wrapper {
15 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
16 |     }
17 | </style>
18 | <div class="outer-wrapper">
19 |     <div class="inner-wrapper">
20 |         <div class="col-sm-3"></div>
21 |         <div class="input-group col-sm-6">
22 |           <select id="cols{{prefix}}" style="width:100%;height:30px" pd_options="predictor=$val(cols{{prefix}})">
23 |               <option value="0">Select a predictor column</option>
24 |               {%for col in this.pixieapp_entity.columns.values.tolist()%}
25 |               <option value="{{col}}">{{col}}</option>
26 |               {%endfor%}
27 |           </select>
28 |         </div>
29 |     </div>
30 | </div>     
31 |         """
32 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode5.py:
--------------------------------------------------------------------------------
 1 | @route(predictor="*")
 2 | @templateArgs
 3 | def prepare_training(self, predictor):
 4 |         #select only numerical columns
 5 |         self.dataset = self.pixieapp_entity.dropna(axis=1).select_dtypes(
 6 |             include=['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
 7 |         )
 8 |         #Compute the number of classed by counting the groups
 9 |         self.num_classes = self.dataset.groupby(predictor).size().shape[0]
10 |         #Create the train and test feature and labels
11 |         self.train_x=self.dataset.sample(frac=0.8)
12 |         self.full_train = self.train_x.copy()
13 |         self.train_y = self.train_x.pop(predictor)
14 |         self.test_x=self.dataset.drop(self.train_x.index)
15 |         self.full_test = self.test_x.copy()
16 |         self.test_y=self.test_x.pop(predictor)
17 |         
18 |         bar_chart_options = {
19 |           "rowCount": "100",
20 |           "keyFields": predictor,
21 |           "handlerId": "barChart",
22 |           "noChartCache": "true"
23 |         }
24 |         
25 |         return """
26 | <div class="container" style="margin-top:20px">
27 |     <div class="row">
28 |         <div class="col-sm-5">
29 |             <h3><center>Train set class distribution</center></h3>
30 |             <div pd_entity="full_train" pd_render_onload>
31 |                 <pd_options>{{bar_chart_options|tojson}}</pd_options>
32 |             </div>
33 |         </div>
34 |         <div class="col-sm-5">
35 |             <h3><center>Test set class distribution</center></h3>
36 |             <div pd_entity="full_test" pd_render_onload>
37 |                 <pd_options>{{bar_chart_options|tojson}}</pd_options>
38 |             </div>
39 |         </div>
40 |     </div>
41 | </div>
42 | 
43 | <div style="text-align:center">
44 |     <button class="btn btn-default" type="submit" pd_options="do_training=true">
45 |         Start Training
46 |     </button>
47 | </div>
48 | """
49 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode6.py:
--------------------------------------------------------------------------------
 1 | @route(do_training="*")
 2 |    @captureOutput
 3 | def do_training_screen(self):
 4 |        self.classifier, self.eval_results = \
 5 |       do_training(
 6 | self.train_x, self.train_y, self.test_x, self.test_y, self.num_classes
 7 |       )
 8 |         return """
 9 | <h2>Training completed successfully</h2>
10 | <table>
11 |     <thead>
12 |         <th>Metric</th>
13 |         <th>Value</th>
14 |     </thead>
15 |     <tbody>
16 | {%for key,value in this.eval_results.items()%}
17 | <tr>
18 |     <td>{{key}}</td>
19 |     <td>{{value}}</td>
20 | </tr>
21 | {%endfor%}
22 |     </tbody>
23 | </table>
24 |         """ 
25 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode7.py:
--------------------------------------------------------------------------------
1 | models = {
2 |     "mobilenet": {
3 |         "base_url":"https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%206/Visual%20Recognition/mobilenet_v1_0.50_224",
4 |         "model_file_url": "frozen_graph.pb",
5 |         "label_file": "labels.txt",
6 |         "output_layer": "MobilenetV1/Predictions/Softmax"
7 |     }
8 | }
9 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode8.py:
--------------------------------------------------------------------------------
1 | # helper method for reading attributes from the model metadata
2 | def get_model_attribute(model, key, default_value = None):
3 |     if key not in model:
4 |         if default_value is None:
5 |             raise Exception("Require model attribute {} not found".format(key))
6 |         return default_value
7 |     return model[key]
8 | 


--------------------------------------------------------------------------------
/chapter 6/sampleCode9.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import requests
 3 | # Helper method for resolving url relative to the selected model
 4 | def get_url(model, path):
 5 |     return model["base_url"] + "/" + path
 6 |     
 7 | # Download the serialized model and create a TensorFlow graph
 8 | def load_graph(model):
 9 |     graph = tf.Graph()
10 |     graph_def = tf.GraphDef()
11 |     graph_def.ParseFromString(
12 |         requests.get( get_url( model, model["model_file_url"] ) ).content
13 |     )
14 |     with graph.as_default():
15 |         tf.import_graph_def(graph_def)
16 |     return graph
17 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode1.py:
--------------------------------------------------------------------------------
 1 | from six import iteritems
 2 | import json
 3 | import csv
 4 | from tweepy.streaming import StreamListener
 5 | class RawTweetsListener(StreamListener):
 6 |     def __init__(self):
 7 |         self.buffered_data = []
 8 |         self.counter = 0
 9 | 
10 |     def flush_buffer_if_needed(self):
11 |         "Check the buffer capacity and write to a new file if needed"
12 |         length = len(self.buffered_data)
13 |         if length > 0 and length % 10 == 0:
14 |             with open(os.path.join( output_dir, "tweets{}.csv".format(self.counter)), "w") as fs:
15 |                 self.counter += 1
16 |                 csv_writer = csv.DictWriter( fs, fieldnames = fieldnames)
17 |                 for data in self.buffered_data:
18 |                     csv_writer.writerow(data)
19 |             self.buffered_data = []
20 | 
21 |     def on_data(self, data):
22 |         def transform(key, value):
23 |             return transforms[key](value) if key in transforms else value
24 | 
25 |         self.buffered_data.append(
26 |             {key:transform(key,value) \
27 |                  for key,value in iteritems(json.loads(data)) \
28 |                  if key in fieldnames}
29 |         )
30 |         self.flush_buffer_if_needed()
31 |         return True
32 | 
33 |     def on_error(self, status):
34 |         print("An error occured while receiving streaming data: {}".format(status))
35 |         return False 
36 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode10.py:
--------------------------------------------------------------------------------
1 | parquet_batch_df = spark.sql(
2 | "select * from parquet.`{}`".format(
3 | os.path.join(root_dir, "output_parquet")
4 | )
5 | )
6 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode11.py:
--------------------------------------------------------------------------------
1 | from watson_developer_cloud import NaturalLanguageUnderstandingV1
2 | from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EntitiesOptions
3 | 
4 | nlu = NaturalLanguageUnderstandingV1(
5 |     version='2017-02-27',
6 |     username='XXXX',
7 |     password='XXXX'
8 | )
9 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode12.py:
--------------------------------------------------------------------------------
 1 | [[RawTweetsListener]]
 2 | def enrich(self, data):
 3 |     try:
 4 |         response = nlu.analyze( 
 5 |             text = data['text'],
 6 |             features = Features(
 7 |                 sentiment=SentimentOptions(), 
 8 |                 entities=EntitiesOptions()
 9 |             )
10 |         )
11 |         data["sentiment"] = response["sentiment"]["document"]["label"]
12 |         top_entity = response["entities"][0] if len(response["entities"]) > 0 else None
13 |         data["entity"] = top_entity["text"] if top_entity is not None else ""
14 |         data["entity_type"] = top_entity["type"] if top_entity is not None else ""
15 |         return data
16 |     except Exception as e:
17 |         self.warn("Error from Watson service while enriching data: {}".format(e))
18 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode13.py:
--------------------------------------------------------------------------------
 1 | field_metadata = [
 2 |     {"name": "created_at","type": DateType()},
 3 |     {"name": "text", "type": StringType()},
 4 |     {"name": "source", "type": StringType(), 
 5 |          "transform": lambda s: BS(s, "html.parser").text.strip()
 6 |     },
 7 |     {"name": "sentiment", "type": StringType()},
 8 |     {"name": "entity", "type": StringType()},
 9 |     {"name": "entity_type", "type": StringType()}
10 | ]
11 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode14.py:
--------------------------------------------------------------------------------
 1 | def on_data(self, data):
 2 |         def transform(key, value):
 3 |             return transforms[key](value) if key in transforms else value
 4 |         data = self.enrich(json.loads(data))
 5 |         if data is not None:
 6 |             self.buffered_data.append(
 7 |                 {key:transform(key,value) \
 8 |                      for key,value in iteritems(data) \
 9 |                      if key in fieldnames}
10 |             )
11 |             self.flush_buffer_if_needed()
12 |         return True
13 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode15.py:
--------------------------------------------------------------------------------
 1 | schema = StructType(
 2 |     [StructField(f["name"], f["type"], True) for f in field_metadata]
 3 | )
 4 | csv_sdf = spark.readStream \
 5 |     .csv(
 6 |         output_dir,
 7 |         schema=schema,
 8 |         multiLine = True,
 9 |         dateFormat = 'EEE MMM dd kk:mm:ss Z y',
10 |         ignoreTrailingWhiteSpace = True,
11 |         ignoreLeadingWhiteSpace = True
12 |     )
13 | csv_sdf.printSchema()
14 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode16.py:
--------------------------------------------------------------------------------
1 | def start_stream(queries):
2 |     "Asynchronously start a new Twitter stream"
3 |     stream = Stream(auth, RawTweetsListener())
4 |     stream.filter(track=queries, languages=["en"], async=True)
5 |     return stream
6 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode17.py:
--------------------------------------------------------------------------------
 1 | def start_streaming_dataframe(output_dir):
 2 |     "Start a Spark Streaming DataFrame from a file source"
 3 |     schema = StructType(
 4 |         [StructField(f["name"], f["type"], True) for f in field_metadata]
 5 |     )
 6 |     return spark.readStream \
 7 |         .csv(
 8 |             output_dir,
 9 |             schema=schema,
10 |             multiLine = True,
11 |             timestampFormat = 'EEE MMM dd kk:mm:ss Z yyyy',
12 |             ignoreTrailingWhiteSpace = True,
13 |             ignoreLeadingWhiteSpace = True
14 |         )
15 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode18.py:
--------------------------------------------------------------------------------
 1 | def start_parquet_streaming_query(csv_sdf):
 2 |     """
 3 |     Create and run a streaming query from a Structured DataFrame 
 4 |     outputing the results into a parquet database
 5 |     """
 6 |     streaming_query = csv_sdf \
 7 |       .writeStream \
 8 |       .format("parquet") \
 9 |       .option("path", os.path.join(root_dir, "output_parquet")) \
10 |       .trigger(processingTime="2 seconds") \
11 |       .option("checkpointLocation", os.path.join(root_dir, "output_chkpt")) \
12 |       .start()
13 |     return streaming_query
14 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode19.py:
--------------------------------------------------------------------------------
 1 | class StreamsManager():
 2 |     def __init__(self):
 3 |         self.twitter_stream = None
 4 |         self.csv_sdf = None
 5 |         
 6 |     def reset(self, search_query = None):
 7 |         if self.twitter_stream is not None:
 8 |             self.twitter_stream.disconnect()
 9 |         #stop all the active streaming queries and re_initialize the directories
10 |         for query in spark.streams.active:
11 |             query.stop()
12 |         # initialize the directories
13 |         self.root_dir, self.output_dir = init_output_dirs()
14 |         # start the tweepy stream
15 |         self.twitter_stream = start_stream([search_query]) if search_query is not None else None
16 |         # start the spark streaming stream
17 |         self.csv_sdf = start_streaming_dataframe(output_dir) if search_query is not None else None
18 |         
19 |     def __del__(self):
20 |         # Automatically called when the class is garbage collected
21 |         self.reset()
22 |         
23 | streams_manager = StreamsManager()
24 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode2.py:
--------------------------------------------------------------------------------
 1 | from pyspark.sql.types import StringType, DateType
 2 | from bs4 import BeautifulSoup as BS
 3 | fieldnames = [f["name"] for f in field_metadata]
 4 | transforms = {item['name']:item['transform'] for item in field_metadata if "transform" in item}
 5 | field_metadata = [
 6 |     {"name": "created_at","type": DateType()},
 7 |     {"name": "text", "type": StringType()},
 8 |     {"name": "source", "type": StringType(), 
 9 |          "transform": lambda s: BS(s, "html.parser").text.strip()
10 |     }
11 | ]
12 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode20.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | @PixieApp
 3 | class TweetInsightApp():    
 4 |     @route()
 5 |     def main_screen(self):
 6 |         return """
 7 | <style>
 8 |     div.outer-wrapper {
 9 |         display: table;width:100%;height:300px;
10 |     }
11 |     div.inner-wrapper {
12 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
13 |     }
14 | </style>
15 | <div class="outer-wrapper">
16 |     <div class="inner-wrapper">
17 |         <div class="col-sm-3"></div>
18 |         <div class="input-group col-sm-6">
19 |           <input id="query{{prefix}}" type="text" class="form-control"
20 |               value=""
21 |               placeholder="Enter a search query (e.g. baseball)">
22 |           <span class="input-group-btn">
23 |             <button class="btn btn-default" type="button" pd_options="search_query=$val(query{{prefix}})">
24 |                 Go
25 |             </button>
26 |           </span>
27 |         </div>
28 |     </div>
29 | </div>
30 |         """
31 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode21.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | [[TweetInsightApp]]
 3 | @route(search_query="*")
 4 |     def do_search_query(self, search_query):
 5 |         streams_manager.reset(search_query)
 6 |         start_parquet_streaming_query(streams_manager.csv_sdf)
 7 |         while True:
 8 |             try:
 9 |                 parquet_dir = os.path.join(root_dir, "output_parquet")
10 |                 self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir))
11 |                 break
12 |             except:
13 |                 time.sleep(5)
14 |         return """
15 | <div class="container">
16 |     <div id="header{{prefix}}" class="row no_loading_msg" pd_refresh_rate="5000" pd_target="header{{prefix}}">
17 |         <pd_script>
18 | print("Number of tweets received: {}".format(streams_manager.twitter_stream.listener.tweet_count))
19 |         </pd_script>
20 |     </div>
21 |     <div class="row" style="min-height:300px">
22 |         <div class="col-sm-5">
23 |             <div id="metric1{{prefix}}" pd_refresh_rate="10000" class="no_loading_msg"
24 |                 pd_options="display_metric1=true" pd_target="metric1{{prefix}}">
25 |             </div>
26 |         </div>
27 |         <div class="col-sm-5">
28 |             <div id="metric2{{prefix}}" pd_refresh_rate="12000" class="no_loading_msg"
29 |                 pd_options="display_metric2=true" pd_target="metric2{{prefix}}">
30 |             </div>
31 |         </div>
32 |     </div>
33 |     
34 |     <div class="row" style="min-height:400px">
35 |         <div class="col-sm-offset-1 col-sm-10">
36 |             <div id="word_cloud{{prefix}}" pd_refresh_rate="20000" class="no_loading_msg"
37 |                 pd_options="display_wc=true" pd_target="word_cloud{{prefix}}">
38 |             </div>
39 |         </div>
40 |     </div>
41 |         """
42 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode22.py:
--------------------------------------------------------------------------------
 1 | [[TweetInsightApp]]
 2 | @route(display_metric1="*")
 3 |     def do_display_metric1(self, display_metric1):
 4 |         parquet_dir = os.path.join(root_dir, "output_parquet")
 5 |         self.parquet_df = spark.sql("select * from parquet.`{}`".format(parquet_dir))
 6 |         return """
 7 | <div class="no_loading_msg" pd_render_onload pd_entity="parquet_df">
 8 |     <pd_options>
 9 |     {
10 |       "legend": "true",
11 |       "keyFields": "sentiment",
12 |       "clusterby": "entity_type",
13 |       "handlerId": "barChart",
14 |       "rendererId": "bokeh",
15 |       "rowCount": "10",
16 |       "sortby": "Values DESC",
17 |       "noChartCache": "true"
18 |     }
19 |     </pd_options>
20 | </div>
21 |         """
22 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode23.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | from wordcloud import WordCloud
 3 | [[TweetInsightApp]]
 4 | @route(display_wc="*")
 5 | @captureOutput
 6 | def do_display_wc(self):
 7 |     text = "\n".join(
 8 |         [r['entity'] for r in self.parquet_df.select("entity").collect() if r['entity'] is not None]
 9 |     )
10 |     plt.figure( figsize=(13,7) )
11 |     plt.axis("off")
12 |     plt.imshow(
13 |         WordCloud(width=750, height=350).generate(text), 
14 |         interpolation='bilinear'
15 |     )
16 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode24.py:
--------------------------------------------------------------------------------
1 | @PixieApp
2 | class StreamingQueriesApp():
3 |     @route()
4 |     def main_screen(self):
5 |         return """
6 | <div class="no_loading_msg" pd_refresh_rate="5000" pd_options="show_progress=true">
7 | </div>
8 |         """
9 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode25.py:
--------------------------------------------------------------------------------
 1 | @route(show_progress="true")
 2 |     def do_show_progress(self):
 3 |         return """
 4 | {%for query in this.spark.streams.active%}
 5 |     <div>
 6 |     <div class="page-header"> 
 7 |         <h1>Progress Report for Spark Stream: {{query.id}}</h1>
 8 |     <div>
 9 |     <table>
10 |         <thead>
11 |           <tr>
12 |              <th>metric</th>
13 |              <th>value</th>
14 |           </tr>
15 |         </thead>
16 |         <tbody>
17 |             {%for key, value in query.lastProgress.items()%}
18 |             <tr>
19 |                 <td>{{key}}</td>
20 |                 <td>{{value}}</td>
21 |             </tr>
22 |             {%endfor%}
23 |         </tbody>        
24 |     </table>
25 | {%endfor%}
26 |         """
27 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode26.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.display.app import *
 2 | from pixiedust.apps.template import TemplateTabbedApp
 3 | 
 4 | @PixieApp
 5 | class TwitterSentimentApp(TemplateTabbedApp):
 6 |     def setup(self):
 7 |         self.apps = [
 8 |             {"title": "Tweets Insights", "app_class": "TweetInsightApp"},
 9 |             {"title": "Streaming Queries", "app_class": "StreamingQueriesApp"}
10 |         ]
11 |         
12 | app = TwitterSentimentApp()
13 | app.run()
14 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode27.py:
--------------------------------------------------------------------------------
 1 | message_hub_creds = {
 2 |   "instance_id": "XXXXX",
 3 |   "mqlight_lookup_url": "https://mqlight-lookup-prod02.messagehub.services.us-south.bluemix.net/Lookup?serviceId=XXXX",
 4 |   "api_key": "XXXX",
 5 |   "kafka_admin_url": "https://kafka-admin-prod02.messagehub.services.us-south.bluemix.net:443",
 6 |   "kafka_rest_url": "https://kafka-rest-prod02.messagehub.services.us-south.bluemix.net:443",
 7 |   "kafka_brokers_sasl": [
 8 |     "kafka03-prod02.messagehub.services.us-south.bluemix.net:9093",
 9 |     "kafka01-prod02.messagehub.services.us-south.bluemix.net:9093",
10 |     "kafka02-prod02.messagehub.services.us-south.bluemix.net:9093",
11 |     "kafka05-prod02.messagehub.services.us-south.bluemix.net:9093",
12 |     "kafka04-prod02.messagehub.services.us-south.bluemix.net:9093"
13 |   ],
14 |   "user": "XXXX",
15 |   "password": "XXXX"
16 | }
17 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode28.py:
--------------------------------------------------------------------------------
 1 | [[RawTweetsListener]]
 2 | context = ssl.create_default_context()
 3 | context.options &= ssl.OP_NO_TLSv1
 4 | context.options &= ssl.OP_NO_TLSv1_1
 5 | kafka_conf = {
 6 |     'sasl_mechanism': 'PLAIN',
 7 |     'security_protocol': 'SASL_SSL',
 8 |     'ssl_context': context,
 9 |     "bootstrap_servers": message_hub_creds["kafka_brokers_sasl"],
10 |     "sasl_plain_username": message_hub_creds["user"],
11 |     "sasl_plain_password": message_hub_creds["password"],
12 |     "api_version":(0, 10, 1),
13 |     "value_serializer" : lambda v: json.dumps(v).encode('utf-8')
14 | }
15 | self.producer = KafkaProducer(**kafka_conf)
16 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode29.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import json
 3 | 
 4 | def ensure_topic_exists(topic_name):
 5 |     response = requests.post(
 6 |                 message_hub_creds["kafka_rest_url"] + "/admin/topics", 
 7 |                 data = json.dumps({"name": topic_name}),
 8 |                 headers={"X-Auth-Token": message_hub_creds["api_key"]}
 9 |             )
10 |     if response.status_code != 200 and response.status_code != 202 and \
11 |        response.status_code != 422 and response.status_code != 403:
12 |         raise Exception(response.json())
13 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode3.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | def ensure_dir(dir, delete_tree = False):
 3 |     if not os.path.exists(dir):
 4 |         os.makedirs(dir)
 5 |     elif delete_tree:
 6 |         shutil.rmtree(dir)
 7 |         os.makedirs(dir)
 8 |     return os.path.abspath(dir)
 9 | 
10 | root_dir = ensure_dir("output", delete_tree = True)
11 | output_dir = ensure_dir(os.path.join(root_dir, "raw"))
12 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode30.py:
--------------------------------------------------------------------------------
 1 | [[RawTweetsListener]]
 2 | def on_data(self, data):
 3 |     self.tweet_count += 1
 4 |     self.producer.send(
 5 |         self.topic, 
 6 |         {key:transform(key,value) \
 7 |                 for key,value in iteritems(json.loads(data)) \
 8 |                 if key in fieldnames}
 9 |     )
10 |     return True
11 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode31.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from watson_developer_cloud import NaturalLanguageUnderstandingV1
 3 | from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions, EntitiesOptions
 4 | 
 5 | # init() function will be called once on pipeline initialization
 6 | # @state a Python dictionary object for keeping state. The state object is passed to the process function
 7 | def init(state):
 8 |     # do something once on pipeline initialization and save in the state object
 9 |     state["nlu"] = NaturalLanguageUnderstandingV1(
10 |         version='2017-02-27',
11 |         username='XXXX’,
12 |         password='XXXX'
13 |     )
14 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode32.py:
--------------------------------------------------------------------------------
 1 | # @event a Python dictionary object representing the input event tuple as defined by the input schema
 2 | # @state a Python dictionary object for keeping state over subsequent function calls
 3 | # return must be a Python dictionary object. It will be the output of this operator.
 4 | # Returning None results in not submitting an output tuple for this invocation.
 5 | # You must declare all output attributes in the Edit Schema window.
 6 | def process(event, state):
 7 |     # Enrich the event, such as by:
 8 |     # event['wordCount'] = len(event['phrase'].split())
 9 |     try:
10 |         event['text'] = event['text'].replace('"', "'")
11 |         response = state["nlu"].analyze( 
12 |             text = event['text'],
13 |             features=Features(sentiment=SentimentOptions(), entities=EntitiesOptions())
14 |         )
15 |         event["sentiment"] = response["sentiment"]["document"]["label"]
16 |         top_entity = response["entities"][0] if len(response["entities"]) > 0 else None
17 |         event["entity"] = top_entity["text"] if top_entity is not None else ""
18 |         event["entity_type"] = top_entity["type"] if top_entity is not None else ""
19 |     except Exception as e:
20 |         return None
21 |     return event
22 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode33.py:
--------------------------------------------------------------------------------
 1 | def start_streaming_dataframe():
 2 |     "Start a Spark Streaming DataFrame from a Kafka Input source"
 3 |     schema = StructType(
 4 |         [StructField(f["name"], f["type"], True) for f in field_metadata]
 5 |     )
 6 |     kafka_options = {
 7 |         "kafka.ssl.protocol":"TLSv1.2",
 8 |         "kafka.ssl.enabled.protocols":"TLSv1.2",
 9 |         "kafka.ssl.endpoint.identification.algorithm":"HTTPS",
10 |         'kafka.sasl.mechanism': 'PLAIN',
11 |         'kafka.security.protocol': 'SASL_SSL'
12 |     }
13 |     return spark.readStream \
14 |         .format("kafka") \
15 |         .option("kafka.bootstrap.servers", ",".join(message_hub_creds["kafka_brokers_sasl"])) \
16 |         .option("subscribe", "enriched_tweets") \
17 |         .load(**kafka_options)
18 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode34.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "language": "python",
 3 |     "env": {
 4 |      "SCALA_HOME": "/Users/dtaieb/pixiedust/bin/scala/scala-2.11.8",
 5 |      "PYTHONPATH": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/:/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip",
 6 |      "SPARK_HOME": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7",
 7 |      "PYSPARK_SUBMIT_ARGS": "--driver-java-options=-Djava.security.auth.login.config=/Users/dtaieb/pixiedust/jaas.conf --jars /Users/dtaieb/pixiedust/bin/cloudant-spark-v2.0.0-185.jar --driver-class-path /Users/dtaieb/pixiedust/data/libs/* --master local[10] --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0 pyspark-shell",
 8 |      "PIXIEDUST_HOME": "/Users/dtaieb/pixiedust",
 9 |      "SPARK_DRIVER_MEMORY": "10G",
10 |      "SPARK_LOCAL_IP": "127.0.0.1",
11 |      "PYTHONSTARTUP": "/Users/dtaieb/pixiedust/bin/spark/spark-2.3.0-bin-hadoop2.7/python/pyspark/shell.py"
12 |     },
13 |     "display_name": "Python with Pixiedust (Spark 2.3)",
14 |     "argv": [
15 |      "python",
16 |      "-m",
17 |      "ipykernel",
18 |      "-f",
19 |      "{connection_file}"
20 |     ]
21 |    }
22 |    


--------------------------------------------------------------------------------
/chapter 7/sampleCode4.py:
--------------------------------------------------------------------------------
1 | from tweepy import Stream
2 | def start_stream(queries):
3 |     "Asynchronously start a new Twitter stream"
4 |     stream = Stream(auth, RawTweetsListener())
5 |     stream.filter(track=queries, async=True)
6 |     return stream
7 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode5.py:
--------------------------------------------------------------------------------
 1 | schema = StructType(
 2 | [StructField(f["name"], f["type"], True) for f in field_metadata]
 3 | )
 4 | csv_sdf = spark.readStream\
 5 | 	.format("csv")\
 6 | 	.option("schema", schema)\
 7 | 	.option("multiline", True)\
 8 | 	.option("dateFormat", 'EEE MMM dd kk:mm:ss Z y')\
 9 | .option("ignoreTrailingWhiteSpace", True)\
10 | .option("ignoreLeadingWhiteSpace", True)\
11 | 	.load(output_dir)
12 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode6.py:
--------------------------------------------------------------------------------
 1 | csv_sdf = spark.readStream \
 2 |     .csv(
 3 |         output_dir,
 4 |         schema=schema,
 5 |         multiLine = True,
 6 |         dateFormat = 'EEE MMM dd kk:mm:ss Z y',
 7 |         ignoreTrailingWhiteSpace = True,
 8 |         ignoreLeadingWhiteSpace = True
 9 |     )
10 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode7.py:
--------------------------------------------------------------------------------
1 | tweet_streaming_query = csv_sdf \
2 |   .writeStream \
3 |   .format("parquet") \
4 |   .option("path", os.path.join(root_dir, "output_parquet")) \
5 |   .trigger(processingTime="2 seconds") \
6 |   .option("checkpointLocation", os.path.join(root_dir, "output_chkpt")) \
7 |   .start()
8 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode8.py:
--------------------------------------------------------------------------------
1 | tweet_streaming_query = csv_sdf.writeStream\
2 |   .outputMode("append")\
3 |   .format("console")\
4 |   .trigger(processingTime='2 seconds')\
5 |   .start()
6 | 


--------------------------------------------------------------------------------
/chapter 7/sampleCode9.py:
--------------------------------------------------------------------------------
1 | import json
2 | for query in spark.streams.active:
3 |     print("-----------")
4 |     print("id: {}".format(query.id))
5 |     print(json.dumps(query.lastProgress, indent=2, sort_keys=True))
6 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode1.py:
--------------------------------------------------------------------------------
1 | import statsmodels
2 | np.lookfor("acf", module = statsmodels)
3 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode10.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as smt
2 | smt.graphics.plot_pacf(msft['Adj. Close'], lags=50)
3 | plt.show()
4 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode11.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class StockExplorer():
 3 |     @route()
 4 |     def main_screen(self):
 5 |         return """
 6 | <style>
 7 |     div.outer-wrapper {
 8 |         display: table;width:100%;height:300px;
 9 |     }
10 |     div.inner-wrapper {
11 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
12 |     }
13 | </style>
14 | <div class="outer-wrapper">
15 |     <div class="inner-wrapper">
16 |         <div class="col-sm-3"></div>
17 |         <div class="input-group col-sm-6">
18 |           <input id="stocks{{prefix}}" type="text" class="form-control" 
19 |               value="MSFT,AMZN,IBM"
20 |               placeholder="Enter a list of stocks separated by comma e.g MSFT,AMZN,IBM">
21 |           <span class="input-group-btn">
22 |             <button class="btn btn-default" type="button" pd_options="explore=true">
23 |                 <pd_script>
24 | self.select_tickers('$val(stocks{{prefix}})'.split(','))
25 |                 </pd_script>
26 |                 Explore
27 |             </button>
28 |           </span>
29 |         </div>
30 |     </div>
31 | </div>   
32 | """
33 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode12.py:
--------------------------------------------------------------------------------
 1 | [[StockExplorer]]
 2 | def select_tickers(self, tickers):
 3 |         self.tickers = {ticker.strip():{} for ticker in tickers}
 4 |         self.set_active_ticker(tickers[0].strip())
 5 |         
 6 | def set_active_ticker(self, ticker):
 7 |     self.active_ticker = ticker
 8 |     if 'df' not in self.tickers[ticker]:
 9 |         self.tickers[ticker]['df'] = quandl.get('WIKI/{}'.format(ticker))
10 |         self.tickers[ticker]['df']['daily_spread'] = self.tickers[ticker]['df']['Adj. Close'] - self.tickers[ticker]['df']['Adj. Open']
11 |         self.tickers[ticker]['df'] = self.tickers[ticker]['df'].reset_index()
12 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode13.py:
--------------------------------------------------------------------------------
 1 | [[StockExplorer]]
 2 | @route(explore="*")
 3 | @templateArgs
 4 | def stock_explore_screen(self):
 5 |     tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),
 6 |             ("ACF and PACF", "AutoCorrelationSubApp")]
 7 |     return """
 8 | <style>    
 9 |     .btn:active, .btn.active {
10 |         background-color:aliceblue;
11 |     }
12 | </style>
13 | <div class="page-header">
14 |   <h1>Stock Explorer PixieApp</h1>
15 | </div>
16 | <div class="container-fluid">
17 |     <div class="row">
18 |         <div class="btn-group-vertical btn-group-toggle col-sm-2" data-toggle="buttons">
19 |             {%for title, subapp in tabs%}
20 |             <label class="btn btn-secondary {%if loop.first%}active{%endif%}"
21 |                 pd_options="show_analytic={{subapp}}"
22 |                 pd_target="analytic_screen{{prefix}}">
23 |                 <input type="radio" {%if loop.first%}checked{%endif%}> 
24 |                     {{title}}
25 |             </label>
26 |             {%endfor%}
27 |         </div>
28 |         <div id="analytic_screen{{prefix}}" class="col-sm-10">
29 |         </div>
30 |     </div>
31 | """
32 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode14.py:
--------------------------------------------------------------------------------
1 |     @route(show_analytic="*")
2 |     def show_analytic_screen(self, show_analytic):
3 |         return """
4 | <div pd_app="{{show_analytic}}" pd_render_onload></div>
5 | """
6 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode15.py:
--------------------------------------------------------------------------------
 1 | [[BaseSubApp]]
 2 | def add_ticker_selection_markup(refresh_ids):
 3 |     def deco(fn):
 4 |         def wrap(self, *args, **kwargs):
 5 |             return """
 6 | <div class="row" style="text-align:center">
 7 |     <div class="btn-group btn-group-toggle" style="border-bottom:2px solid #eeeeee" data-toggle="buttons">
 8 |         {%for ticker, state in this.parent_pixieapp.tickers.items()%}
 9 |         <label class="btn btn-secondary {%if this.parent_pixieapp.active_ticker == ticker%}active{%endif%}"
10 |             pd_refresh=\"""" + ",".join(refresh_ids) + """\" pd_script="self.parent_pixieapp.set_active_ticker('{{ticker}}')">
11 |             <input type="radio" {%if this.parent_pixieapp.active_ticker == ticker%}checked{%endif%}> 
12 |                 {{ticker}}
13 |         </label>
14 |         {%endfor%}
15 |     </div>
16 | </div>
17 |             """ + fn(self, *args, **kwargs)
18 |         return wrap
19 |     return deco
20 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode16.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class StockExploreSubApp(BaseSubApp):
 3 |     @route()
 4 |     @BaseSubApp.add_ticker_selection_markup(['chart{{prefix}}', 'daily_spread{{prefix}}'])
 5 |     def main_screen(self):
 6 |         return """
 7 | <div class="row" style="min-height:300px">
 8 |     <div class="col-xs-6" id="chart{{prefix}}" pd_render_onload pd_options="show_chart=Adj. Close">
 9 |     </div>
10 |     <div class="col-xs-6" id="daily_spread{{prefix}}" pd_render_onload pd_options="show_chart=daily_spread">
11 |     </div>
12 | </div>
13 | """
14 |         
15 |     @route(show_chart="*")
16 |     def show_chart_screen(self, show_chart):
17 |         return """
18 | <div pd_entity="parent_pixieapp.get_active_df()" pd_render_onload>
19 |     <pd_options>
20 |     {
21 |       "handlerId": "lineChart",
22 |       "valueFields": "{{show_chart}}",
23 |       "rendererId": "bokeh",
24 |       "keyFields": "Date",
25 |       "noChartCache": "true",
26 |       "rowCount": "10000"
27 |     }
28 |     </pd_options>
29 | </div>
30 |         """
31 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode17.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | def get_active_df(self):
3 |     return self.tickers[self.active_ticker]['df']
4 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode18.py:
--------------------------------------------------------------------------------
1 | tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),("ACF and PACF", "AutoCorrelationSubApp")]


--------------------------------------------------------------------------------
/chapter 8/sampleCode19.py:
--------------------------------------------------------------------------------
 1 | [[BaseSubApp]] 
 2 | @route(widget="lag_slider")
 3 | def slider_screen(self):
 4 |     return """
 5 | <div>
 6 |     <label class="field">Lag:<span id="slideval{{prefix}}">50</span></label>
 7 |     <i class="fa fa-info-circle" style="color:orange" data-toggle="pd-tooltip" 
 8 |         title="Selected lag used to compute moving average, ACF or PACF"></i>
 9 |     <div id="slider{{prefix}}" name="slider" data-min=30 data-max=300
10 |         data-default=50 style="margin: 0 0.6em;">
11 |     </div>
12 | </div>
13 | <script>
14 | $("[id^=slider][id$={{prefix}}]").each(function() {
15 |     var sliderElt = $(this)
16 |     var min = sliderElt.data("min")
17 |     var max = sliderElt.data("max")
18 |     var val = sliderElt.data("default")
19 |     sliderElt.slider({
20 |         min: isNaN(min) ? 0 : min,
21 |         max: isNaN(max) ? 100 : max,
22 |         value: isNaN(val) ? 50 : val,
23 |         change: function(evt, ui) {
24 |             $("[id=slideval{{prefix}}]").text(ui.value); 
25 |             pixiedust.sendEvent({type:'lagSlider',value:ui.value})
26 |         },
27 |         slide: function(evt, ui) {
28 |             $("[id=slideval{{prefix}}]").text(ui.value);
29 |         }
30 |     });
31 | })
32 | </script>
33 |         """
34 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode2.py:
--------------------------------------------------------------------------------
1 | ar = np.arange(20)
2 | print(ar)
3 | print(ar.reshape(4,5)) 
4 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode20.py:
--------------------------------------------------------------------------------
 1 | @PixieApp
 2 | class MovingAverageSubApp(BaseSubApp):
 3 |     @route()
 4 |     @BaseSubApp.add_ticker_selection_markup(['chart{{prefix}}'])
 5 |     def main_screen(self):
 6 |         return """
 7 | <div class="row" style="min-height:300px">
 8 |     <div class="page-header text-center">
 9 |         <h1>Moving Average for {{this.parent_pixieapp.active_ticker}}</h1>
10 |     </div>
11 |     <div class="col-sm-12" id="chart{{prefix}}" pd_render_onload pd_entity="get_moving_average_df()">
12 |         <pd_options>
13 |         {
14 |           "valueFields": "Adj. Close",
15 |           "keyFields": "x",
16 |           "rendererId": "bokeh",
17 |           "handlerId": "lineChart",
18 |           "rowCount": "10000"
19 |         }
20 |         </pd_options>
21 |     </div>
22 | </div>
23 | <div class="row">
24 |     <div pd_widget="lag_slider">
25 |         <pd_event_handler 
26 |             pd_source="lagSlider"
27 |             pd_script="self.lag = eventInfo['value']"
28 |             pd_refresh="chart{{prefix}}">
29 |         </pd_event_handler>
30 |     </div>
31 | </div>
32 | """
33 |     def get_moving_average_df(self):
34 |         ma = self.parent_pixieapp.get_active_df()['Adj. Close'].rolling(window=self.lag).mean()
35 |         ma_df = pd.DataFrame(ma)
36 |         ma_df["x"] = ma_df.index
37 |         return ma_df
38 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode21.py:
--------------------------------------------------------------------------------
 1 | import statsmodels.tsa.api as smt
 2 | @PixieApp
 3 | class AutoCorrelationSubApp(BaseSubApp):
 4 |     @route()
 5 |     @BaseSubApp.add_ticker_selection_markup(['chart_acf{{prefix}}', 'chart_pacf{{prefix}}'])
 6 |     def main_screen(self):
 7 |         return """
 8 | <div class="row" style="min-height:300px">
 9 |     <div class="col-sm-6">
10 |         <div class="page-header text-center">
11 |             <h1>Auto-correlation Function</h1>
12 |         </div>
13 |         <div id="chart_acf{{prefix}}" pd_render_onload pd_options="show_acf=true">
14 |         </div>
15 |     </div>
16 |     <div class="col-sm-6">
17 |         <div class="page-header text-center">
18 |             <h1>Partial Auto-correlation Function</h1>
19 |         </div>
20 |         <div id="chart_pacf{{prefix}}" pd_render_onload pd_options="show_pacf=true">
21 |         </div>
22 |     </div>
23 | </div> 
24 | 
25 | <div class="row">
26 |     <div pd_widget="lag_slider">
27 |         <pd_event_handler 
28 |             pd_source="lagSlider"
29 |             pd_script="self.lag = eventInfo['value']"
30 |             pd_refresh="chart_acf{{prefix}},chart_pacf{{prefix}}">
31 |         </pd_event_handler>
32 |     </div>
33 | </div>
34 | """
35 |     @route(show_acf='*')
36 |     @captureOutput
37 |     def show_acf_screen(self):
38 |         smt.graphics.plot_acf(self.parent_pixieapp.get_active_df()['Adj. Close'], lags=self.lag)
39 |     
40 |     @route(show_pacf='*')
41 |     @captureOutput
42 |     def show_pacf_screen(self):
43 |         smt.graphics.plot_pacf(self.parent_pixieapp.get_active_df()['Adj. Close'], lags=self.lag)
44 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode22.py:
--------------------------------------------------------------------------------
1 | logmsft = np.log(train_set['Adj. Close'])
2 | logmsft.index = train_set['Date']
3 | logmsft_diff = pd.DataFrame(logmsft - logmsft.shift()).reset_index()
4 | logmsft_diff.dropna(inplace=True)
5 | display(logmsft_diff)
6 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode23.py:
--------------------------------------------------------------------------------
 1 | from statsmodels.tsa.stattools import adfuller
 2 | import pprint
 3 | 
 4 | ad_fuller_results = adfuller(
 5 | logmsft_diff['Adj. Close'], autolag = 'AIC', regression = 'c'
 6 | )
 7 | labels = ['Test Statistic','p-value','#Lags Used','Number of Observations Used']
 8 | pp = pprint.PrettyPrinter(indent=4)
 9 | pp.pprint({labels[i]: ad_fuller_results[i] for i in range(4)})
10 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode24.json:
--------------------------------------------------------------------------------
1 | {   
2 |     'Number of lags used': 3,
3 |     'Number of Observations Used': 8057,
4 |     'Test statistic': -48.071592138591136,
5 |     'MacKinnon’s approximate p-value': 0.0
6 | }
7 |     
8 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode25.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as smt
2 | smt.graphics.plot_acf(logmsft_diff['Adj. Close'], lags=100)
3 | plt.show()
4 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode26.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_pacf(logmsft_diff['Adj. Close'], lags=100)
2 | plt.show()
3 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode27.py:
--------------------------------------------------------------------------------
 1 | from statsmodels.tsa.arima_model import ARIMA
 2 | 
 3 | import warnings
 4 | with warnings.catch_warnings():
 5 |     warnings.simplefilter("ignore")
 6 |     arima_model_class = ARIMA(train_set['Adj. Close'], dates=train_set['Date'], order=(1,1,1))
 7 |     arima_model = arima_model_class.fit(disp=0)
 8 | 
 9 |     print(arima_model.resid.describe())
10 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode28.py:
--------------------------------------------------------------------------------
 1 | def plot_predict(model, dates_series, num_observations):
 2 |     fig = plt.figure(figsize = (12,5))
 3 |     model.plot_predict(
 4 |         start = str(dates_series[len(dates_series)-num_observations]), 
 5 |         end = str(dates_series[len(dates_series)-1])
 6 |     )
 7 |     plt.show()
 8 |     
 9 | plot_predict(arima_model, train_set['Date'], 100)
10 | plot_predict(arima_model, train_set['Date'], 10) 
11 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode29.py:
--------------------------------------------------------------------------------
 1 | def compute_test_set_predictions(train_set, test_set):
 2 |     with warnings.catch_warnings():
 3 |         warnings.simplefilter("ignore")
 4 |         history = train_set['Adj. Close'].values
 5 |         forecast = np.array([])
 6 |         for t in range(len(test_set)):
 7 |             prediction = ARIMA(history, order=(1,1,0)).fit(disp=0).forecast()
 8 |             history = np.append(history, test_set['Adj. Close'].iloc[t])
 9 |             forecast = np.append(forecast, prediction[0])
10 |         return pd.DataFrame(
11 |           {"forecast": forecast,
12 |            "test": test_set['Adj. Close'],
13 |            "Date": pd.date_range(start=test_set['Date'].iloc[len(test_set)-1], periods = len(test_set))
14 |           }
15 |         )
16 |         
17 | results = compute_test_set_predictions(train_set, test_set)
18 | display(results)
19 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode3.py:
--------------------------------------------------------------------------------
1 | sample = np.arange(10)
2 | print("Sample:", sample)
3 | print("Access by index: ", sample[2])
4 | print("First 5 elements: ", sample[:5])
5 | print("From 8 to the end: ", sample[8:])
6 | print("Last 3 elements: ", sample[-3:])
7 | print("Every 2 elements: ", sample[::2])
8 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode30.py:
--------------------------------------------------------------------------------
1 | from sklearn.metrics import mean_squared_error
2 | def compute_mean_squared_error(test_series, forecast_series):
3 |     return mean_squared_error(test_series, forecast_series)
4 | 
5 | print('Mean Squared Error: {}'.format( 
6 | compute_mean_squared_error( test_set['Adj. Close'], results.forecast)) 
7 | )
8 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode31.py:
--------------------------------------------------------------------------------
1 | [[StockExplorer]]
2 | @route(explore="*")
3 | @templateArgs
4 | def stock_explore_screen(self):
5 |    tabs = [("Explore","StockExploreSubApp"), ("Moving Average", "MovingAverageSubApp"),
6 |                 ("ACF and PACF", "AutoCorrelationSubApp"), ("Forecast with ARIMA", "ForecastArimaSubApp")] 
7 |    …
8 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode32.py:
--------------------------------------------------------------------------------
 1 | from statsmodels.tsa.arima_model import ARIMA
 2 | 
 3 | @PixieApp
 4 | class ForecastArimaSubApp(BaseSubApp):
 5 |     def setup(self):
 6 |         self.entity_dataframe = self.parent_pixieapp.get_active_df().copy()
 7 |         self.differencing = False
 8 |         
 9 |     def set_active_ticker(self, ticker):
10 |         BaseSubApp.set_active_ticker(self, ticker)
11 |         self.setup()
12 | 
13 |     @route()
14 |     @BaseSubApp.add_ticker_selection_markup([])
15 |     def main_screen(self):
16 |         return """
17 | <div class="page-header text-center">
18 |     <h2>1. Data Exploration to test for Stationarity
19 |         <button class="btn btn-default" pd_script="self.toggle_differencing()" pd_refresh>
20 |             {%if this.differencing%}Remove differencing{%else%}Add differencing{%endif%}
21 |         </button>
22 |         <button class="btn btn-default" pd_options="do_forecast=true">
23 |             Continue to Forecast
24 |         </button>
25 |     </h2>
26 | </div>
27 | 
28 | <div class="row" style="min-height:300px">
29 |     <div class="col-sm-10" id="chart{{prefix}}" pd_render_onload pd_options="show_chart=Adj. Close">
30 |     </div>
31 | </div>
32 | 
33 | <div class="row" style="min-height:300px">
34 |     <div class="col-sm-6">
35 |         <div class="page-header text-center">
36 |             <h3>Auto-correlation Function</h3>
37 |         </div>
38 |         <div id="chart_acf{{prefix}}" pd_render_onload pd_options="show_acf=true">
39 |         </div>
40 |     </div>
41 |     <div class="col-sm-6">
42 |         <div class="page-header text-center">
43 |             <h3>Partial Auto-correlation Function</h3>
44 |         </div>
45 |         <div id="chart_pacf{{prefix}}" pd_render_onload pd_options="show_pacf=true">
46 |         </div>
47 |     </div>
48 | </div>
49 |         """
50 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode33.py:
--------------------------------------------------------------------------------
 1 | [[BaseSubApp]]
 2 | def add_ticker_selection_markup(refresh_ids):
 3 |         def deco(fn):
 4 |             def wrap(self, *args, **kwargs):
 5 |                 return """
 6 | <div class="row" style="text-align:center">
 7 |     <div class="btn-group btn-group-toggle" style="border-bottom:2px solid #eeeeee" data-toggle="buttons">
 8 |         {%for ticker, state in this.parent_pixieapp.tickers.items()%}
 9 |         <label class="btn btn-secondary {%if this.parent_pixieapp.active_ticker == ticker%}active{%endif%}"
10 |             pd_refresh=\"""" + ",".join(refresh_ids) + """\" pd_script="self.set_active_ticker('{{ticker}}')">
11 |             <input type="radio" {%if this.parent_pixieapp.active_ticker == ticker%}checked{%endif%}> 
12 |                 {{ticker}}
13 |         </label>
14 |         {%endfor%}
15 |     </div>
16 | </div>
17 |                 """ + fn(self, *args, **kwargs)
18 |             return wrap
19 |         return deco
20 |     
21 |     def set_active_ticker(self, ticker):
22 |         self.parent_pixieapp.set_active_ticker(ticker)
23 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode34.py:
--------------------------------------------------------------------------------
1 | [[ForecastArimaSubApp]]
2 | def set_active_ticker(self, ticker):
3 |         BaseSubApp.set_active_ticker(self, ticker)
4 |         self.setup()
5 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode35.py:
--------------------------------------------------------------------------------
 1 | @route()
 2 |     @BaseSubApp.add_ticker_selection_markup([])
 3 |     def main_screen(self):
 4 |         return """
 5 | <div class="page-header text-center">
 6 |   <h2>1. Data Exploration to test for Stationarity
 7 |     <button class="btn btn-default" pd_script="self.toggle_differencing()" pd_refresh>
 8 |     {%if this.differencing%}Remove differencing{%else%}Add differencing{%endif%}
 9 |     </button>
10 |     <button class="btn btn-default" pd_options="do_forecast=true">
11 |         Continue to Forecast
12 |     </button>
13 |   </h2>
14 | </div>
15 | 
16 | <div class="row" style="min-height:300px">
17 |   <div class="col-sm-10" id="chart{{prefix}}" pd_render_onload pd_options="show_chart=Adj. Close">
18 |   </div>
19 | </div>
20 | 
21 | <div class="row" style="min-height:300px">
22 |     <div class="col-sm-6">
23 |         <div class="page-header text-center">
24 |             <h3>Auto-correlation Function</h3>
25 |         </div>
26 |         <div id="chart_acf{{prefix}}" pd_render_onload pd_options="show_acf=true">
27 |         </div>
28 |     </div>
29 |     <div class="col-sm-6">
30 |       <div class="page-header text-center">
31 |          <h3>Partial Auto-correlation Function</h3>
32 |       </div>
33 |       <div id="chart_pacf{{prefix}}" pd_render_onload pd_options="show_pacf=true">
34 |       </div>
35 |     </div>
36 | </div>
37 |         """
38 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode36.py:
--------------------------------------------------------------------------------
 1 | def toggle_differencing(self):
 2 |    if self.differencing:
 3 |        self.entity_dataframe = self.parent_pixieapp.get_active_df().copy()
 4 |        self.differencing = False
 5 |    else:
 6 |        log_df = np.log(self.entity_dataframe['Adj. Close'])
 7 |        log_df.index = self.entity_dataframe['Date']
 8 |        self.entity_dataframe = pd.DataFrame(log_df - log_df.shift()).reset_index()
 9 |        self.entity_dataframe.dropna(inplace=True)
10 |        self.differencing = True
11 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode37.py:
--------------------------------------------------------------------------------
 1 | @route(show_acf='*')
 2 | @captureOutput
 3 | def show_acf_screen(self):
 4 |     smt.graphics.plot_acf(self.entity_dataframe['Adj. Close'], lags=50)
 5 | 
 6 | @route(show_pacf='*')
 7 | @captureOutput
 8 | def show_pacf_screen(self):
 9 |     smt.graphics.plot_pacf(self.entity_dataframe['Adj. Close'], lags=50)
10 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode38.py:
--------------------------------------------------------------------------------
 1 | @route(do_forecast="true")
 2 |     @BaseSubApp.add_ticker_selection_markup([])
 3 |     def do_forecast_screen(self):
 4 |         return """
 5 | <div class="page-header text-center">
 6 |     <h2>2. Build Arima model
 7 |         <button class="btn btn-default" pd_options="do_diagnose=true">
 8 |             Diagnose Model
 9 |         </button>
10 |     </h2>
11 | </div>
12 | <div class="row" id="forecast{{prefix}}">
13 |     <div style="font-weight:bold">Enter the p,d,q order for the ARIMA model you want to build</div>
14 | 
15 |     <div class="form-group" style="margin-left: 20px">
16 |         <label class="control-label">Enter the p order for the AR model:</label>
17 |         <input type="text" class="form-control" id="p_order{{prefix}}" value="1" style="width: 100px;margin-left:10px">
18 | 
19 |         <label class="control-label">Enter the d order for the Integrated step:</label>
20 |         <input type="text" class="form-control" id="d_order{{prefix}}" value="1" style="width: 100px;margin-left:10px">
21 | 
22 |         <label class="control-label">Enter the q order for the MA model:</label>
23 |         <input type="text" class="form-control" id="q_order{{prefix}}" value="1" style="width: 100px;margin-left:10px">
24 |     </div>
25 | 
26 |     <center>
27 |         <button class="btn btn-default" pd_target="forecast{{prefix}}"
28 |             pd_options="p_order=$val(p_order{{prefix}});d_order=$val(p_order{{prefix}});q_order=$val(p_order{{prefix}})">
29 |         Go
30 |         </button>
31 |     </center>
32 | </div>
33 | """
34 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode39.py:
--------------------------------------------------------------------------------
1 |     @route(plot_predict="true")
2 |     @captureOutput
3 |     def plot_predict(self):
4 |         plot_predict(self.arima_model, self.train_set['Date'], 100)
5 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode4.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "databases": [{
 3 |            "id": 231,
 4 |            "name": "Deutsche Bundesbank Data Repository",
 5 |            "database_code": "BUNDESBANK",
 6 |            "description": "Data on the German economy, …",
 7 |            "datasets_count": 49358,
 8 |            "downloads": 43209922,
 9 |            "premium": false,
10 |            "image": "https://quandl--upload.s3.amazonaws/...thumb_bundesbank.png",
11 |            "favorite": false,
12 |            "url_name": "Deutsche-Bundesbank-Data-Repository"
13 |          },…
14 |   ], 
15 |     "meta": {
16 |       "query": "",
17 |       "per_page": 100,
18 |       "current_page": 1,
19 |       "prev_page": null,
20 |       "total_pages": 3,
21 |       "total_count": 274,
22 |       "next_page": 2,
23 |       "current_first_item": 1,
24 |       "current_last_item": 100
25 |     }
26 |   }
27 |   


--------------------------------------------------------------------------------
/chapter 8/sampleCode40.py:
--------------------------------------------------------------------------------
 1 | @route(p_order="*",d_order="*",q_order="*")
 2 | def build_arima_model_screen(self, p_order, d_order, q_order):
 3 |     #Build the arima model
 4 |     self.train_set = self.parent_pixieapp.get_active_df()[:-14]
 5 |     self.test_set = self.parent_pixieapp.get_active_df()[-14:]
 6 |     self.arima_model = ARIMA(
 7 |         self.train_set['Adj. Close'], dates=self.train_set['Date'], 
 8 |         order=(int(p_order),int(d_order),int(q_order))
 9 |     ).fit(disp=0)
10 |     self.residuals = self.arima_model.resid.describe().to_frame().reset_index()
11 |     return """
12 | <div class="page-header text-center">
13 |     <h3>ARIMA Model succesfully created</h3>
14 | <div>
15 | <div class="row">
16 |     <div class="col-sm-10 col-sm-offset-3">
17 |         <div pd_render_onload pd_options="plot_predict=true">
18 |         </div>
19 |         <h3>Predicted values against the train set</h3>
20 |     </div>
21 | </div>
22 | <div class="row">
23 |     <div pd_render_onload pd_entity="residuals">
24 |         <pd_options>
25 |         {
26 |           "handlerId": "tableView",
27 |           "table_noschema": "true",
28 |           "table_nosearch": "true",
29 |           "table_nocount": "true"
30 |         }
31 |         </pd_options>
32 |     </div>
33 |     <h3><center>Residual errors statistics</center></h3> 
34 | <div>
35 |         """
36 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode41.py:
--------------------------------------------------------------------------------
 1 |     def compute_test_set_predictions(self):
 2 |         return compute_test_set_predictions(self.train_set, self.test_set)
 3 | 
 4 |     @route(do_diagnose="true")
 5 |     @BaseSubApp.add_ticker_selection_markup([])
 6 |     def do_diagnose_screen(self):
 7 |         return """
 8 | <div class="page-header text-center"><h2>3. Diagnose the model against the test set</h2></div>
 9 | <div class="row">
10 |     <div class="col-sm-10 center" pd_render_onload pd_entity="compute_test_set_predictions()">
11 |         <pd_options>
12 |         {
13 |           "keyFields": "Date",
14 |           "valueFields": "forecast,test",
15 |           "handlerId": "lineChart",
16 |           "rendererId": "bokeh",
17 |           "noChartCache": "true"          
18 |         }
19 |         </pd_options>
20 |     </div>
21 | </div>
22 | """
23 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode5.py:
--------------------------------------------------------------------------------
1 | import requests
2 | databases = []
3 | page = 1
4 | while(page is not None):
5 |     payload = requests.get("https://www.quandl.com/api/v3/databases?api_key={}&page={}"\
6 |                     .format(quandl.ApiConfig.api_key, page)).json()
7 |     databases += payload['databases']
8 |     page = payload['meta']['next_page']
9 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode6.py:
--------------------------------------------------------------------------------
1 | codes = pixiedust.sampleData( "https://www.quandl.com/api/v3/databases/WIKI/codes?api_key=" + quandl.ApiConfig.api_key)
2 | display(codes)
3 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode7.py:
--------------------------------------------------------------------------------
1 | msft = quandl.get('WIKI/MSFT')
2 | msft['daily_spread'] = msft['Adj. Close'].diff()
3 | msft = msft.reset_index()
4 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode8.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | tail = msft[msft['Date'] > '2016-05-16']
3 | investment = np.cumsum((10000 / tail['Adj. Close'].values[0]) * tail['daily_spread']) + 10000
4 | investment = investment.astype(int)
5 | investment.index = tail['Date']
6 | investment = investment.resample('M').mean()
7 | investment = pd.DataFrame(investment).reset_index()
8 | display(investment)
9 | 


--------------------------------------------------------------------------------
/chapter 8/sampleCode9.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_acf(np.cos(np.linspace(0, 1000, 100)), lags=50)
2 | plt.show()
3 | 


--------------------------------------------------------------------------------
/chapter 9/USFlightsAnalysis/airlines.csv:
--------------------------------------------------------------------------------
 1 | IATA_CODE,AIRLINE
 2 | UA,United Air Lines Inc.
 3 | AA,American Airlines Inc.
 4 | US,US Airways Inc.
 5 | F9,Frontier Airlines Inc.
 6 | B6,JetBlue Airways
 7 | OO,Skywest Airlines Inc.
 8 | AS,Alaska Airlines Inc.
 9 | NK,Spirit Air Lines
10 | WN,Southwest Airlines Co.
11 | DL,Delta Air Lines Inc.
12 | EV,Atlantic Southeast Airlines
13 | HA,Hawaiian Airlines Inc.
14 | MQ,American Eagle Airlines Inc.
15 | VX,Virgin America
16 | 


--------------------------------------------------------------------------------
/chapter 9/USFlightsAnalysis/flights.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DTAIEB/Thoughtful-Data-Science/8b80e8f3e33b6fdc6672ecee1f27e0b983b28241/chapter 9/USFlightsAnalysis/flights.zip


--------------------------------------------------------------------------------
/chapter 9/sampleCode1.py:
--------------------------------------------------------------------------------
1 | G = nx.DiGraph()
2 | G.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
3 | G.add_edge('A', 'B')
4 | G.add_edge('B', 'B')
5 | G.add_edges_from([('A', 'E'),('A', 'D'),('B', 'C'),('C', 'E'),('D', 'C')])
6 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode10.py:
--------------------------------------------------------------------------------
1 | import matplotlib.cm as cm
2 | fig = plt.figure(figsize = (12,12))
3 | nx.draw(flight_graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
4 |         node_color=range(len(flight_graph)), cmap=cm.get_cmap(name="cool"),
5 |         edge_color=range(len(flight_graph.edges)), edge_cmap=cm.get_cmap(name="spring"),
6 |         pos = nx.random_layout(flight_graph)
7 |        )
8 | plt.show()
9 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode11.py:
--------------------------------------------------------------------------------
1 | degree_df = pd.DataFrame([{"IATA_CODE":k, "DEGREE":v} for k,v in flight_graph.degree], columns=["IATA_CODE", "DEGREE"])
2 | airports_centrality = pd.merge(airports, degree_df, on='IATA_CODE')
3 | airports_centrality
4 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode12.py:
--------------------------------------------------------------------------------
 1 | from six import iteritems
 2 | def compute_centrality(g, centrality_df, compute_fn, col_name, *args, **kwargs): 
 3 |     # create a temporary DataFrame that contains the computed centrality values
 4 |     temp_df = pd.DataFrame(
 5 |         [{"IATA_CODE":k, col_name:v} for k,v in iteritems(compute_fn(g, *args, **kwargs))], 
 6 |         columns=["IATA_CODE", col_name]
 7 |     )
 8 |     # make sure to remove the col_name from the centrality_df is already there
 9 |     if col_name in centrality_df.columns:
10 |         centrality_df.drop([col_name], axis=1, inplace=True)
11 |     # merge the 2 DataFrame on the IATA_CODE column
12 |     centrality_df = pd.merge(centrality_df, temp_df, on='IATA_CODE')
13 |     return centrality_df
14 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode13.py:
--------------------------------------------------------------------------------
1 | airports_centrality = compute_centrality(flight_graph, airports_centrality, nx.pagerank, "PAGE_RANK")
2 | airports_centrality = compute_centrality(flight_graph, airports_centrality, nx.closeness_centrality, "CLOSENESS")
3 | airports_centrality = compute_centrality(
4 |     flight_graph, airports_centrality, nx.betweenness_centrality, "BETWEENNESS", k=len(flight_graph))
5 | airports_centrality
6 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode14.py:
--------------------------------------------------------------------------------
1 | for col_name in ["DEGREE", "PAGE_RANK", "CLOSENESS", "BETWEENNESS"]:
2 |     print("{} : {}".format(
3 |         col_name, 
4 |         airports_centrality.nlargest(10, col_name)["IATA_CODE"].values)
5 |     )
6 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode15.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.cm as cm
 2 | def visualize_neighbors(parent_node):
 3 |     fig = plt.figure(figsize = (12,12))
 4 |     # Create a subgraph and add an edge from the parent node to all its neighbors
 5 |     graph = nx.DiGraph()
 6 |     for neighbor in flight_graph.neighbors(parent_node):
 7 |         graph.add_edge(parent_node, neighbor)
 8 |     # draw the subgraph
 9 |     nx.draw(graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
10 |             node_color=range(len(graph)), cmap=cm.get_cmap(name="cool"),
11 |             edge_color=range(len(graph.edges)), edge_cmap=cm.get_cmap(name="spring"),
12 |            )
13 |     plt.show()
14 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode16.py:
--------------------------------------------------------------------------------
 1 | # use a cache so we don't recompute the weight for the same airport every time
 2 | cache = {}
 3 | def compute_weight(centrality_indice_col):
 4 |     # wrapper function that conform to the dijkstra weight argument
 5 |     def wrapper(source, target, attribute):
 6 |         # try the cache first and compute the weight if not there
 7 |         source_weight = cache.get(source, None)
 8 |         if source_weight is None:
 9 |             # look up the airports_centrality for the value
10 |             source_weight = airports_centrality.loc[airports_centrality["IATA_CODE"] == source][centrality_indice_col].values[0]
11 |             cache[source] = source_weight
12 |         target_weight = cache.get(target, None)
13 |         if target_weight is None:
14 |             target_weight = airports_centrality.loc[airports_centrality["IATA_CODE"] == target][centrality_indice_col].values[0]
15 |             cache[target] = target_weight
16 |         # Return weight is inversely proportional to the computed weighted since
17 |         # the Dijkstra algorithm give precedence to shorter distances
18 |         return float(1/source_weight) + float(1/target_weight)
19 |     return wrapper
20 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode17.py:
--------------------------------------------------------------------------------
1 | for col_name in ["DEGREE", "PAGE_RANK", "CLOSENESS"]:
2 |     #clear the cache
3 |     cache.clear()
4 |     print("{} : {}".format(
5 |         col_name,
6 |         nx.dijkstra_path(flight_graph, "BOS", "PSC", weight=compute_weight(col_name))
7 |     ))
8 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode18.py:
--------------------------------------------------------------------------------
 1 | [[USFlightsAnalysis]]
 2 | from pixiedust.display.app import *
 3 | from pixiedust.apps.mapboxBase import MapboxBase
 4 | from collections import OrderedDict
 5 | 
 6 | @PixieApp
 7 | class USFlightsAnalysis(MapboxBase):
 8 |     …
 9 |     @route()
10 |     def main_screen(self):
11 |         return """
12 | <style>
13 |     div.outer-wrapper {
14 |         display: table;width:100%;height:300px;
15 |     }
16 |     div.inner-wrapper {
17 |         display: table-cell;vertical-align: middle;height: 100%;width: 100%;
18 |     }
19 | </style>
20 | <div class="outer-wrapper">
21 |     <div class="inner-wrapper">
22 |         <div class="col-sm-6">
23 |             <div class="rendererOpt" style="font-weight:bold">
24 | Select origin airport:
25 |             </div>
26 |             <div>
27 |                 <select id="origin_airport{{prefix}}" pd_refresh="origin_graph{{prefix}}">
28 |                     <option value="" selected></option>
29 |                     {%for code, airport in this.get_airports() %}
30 |                     <option value="{{code}}">{{code}} - {{airport}}</option>
31 |                     {%endfor%}
32 |                 </select>
33 |             </div>
34 |             <div id="origin_graph{{prefix}}" pd_options="visualize_graph=$val(origin_airport{{prefix}})"></div>
35 |         </div>
36 |         <div class="input-group col-sm-6">
37 |             <div class="rendererOpt" style="font-weight:bold">
38 | Select destination airport:
39 |   </div>
40 |             <div>
41 |                 <select id="destination_airport{{prefix}}" pd_refresh="destination_graph{{prefix}}">
42 |                     <option value="" selected></option>
43 |                     {%for code, airport in this.get_airports() %}
44 |                     <option value="{{code}}">{{code}} - {{airport}}</option>
45 |                     {%endfor%}
46 |                 </select>
47 |             </div>
48 |             <div id="destination_graph{{prefix}}" 
49 |                 pd_options="visualize_graph=$val(destination_airport{{prefix}})">
50 |             </div>
51 |         </div>
52 |     </div>
53 | </div>
54 | <div style="text-align:center">
55 |     <button class="btn btn-default" type="button" 
56 |         pd_options="org_airport=$val(origin_airport{{prefix}});dest_airport=$val(destination_airport{{prefix}})">
57 |         <pd_script type="preRun">
58 |             if ($("#origin_airport{{prefix}}").val() == "" || $("#destination_airport{{prefix}}").val() == ""){
59 |                 alert("Please select an origin and destination airport");
60 |                 return false;
61 |             }
62 |             return true;
63 |         </pd_script>
64 |         Analyze
65 |     </button>
66 | </div>
67 | """
68 | 
69 | def get_airports(self):
70 |    return [tuple(l) for l in airports_centrality[["IATA_CODE", "AIRPORT"]].values.tolist()]
71 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode19.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | @route(visualize_graph="*")
3 | @captureOutput
4 | def visualize_graph_screen(self, visualize_graph):
5 |     visualize_neighbors(visualize_graph, (5,5))
6 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode2.py:
--------------------------------------------------------------------------------
1 | %matplotlib inline
2 | import matplotlib.pyplot as plt
3 | nx.draw(G_complete, with_labels=True)
4 | plt.show()
5 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode20.html:
--------------------------------------------------------------------------------
 1 | <button class="btn btn-default" type="button" pd_options="org_airport=$val(origin_airport{{prefix}});dest_airport=$val(destination_airport{{prefix}})">
 2 |    <pd_script type="preRun">
 3 |        if ($("#origin_airport{{prefix}}").val() == "" || $("#destination_airport{{prefix}}").val() == ""){
 4 |            alert("Please select an origin and destination airport");
 5 |            return false;
 6 |        }
 7 |        return true;
 8 |    </pd_script>
 9 |       Analyze
10 |    </button>
11 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode21.py:
--------------------------------------------------------------------------------
 1 | [[USFlightsAnalysis]]
 2 | @route(org_airport="*", dest_airport="*")
 3 | def compute_path_screen(self, org_airport, dest_airport):
 4 |     return """
 5 | <div class="container-fluid">
 6 |     <div class="form-group col-sm-2" style="padding-right:10px;">
 7 |         <div><strong>Centrality Indices</strong></div>
 8 |         {% for centrality in this.centrality_indices.keys() %}
 9 |         <div class="rendererOpt checkbox checkbox-primary">
10 |             <input type="checkbox" pd_refresh="flight_map{{prefix}}" 
11 |                 pd_script="self.compute_toggle_centrality_layer('{{org_airport}}', '{{dest_airport}}', '{{centrality}}')">
12 |             <label>{{centrality}}</label>
13 |         </div>      
14 |         {%endfor%}
15 |     </div>
16 |     <div class="form-group col-sm-10">
17 |         <h1 class="rendererOpt">Select a centrality index to show the shortest flight path
18 |         </h1>
19 |         <div id="flight_map{{prefix}}" pd_entity="self.airports_centrality" pd_render_onload>
20 |             <pd_options>
21 |             {
22 |               "keyFields": "LATITUDE,LONGITUDE",
23 |               "valueFields": "AIRPORT,DEGREE,PAGE_RANK,ELAPSED_TIME,CLOSENESS",
24 |               "custombasecolorsecondary": "#fffb00",
25 |               "colorrampname": "Light to Dark Red",
26 |               "handlerId": "mapView",
27 |               "quantiles": "0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0",
28 |               "kind": "choropleth",
29 |               "rowCount": "1000",
30 |               "numbins": "5",
31 |               "mapboxtoken": "pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4M29iazA2Z2gycXA4N2pmbDZmangifQ.-g_vE53SD2WrJ6tFX7QHmA",
32 |               "custombasecolor": "#ffffff"
33 |             }
34 |             </pd_options>
35 |         </div>
36 |     </div>
37 | </div>
38 | """
39 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode22.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | def setup(self):
3 |    self.centrality_indices = OrderedDict([
4 |       ("ELAPSED_TIME","rgba(256,0,0,0.65)"), 
5 |       ("DEGREE", "rgba(0,256,0,0.65)"), 
6 |       ("PAGE_RANK", "rgba(0,0,256,0.65)"),
7 |       ("CLOSENESS", "rgba(128,0,128,0.65)")
8 |   ])
9 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode23.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "geometry": {
 3 |         "type": "LineString", 
 4 |         "coordinates": [
 5 |             [-93.21692, 44.88055],
 6 |             [-119.11903000000001, 46.26468]        
 7 |         ]
 8 |     }, 
 9 |     "type": "Feature", 
10 |     "properties": {}
11 | }
12 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode24.py:
--------------------------------------------------------------------------------
 1 | [[USFlightsAnalysis]]
 2 | def compute_toggle_centrality_layer(self, org_airport, dest_airport, centrality):
 3 |     cache.clear()
 4 |     cities = nx.dijkstra_path(flight_graph, org_airport, dest_airport, weight=compute_weight(centrality))
 5 |     layer_index = self.get_layer_index(centrality, {
 6 |         "name": centrality,
 7 |         "geojson": {
 8 |             "type": "FeatureCollection",
 9 |             "features":[
10 |                 {"type":"Feature",
11 |                  "properties":{"route":"{} to {}".format(cities[i], cities[i+1])},
12 |                  "geometry":{
13 |                      "type":"LineString",
14 |                      "coordinates":[
15 |                          self.get_airport_location(cities[i]),
16 |                          self.get_airport_location(cities[i+1])
17 |                      ]
18 |                  }
19 |                 } for i in range(len(cities) - 1)
20 |             ]
21 |         },
22 |         "paint":{
23 |             "line-width": 8,
24 |             "line-color": self.centrality_indices[centrality]
25 |         }
26 |     })
27 |     self.toggleLayer(layer_index)
28 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode25.py:
--------------------------------------------------------------------------------
1 | [[USFlightsAnalysis]]
2 | def get_airport_location(self, airport_code):
3 |     row = airports_centrality.loc[airports["IATA_CODE"] == airport_code]
4 |     if row is not None:
5 |         return [row["LONGITUDE"].values[0], row["LATITUDE"].values[0]]
6 |     return None
7 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode26.py:
--------------------------------------------------------------------------------
 1 | def compute_delay_airline_df(airline, org_airport=None):
 2 | 	# create a mask for selecting the data
 3 |    mask = (flights["AIRLINE"] == airline)
 4 |    if org_airport is not None:
 5 | 	    # Add the org_airport to the mask
 6 |        mask = mask & (flights["ORIGIN_AIRPORT"] == org_airport)
 7 |    # Apply the mask to the Pandas dataframe
 8 |    df = flights[mask]
 9 |    # Convert the YEAR, MONTH and DAY column into a DateTime
10 |    df["DATE"] = pd.to_datetime(flights[['YEAR','MONTH', 'DAY']])
11 |    # Select only the columns that we need
12 |    return df[["DATE", "ARRIVAL_DELAY"]]
13 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode27.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.apps.template import TemplateTabbedApp
 2 | 
 3 | @PixieApp
 4 | class RouteAnalysisApp(TemplateTabbedApp):
 5 |     def setup(self):
 6 |         self.apps = [
 7 |             {"title": "Search Shortest Route", "app_class": "SearchShortestRouteApp"},
 8 |             {"title": "Explore Airlines", "app_class": "AirlinesApp"}
 9 |         ]
10 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode28.py:
--------------------------------------------------------------------------------
 1 | [[USFlightsAnalysis]]
 2 | @route(org_airport="*", dest_airport="*")
 3 | def analyze_route(self, org_airport, dest_airport):
 4 |     return """
 5 | <div pd_app="RouteAnalysisApp" 
 6 |     pd_options="org_airport={{org_airport}};dest_airport={{dest_airport}}" 
 7 |     pd_render_onload>
 8 | </div>
 9 |     """
10 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode29.py:
--------------------------------------------------------------------------------
 1 | [[SearchShortestRouteApp]]
 2 | from pixiedust.display.app import *
 3 | from pixiedust.apps.mapboxBase import MapboxBase
 4 | from collections import OrderedDict
 5 | 
 6 | @PixieApp
 7 | class SearchShortestRouteApp(MapboxBase):
 8 |     def setup(self):
 9 |         self.org_airport = self.parent_pixieapp.options.get("org_airport")
10 |         self.dest_airport = self.parent_pixieapp.options.get("dest_airport")
11 |         self.centrality_indices = OrderedDict([
12 |             ("ELAPSED_TIME","rgba(256,0,0,0.65)"), 
13 |             ("DEGREE", "rgba(0,256,0,0.65)"), 
14 |             ("PAGE_RANK", "rgba(0,0,256,0.65)"),
15 |             ("CLOSENESS", "rgba(128,0,128,0.65)")
16 |         ])
17 | 	…
18 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode3.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import datetime
 3 | import numpy as np
 4 | 
 5 | # clean up the flights data in flights.csv
 6 | flights = pd.read_csv('flights.raw.csv', low_memory=False)
 7 | 
 8 | # select only the rows that have a 3 letter IATA code in the ORIGIN and DESTINATION airports
 9 | mask = (flights["ORIGIN_AIRPORT"].str.len() == 3) & (flights["DESTINATION_AIRPORT"].str.len() == 3)
10 | flights = flights[ mask ]
11 | 
12 | # remove the unwanted columns
13 | dropped_columns=["SCHEDULED_DEPARTURE","SCHEDULED_TIME",
14 | "CANCELLATION_REASON","DIVERTED","DIVERTED","TAIL_NUMBER","TAXI_OUT",
15 | "WHEELS_OFF","WHEELS_ON",
16 | "TAXI_IN","SCHEDULED_ARRIVAL", "ARRIVAL_TIME", "AIR_SYSTEM_DELAY","SECURITY_DELAY",
17 | "AIRLINE_DELAY","LATE_AIRCRAFT_DELAY", "WEATHER_DELAY"]
18 | flights.drop(dropped_columns, axis=1, inplace=True)
19 | 
20 | # remove the row that have NA in the ELAPSED_TIME column
21 | flights.dropna(subset=["ELAPSED_TIME"], inplace=True)
22 | 
23 | # remove the row that have NA in the DEPARTURE_TIME column
24 | flights.dropna(subset=["ELAPSED_TIME"], inplace=True)
25 | 
26 | # Create a new DEPARTURE_TIME columns that has the actual datetime
27 | def to_datetime(row):
28 |     departure_time = str(int(row["DEPARTURE_TIME"])).zfill(4)
29 |     hour = int(departure_time[0:2])
30 |     return datetime.datetime(year=row["YEAR"], month=row["MONTH"], day=row["DAY"], 
31 |                              hour = 0 if hour >= 24 else hour, 
32 |                              minute=int(departure_time[2:4])
33 |                             )
34 | flights["DEPARTURE_TIME"] = flights.apply(to_datetime, axis=1)
35 | 
36 | # write the data back to file without the index
37 | flights.to_csv('flights.csv', index=False)
38 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode30.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 | @PixieApp
3 | class AirlinesApp():
4 |     def setup(self):
5 |         self.org_airport = self.parent_pixieapp.options.get("org_airport")
6 |         self.dest_airport = self.parent_pixieapp.options.get("dest_airport")
7 |         self.airlines = flights[flights["ORIGIN_AIRPORT"] == self.org_airport].groupby("AIRLINE").size().index.values.tolist()
8 |         self.airlines = [(a, airlines.loc[airlines["IATA_CODE"] == a]["AIRLINE"].values[0]) for a in self.airlines]
9 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode31.py:
--------------------------------------------------------------------------------
 1 | [[AirlinesApp]]
 2 |  @route()
 3 |     def main_screen(self):
 4 |         return """
 5 | <div class="container-fluid">
 6 |     {%for airline_code, airline_name in this.airlines%}
 7 |     <div class="row" style="max-e">
 8 |         <h1 style="color:red">{{airline_name}}</h1>
 9 |         <div class="col-sm-6">
10 |             <div pd_render_onload pd_options="delay_org_airport=true;airline_code={{airline_code}};airline_name={{airline_name}}"></div>
11 |         </div>
12 |         <div class="col-sm-6">
13 |             <div pd_render_onload pd_options="delay_org_airport=false;airline_code={{airline_code}};airline_name={{airline_name}}"></div>
14 |         </div>
15 |     </div>
16 |     {%endfor%}
17 | </div>
18 |         """
19 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode32.py:
--------------------------------------------------------------------------------
 1 | [[AirlinesApp]]
 2 |  @route(delay_org_airport="*",airline_code="*", airline_name="*")
 3 |     @templateArgs
 4 |     def delay_airline_screen(self, delay_org_airport, airline_code, airline_name):
 5 |         mask = (flights["AIRLINE"] == airline_code)
 6 |         if delay_org_airport == "true":
 7 |             mask = mask & (flights["ORIGIN_AIRPORT"] == self.org_airport)
 8 |         average_delay = round(flights[mask]["ARRIVAL_DELAY"].mean(), 2)
 9 |         return """
10 | {%if delay_org_airport == "true" %}
11 | <h4>Delay chart for all flights out of {{this.org_airport}}</h4>
12 | {%else%}
13 | <h4>Delay chart for all flights</h4>
14 | {%endif%}
15 | <h4 style="margin-top:5px">Average delay: {{average_delay}} minutes</h4>
16 | <div pd_render_onload pd_entity="compute_delay_airline_df('{{airline_code}}', '{{delay_org_airport}}')">
17 |     <pd_options>
18 |     {
19 |       "keyFields": "DATE",
20 |       "handlerId": "lineChart",
21 |       "valueFields": "ARRIVAL_DELAY",
22 |       "noChartCache": "true"
23 |     }
24 |     </pd_options>
25 | </div>
26 |         """
27 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode33.py:
--------------------------------------------------------------------------------
1 | [[AirlinesApp]]
2 |  def compute_delay_airline_df(self, airline, delay_org_airport):
3 |         mask = (flights["AIRLINE"] == airline)
4 |         if delay_org_airport == "true":
5 |             mask = mask & (flights["ORIGIN_AIRPORT"] == self.org_airport)
6 |         df = flights[mask]
7 |         df["DATE"] = pd.to_datetime(flights[['YEAR','MONTH', 'DAY']])
8 |         return df[["DATE", "ARRIVAL_DELAY"]]
9 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode34.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as smt
2 | smt.graphics.plot_acf(df['ARRIVAL_DELAY'], lags=100)
3 | plt.show()
4 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode35.py:
--------------------------------------------------------------------------------
1 | import statsmodels.tsa.api as smt
2 | smt.graphics.plot_pacf(df['ARRIVAL_DELAY'], lags=50)
3 | plt.show()
4 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode36.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | train_set, test_set = df[:-14], df[-14:]
 3 | train_set.index = train_set["DEPARTURE_TIME"]
 4 | test_set.index = test_set["DEPARTURE_TIME"]
 5 | logdf = np.log(train_set['ARRIVAL_DELAY'])
 6 | logdf.index = train_set['DEPARTURE_TIME']
 7 | logdf_diff = pd.DataFrame(logdf - logdf.shift()).reset_index()
 8 | logdf_diff.replace([np.inf, -np.inf], np.nan, inplace=True)
 9 | logdf_diff.dropna(inplace=True)
10 | display(logdf_diff)
11 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode37.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_acf(logdf_diff["ARRIVAL_DELAY"], lags=100)
2 | plt.show()
3 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode38.py:
--------------------------------------------------------------------------------
1 | smt.graphics.plot_pacf(logdf_diff["ARRIVAL_DELAY"], lags=100)
2 | plt.show()
3 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode39.py:
--------------------------------------------------------------------------------
1 | from statsmodels.tsa.arima_model import ARIMA
2 | 
3 | import warnings
4 | with warnings.catch_warnings():
5 |     warnings.simplefilter("ignore")
6 |     arima_model_class = ARIMA(train_set['ARRIVAL_DELAY'], dates=train_set['DEPARTURE_TIME'], order=(1,1,1))
7 |     arima_model = arima_model_class.fit(disp=0)
8 |     print(arima_model.resid.describe())
9 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode4.py:
--------------------------------------------------------------------------------
1 | airports = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/airports.csv")
2 | airlines = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/airlines.csv")
3 | flights = pixiedust.sampleData("https://github.com/DTAIEB/Thoughtful-Data-Science/raw/master/chapter%209/USFlightsAnalysis/flights.zip")
4 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode40.py:
--------------------------------------------------------------------------------
 1 | def plot_predict(model, dates_series, num_observations):
 2 |     fig,ax = plt.subplots(figsize = (12,8))
 3 |     model.plot_predict(
 4 |         start = dates_series[len(dates_series)-num_observations], 
 5 |         end = dates_series[len(dates_series)-1],
 6 |         ax = ax
 7 |     )
 8 |     plt.show()
 9 | plot_predict(arima_model, train_set['DEPARTURE_TIME'], 100)
10 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode41.py:
--------------------------------------------------------------------------------
 1 | def compute_test_set_predictions(train_set, test_set):
 2 |     with warnings.catch_warnings():
 3 |         warnings.simplefilter("ignore")
 4 |         history = train_set['ARRIVAL_DELAY'].values
 5 |         forecast = np.array([])
 6 |         for t in range(len(test_set)):
 7 |             prediction = ARIMA(history, order=(1,1,0)).fit(disp=0).forecast()
 8 |             history = np.append(history, test_set['ARRIVAL_DELAY'].iloc[t])
 9 |             forecast = np.append(forecast, prediction[0])
10 |         return pd.DataFrame(
11 |           {"forecast": forecast,
12 |            "test": test_set['ARRIVAL_DELAY'],
13 |            "Date": pd.date_range(start=test_set['DEPARTURE_TIME'].iloc[len(test_set)-1], periods = len(test_set))
14 |           }
15 |         )
16 |         
17 | results = compute_test_set_predictions(train_set, test_set)
18 | display(results)
19 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode42.py:
--------------------------------------------------------------------------------
 1 | [[PredictDelayApp]] 
 2 | @route()
 3 |     def main_screen(self):
 4 |         return """
 5 | <div class="container-fluid">
 6 |     <div class="row">
 7 |         <div class="col-sm-6">
 8 |             <div class="rendererOpt" style="font-weight:bold">
 9 |                 Select a flight segment:
10 |             </div>
11 |             <div>
12 |                 <select id="segment{{prefix}}" pd_refresh="prediction_graph{{prefix}}">
13 |                     <option value="" selected></option>
14 |                     {%for start, end in this.paths %}
15 |                     <option value="{{start}}:{{end}}">{{start}} -> {{end}}</option>
16 |                     {%endfor%}
17 |                 </select>
18 |             </div>
19 |         </div>
20 |         <div class="col-sm-6">
21 |             <div class="rendererOpt" style="font-weight:bold">
22 |                 Select an airline:
23 |             </div>
24 |             <div>
25 |                 <select id="airline{{prefix}}" pd_refresh="prediction_graph{{prefix}}">
26 |                     <option value="" selected></option>
27 |                     {%for airline_code, airline_name in this.airlines%}
28 |                     <option value="{{airline_code}}">{{airline_name}}</option>
29 |                     {%endfor%}
30 |                 </select>
31 |             </div>
32 |         </div>
33 |     </div>
34 |     <div class="row">
35 |         <div class="col-sm-12">
36 |             <div id="prediction_graph{{prefix}}" 
37 |                 pd_options="flight_segment=$val(segment{{prefix}});airline=$val(airline{{prefix}})">
38 |             </div>
39 |         </div>
40 |     </div>
41 | </div>
42 |         """
43 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode43.py:
--------------------------------------------------------------------------------
 1 | [[PredictDelayApp]] 
 2 | @route(flight_segment="*", airline="*")
 3 |     @captureOutput
 4 |     def predict_screen(self, flight_segment, airline):
 5 |         if flight_segment is None or flight_segment == "":
 6 |             return "<div>Please select a flight segment</div>"
 7 |         airport = flight_segment.split(":")[1]
 8 |         mask = (flights["DESTINATION_AIRPORT"] == airport)
 9 |         if airline is not None and airline != "":
10 |             mask = mask & (flights["AIRLINE"] == airline)
11 |         df = flights[mask]
12 |         df.index = df["DEPARTURE_TIME"]
13 |         df = df.tail(50000)
14 |         df = df[~df.index.duplicated(keep='first')]
15 |         with warnings.catch_warnings():
16 |             warnings.simplefilter("ignore")
17 |             arima_model_class = ARIMA(df["ARRIVAL_DELAY"], dates=df['DEPARTURE_TIME'], order=(1,1,1))
18 |             arima_model = arima_model_class.fit(disp=0)
19 |             fig, ax = plt.subplots(figsize = (12,8))
20 |             num_observations = 100  
21 |             date_series = df["DEPARTURE_TIME"]
22 |             arima_model.plot_predict(
23 |                 start = str(date_series[len(date_series)-num_observations]), 
24 |                 end = str(date_series[len(date_series)-1]),
25 |                 ax = ax
26 |             )
27 |             plt.show()
28 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode44.py:
--------------------------------------------------------------------------------
 1 | from pixiedust.apps.template import TemplateTabbedApp
 2 | 
 3 | @PixieApp
 4 | class RouteAnalysisApp(TemplateTabbedApp):
 5 |     def setup(self):
 6 |         self.apps = [
 7 |             {"title": "Search Shortest Route", "app_class": "SearchShortestRouteApp"},
 8 |             {"title": "Explore Airlines", "app_class": "AirlinesApp"},
 9 |             {"title": "Flight Delay Prediction", "app_class": "PredictDelayApp"}
10 |         ]
11 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode5.py:
--------------------------------------------------------------------------------
1 | edges = flights.groupby(["ORIGIN_AIRPORT","DESTINATION_AIRPORT"]) [["ELAPSED_TIME"]].mean()
2 | edges
3 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode6.py:
--------------------------------------------------------------------------------
1 | edges = edges.reset_index()
2 | edges
3 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode7.py:
--------------------------------------------------------------------------------
1 | flight_graph = nx.from_pandas_edgelist(
2 |     flights, "ORIGIN_AIRPORT","DESTINATION_AIRPORT", "ELAPSED_TIME", 
3 |     create_using = nx.DiGraph() )
4 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode8.py:
--------------------------------------------------------------------------------
1 | print("Nodes: {}".format(flight_graph.nodes))
2 | print("Edges: {}".format(flight_graph.edges))
3 | 


--------------------------------------------------------------------------------
/chapter 9/sampleCode9.py:
--------------------------------------------------------------------------------
1 | import matplotlib.cm as cm
2 | fig = plt.figure(figsize = (12,12))
3 | nx.draw(flight_graph, arrows=True, with_labels=True, width = 0.5,style="dotted",
4 |         node_color=range(len(flight_graph)), cmap=cm.get_cmap(name="cool"),
5 |         edge_color=range(len(flight_graph.edges)), edge_cmap=cm.get_cmap(name="spring")
6 |        )
7 | plt.show()
8 | 


--------------------------------------------------------------------------------