├── .gitignore ├── 20t1 ├── assign3 │ ├── test.csv │ ├── training.csv │ └── validation.csv ├── credits.csv ├── movies.csv └── z1111111.py ├── 21t1 └── z1111111.py ├── 23T1 ├── YOUR_ZID_STARTING_WITH_Z.py ├── city_pairs.csv ├── datasets.zip └── seats.csv ├── 24T1 ├── ass1 │ ├── ds_jobs.csv │ ├── requirements.txt │ └── z1234567.py ├── ass2 │ └── zXXXXXXX.py └── ass3 │ ├── requirements.txt │ ├── test-marking.csv │ ├── test.csv │ └── train.csv ├── Ass1_ChoroplethMap ├── Olympics_dataset.csv ├── README.md ├── app.py └── requirements.txt ├── Jenkinsfile ├── README.md ├── Week10_Regression_and_Clustering ├── activity_1.py ├── activity_2.py ├── activity_3.py ├── diet.csv └── iris.csv ├── Week11_Preprocessing ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week2_DataAccess ├── Demographic_Statistics_By_Zip_Code.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week3_Data_Cleansing ├── Books.csv ├── City.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week4_Visualization ├── Books.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py ├── activity_4.py └── iris.csv ├── Week5_Flask ├── Books.csv ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week6_Flask2 ├── Books.csv ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week7_Client ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week7_GraphQL ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week8_Authentication ├── Books.csv ├── activity_1.py ├── activity_1_client.py ├── activity_2.py └── activity_3.py ├── Week9_Classification ├── activity_1.py ├── activity_2.py ├── activity_3.py └── iris.csv ├── assignments ├── Countries-Continents.csv ├── Getting Started Academic Cloud.pdf ├── Olympics_dataset1.csv ├── Olympics_dataset2.csv ├── Process Mining Kickstarter - Exercises.pdf ├── Process Mining Kickstarter - Solution Manual.pdf └── z1111111.py └── docs ├── Flyer_UNSW_Al-Banna.pdf └── myExperience.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .idea/ 106 | data/ 107 | -------------------------------------------------------------------------------- /20t1/z1111111.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import sys 6 | import os 7 | 8 | studentid = os.path.basename(sys.modules[__name__].__file__) 9 | 10 | 11 | ################################################# 12 | # Your personal methods can be here ... 13 | ################################################# 14 | 15 | 16 | def log(question, output_df, other): 17 | print("--------------- {}----------------".format(question)) 18 | if other is not None: 19 | print(question, other) 20 | if output_df is not None: 21 | print(output_df.head(5).to_string()) 22 | 23 | 24 | def question_1(movies, credits): 25 | """ 26 | :param movies: the path for the movie.csv file 27 | :param credits: the path for the credits.csv file 28 | :return: df1 29 | Data Type: Dataframe 30 | Please read the assignment specs to know how to create the output dataframe 31 | """ 32 | 33 | ################################################# 34 | # Your code goes here ... 35 | ################################################# 36 | 37 | log("QUESTION 1", output_df=df1, other=df1.shape) 38 | return df1 39 | 40 | 41 | def question_2(df1): 42 | """ 43 | :param df1: the dataframe created in question 1 44 | :return: df2 45 | Data Type: Dataframe 46 | Please read the assignment specs to know how to create the output dataframe 47 | """ 48 | 49 | ################################################# 50 | # Your code goes here ... 51 | ################################################# 52 | 53 | log("QUESTION 2", output_df=df2, other=(len(df2.columns), sorted(df2.columns))) 54 | return df2 55 | 56 | 57 | def question_3(df2): 58 | """ 59 | :param df2: the dataframe created in question 2 60 | :return: df3 61 | Data Type: Dataframe 62 | Please read the assignment specs to know how to create the output dataframe 63 | """ 64 | 65 | ################################################# 66 | # Your code goes here ... 
67 | ################################################# 68 | 69 | log("QUESTION 3", output_df=df3, other=df3.index.name) 70 | return df3 71 | 72 | 73 | def question_4(df3): 74 | """ 75 | :param df3: the dataframe created in question 3 76 | :return: df4 77 | Data Type: Dataframe 78 | Please read the assignment specs to know how to create the output dataframe 79 | """ 80 | 81 | ################################################# 82 | # Your code goes here ... 83 | ################################################# 84 | 85 | log("QUESTION 4", output_df=df4, other=(df4['budget'].min(), df4['budget'].max(), df4['budget'].mean())) 86 | return df4 87 | 88 | 89 | def question_5(df4): 90 | """ 91 | :param df4: the dataframe created in question 4 92 | :return: df5 93 | Data Type: Dataframe 94 | Please read the assignment specs to know how to create the output dataframe 95 | """ 96 | 97 | ################################################# 98 | # Your code goes here ... 99 | ################################################# 100 | 101 | log("QUESTION 5", output_df=df5, 102 | other=(df5['success_impact'].min(), df5['success_impact'].max(), df5['success_impact'].mean())) 103 | return df5 104 | 105 | 106 | def question_6(df5): 107 | """ 108 | :param df5: the dataframe created in question 5 109 | :return: df6 110 | Data Type: Dataframe 111 | Please read the assignment specs to know how to create the output dataframe 112 | """ 113 | 114 | ################################################# 115 | # Your code goes here ... 116 | ################################################# 117 | 118 | log("QUESTION 6", output_df=df6, other=(df6['popularity'].min(), df6['popularity'].max(), df6['popularity'].mean())) 119 | return df6 120 | 121 | 122 | def question_7(df6): 123 | """ 124 | :param df6: the dataframe created in question 6 125 | :return: df7 126 | Data Type: Dataframe 127 | Please read the assignment specs to know how to create the output dataframe 128 | """ 129 | 130 | ################################################# 131 | # Your code goes here ... 132 | ################################################# 133 | 134 | log("QUESTION 7", output_df=df7, other=df7['popularity'].dtype) 135 | return df7 136 | 137 | 138 | def question_8(df7): 139 | """ 140 | :param df7: the dataframe created in question 7 141 | :return: df8 142 | Data Type: Dataframe 143 | Please read the assignment specs to know how to create the output dataframe 144 | """ 145 | 146 | ################################################# 147 | # Your code goes here ... 148 | ################################################# 149 | 150 | log("QUESTION 8", output_df=df8, other=df8["cast"].head(10).values) 151 | return df8 152 | 153 | 154 | def question_9(df8): 155 | """ 156 | :param df9: the dataframe created in question 8 157 | :return: movies 158 | Data Type: List of strings (movie titles) 159 | Please read the assignment specs to know how to create the output 160 | """ 161 | 162 | ################################################# 163 | # Your code goes here ... 164 | ################################################# 165 | 166 | log("QUESTION 9", output_df=None, other=movies) 167 | return movies 168 | 169 | 170 | def question_10(df8): 171 | """ 172 | :param df8: the dataframe created in question 8 173 | :return: df10 174 | Data Type: Dataframe 175 | Please read the assignment specs to know how to create the output dataframe 176 | """ 177 | 178 | ################################################# 179 | # Your code goes here ... 
180 | ################################################# 181 | 182 | log("QUESTION 10", output_df=df10, other=df10["release_date"].head(5).to_string().replace("\n", " ")) 183 | return df10 184 | 185 | 186 | def question_11(df10): 187 | """ 188 | :param df10: the dataframe created in question 10 189 | :return: nothing, but saves the figure on the disk 190 | """ 191 | 192 | ################################################# 193 | # Your code goes here ... 194 | ################################################# 195 | 196 | plt.savefig("{}-Q11.png".format(studentid)) 197 | 198 | 199 | def question_12(df10): 200 | """ 201 | :param df10: the dataframe created in question 10 202 | :return: nothing, but saves the figure on the disk 203 | """ 204 | 205 | ################################################# 206 | # Your code goes here ... 207 | ################################################# 208 | 209 | plt.savefig("{}-Q12.png".format(studentid)) 210 | 211 | 212 | def question_13(df10): 213 | """ 214 | :param df10: the dataframe created in question 10 215 | :return: nothing, but saves the figure on the disk 216 | """ 217 | 218 | ################################################# 219 | # Your code goes here ... 220 | ################################################# 221 | 222 | plt.savefig("{}-Q13.png".format(studentid)) 223 | 224 | 225 | if __name__ == "__main__": 226 | df1 = question_1("movies.csv", "credits.csv") 227 | df2 = question_2(df1) 228 | df3 = question_3(df2) 229 | df4 = question_4(df3) 230 | df5 = question_5(df4) 231 | df6 = question_6(df5) 232 | df7 = question_7(df6) 233 | df8 = question_8(df7) 234 | movies = question_9(df8) 235 | df10 = question_10(df8) 236 | question_11(df10) 237 | question_12(df10) 238 | question_13(df10) 239 | -------------------------------------------------------------------------------- /21t1/z1111111.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import sys 5 | import os 6 | import numpy as np 7 | import math 8 | import re 9 | 10 | studentid = os.path.basename(sys.modules[__name__].__file__) 11 | 12 | 13 | def log(question, output_df, other): 14 | print("--------------- {}----------------".format(question)) 15 | 16 | if other is not None: 17 | print(question, other) 18 | if output_df is not None: 19 | df = output_df.head(5).copy(True) 20 | for c in df.columns: 21 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 22 | 23 | df.columns = [a[:10] + "..." for a in df.columns] 24 | print(df.to_string()) 25 | 26 | 27 | def question_1(exposure, countries): 28 | """ 29 | :param exposure: the path for the exposure.csv file 30 | :param countries: the path for the Countries.csv file 31 | :return: df1 32 | Data Type: Dataframe 33 | Please read the assignment specs to know how to create the output dataframe 34 | """ 35 | 36 | ################################################# 37 | # Your code goes here ... 38 | ################################################# 39 | 40 | log("QUESTION 1", output_df=df1, other=df1.shape) 41 | return df1 42 | 43 | 44 | def question_2(df1): 45 | """ 46 | :param df1: the dataframe created in question 1 47 | :return: df2 48 | Data Type: Dataframe 49 | Please read the assignment specs to know how to create the output dataframe 50 | """ 51 | 52 | ################################################# 53 | # Your code goes here ... 
54 | ################################################# 55 | 56 | log("QUESTION 2", output_df=df2[["avg_latitude", "avg_longitude"]], other=df2.shape) 57 | return df2 58 | 59 | 60 | def question_3(df2): 61 | """ 62 | :param df2: the dataframe created in question 2 63 | :return: df3 64 | Data Type: Dataframe 65 | Please read the assignment specs to know how to create the output dataframe 66 | """ 67 | 68 | ################################################# 69 | # Your code goes here ... 70 | ################################################# 71 | 72 | log("QUESTION 3", output_df=df3[['distance_to_Wuhan']], other=df3.shape) 73 | return df3 74 | 75 | 76 | def question_4(df2, continents): 77 | """ 78 | :param df2: the dataframe created in question 2 79 | :param continents: the path for the Countries-Continents.csv file 80 | :return: df4 81 | Data Type: Dataframe 82 | Please read the assignment specs to know how to create the output dataframe 83 | """ 84 | 85 | ################################################# 86 | # Your code goes here ... 87 | ################################################# 88 | 89 | log("QUESTION 4", output_df=df4, other=df4.shape) 90 | return df4 91 | 92 | 93 | def question_5(df2): 94 | """ 95 | :param df2: the dataframe created in question 2 96 | :return: df5 97 | Data Type: dataframe 98 | Please read the assignment specs to know how to create the output dataframe 99 | """ 100 | ################################################# 101 | # Your code goes here ... 102 | ################################################# 103 | 104 | log("QUESTION 5", output_df=df5, other=df5.shape) 105 | return df5 106 | 107 | 108 | def question_6(df2): 109 | """ 110 | :param df2: the dataframe created in question 2 111 | :return: cities_lst 112 | Data Type: list 113 | Please read the assignment specs to know how to create the output dataframe 114 | """ 115 | cities_lst = [] 116 | ################################################# 117 | # Your code goes here ... 118 | ################################################# 119 | 120 | log("QUESTION 6", output_df=None, other=cities_lst) 121 | return cities_lst 122 | 123 | 124 | def question_7(df2): 125 | """ 126 | :param df2: the dataframe created in question 2 127 | :return: df7 128 | Data Type: Dataframe 129 | Please read the assignment specs to know how to create the output dataframe 130 | """ 131 | 132 | ################################################# 133 | # Your code goes here ... 134 | ################################################# 135 | 136 | log("QUESTION 7", output_df=df7, other=df7.shape) 137 | return df7 138 | 139 | 140 | def question_8(df2, continents): 141 | """ 142 | :param df2: the dataframe created in question 2 143 | :param continents: the path for the Countries-Continents.csv file 144 | :return: nothing, but saves the figure on the disk 145 | """ 146 | 147 | ################################################# 148 | # Your code goes here ... 149 | ################################################# 150 | 151 | plt.savefig("{}-Q11.png".format(studentid)) 152 | 153 | 154 | def question_9(df2): 155 | """ 156 | :param df2: the dataframe created in question 2 157 | :return: nothing, but saves the figure on the disk 158 | """ 159 | 160 | ################################################# 161 | # Your code goes here ... 
162 | ################################################# 163 | 164 | plt.savefig("{}-Q12.png".format(studentid)) 165 | 166 | 167 | def question_10(df2, continents): 168 | """ 169 | :param df2: the dataframe created in question 2 170 | :return: nothing, but saves the figure on the disk 171 | :param continents: the path for the Countries-Continents.csv file 172 | """ 173 | 174 | ################################################# 175 | # Your code goes here ... 176 | ################################################# 177 | 178 | plt.savefig("{}-Q13.png".format(studentid)) 179 | 180 | 181 | if __name__ == "__main__": 182 | df1 = question_1("exposure.csv", "Countries.csv") 183 | df2 = question_2(df1.copy(True)) 184 | df3 = question_3(df2.copy(True)) 185 | df4 = question_4(df2.copy(True), "Countries-Continents.csv") 186 | df5 = question_5(df2.copy(True)) 187 | lst = question_6(df2.copy(True)) 188 | df7 = question_7(df2.copy(True)) 189 | question_8(df2.copy(True), "Countries-Continents.csv") 190 | question_9(df2.copy(True)) 191 | question_10(df2.copy(True), "Countries-Continents.csv") 192 | -------------------------------------------------------------------------------- /23T1/YOUR_ZID_STARTING_WITH_Z.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import sys 5 | import os 6 | import numpy as np 7 | import math 8 | import re 9 | 10 | studentid = os.path.basename(sys.modules[__name__].__file__) 11 | 12 | 13 | def log(question, output_df, other): 14 | print("--------------- {}----------------".format(question)) 15 | 16 | if other is not None: 17 | print(question, other) 18 | if output_df is not None: 19 | df = output_df.head(5).copy(True) 20 | for c in df.columns: 21 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 22 | 23 | df.columns = [a[:10] + "..." for a in df.columns] 24 | print(df.to_string()) 25 | 26 | 27 | def question_1(city_pairs): 28 | """ 29 | :return: df1 30 | Data Type: Dataframe 31 | Please read the assignment specs to know how to create the output dataframe 32 | """ 33 | 34 | ################################################# 35 | # Your code goes here ... 36 | ################################################# 37 | 38 | log("QUESTION 1", output_df=df1[["AustralianPort", "ForeignPort", "passenger_in_out", "freight_in_out", "mail_in_out"]], other=df1.shape) 39 | return df1 40 | 41 | 42 | def question_2(df1): 43 | """ 44 | :param df1: the dataframe created in question 1 45 | :return: dataframe df2 46 | Please read the assignment specs to know how to create the output dataframe 47 | """ 48 | 49 | ################################################# 50 | # Your code goes here ... 51 | ################################################# 52 | 53 | log("QUESTION 2", output_df=df2, other=df2.shape) 54 | return df2 55 | 56 | 57 | def question_3(df1): 58 | """ 59 | :param df1: the dataframe created in question 1 60 | :return: df3 61 | Data Type: Dataframe 62 | Please read the assignment specs to know how to create the output dataframe 63 | """ 64 | ################################################# 65 | # Your code goes here ... 
66 | ################################################# 67 | 68 | log("QUESTION 3", output_df=df3, other=df3.shape) 69 | return df3 70 | 71 | 72 | def question_4(df1): 73 | """ 74 | :param df1: the dataframe created in question 1 75 | :return: df4 76 | Data Type: Dataframe 77 | Please read the assignment specs to know how to create the output dataframe 78 | """ 79 | 80 | ################################################# 81 | # Your code goes here ... 82 | ################################################# 83 | 84 | log("QUESTION 4", output_df=df4, other=df4.shape) 85 | return df4 86 | 87 | 88 | def question_5(seats): 89 | """ 90 | :param seats : the path to dataset 91 | :return: df5 92 | Data Type: dataframe 93 | Please read the assignment specs to know how to create the output dataframe 94 | """ 95 | ################################################# 96 | # Your code goes here ... 97 | ################################################# 98 | 99 | log("QUESTION 5", output_df=df5, other=df5.shape) 100 | return df5 101 | 102 | 103 | def question_6(df5): 104 | """ 105 | :param df5: the dataframe created in question 5 106 | :return: df6 107 | """ 108 | 109 | ################################################# 110 | # Your code goes here ... 111 | ################################################# 112 | 113 | log("QUESTION 6", output_df=df6, other=df6.shape) 114 | return df6 115 | 116 | 117 | def question_7(seats, city_pairs): 118 | """ 119 | :param seats: the path to dataset 120 | :param city_pairs : the path to dataset 121 | :return: nothing, but saves the figure on the disk 122 | """ 123 | 124 | ################################################# 125 | # Your code goes here ... 126 | ################################################# 127 | 128 | plt.savefig("{}-Q7.png".format(studentid)) 129 | 130 | 131 | if __name__ == "__main__": 132 | df1 = question_1("city_pairs.csv") 133 | df2 = question_2(df1.copy(True)) 134 | df3 = question_3(df1.copy(True)) 135 | df4 = question_4(df1.copy(True)) 136 | df5 = question_5("seats.csv") 137 | df6 = question_6(df5.copy(True)) 138 | question_7("seats.csv", "city_pairs.csv") 139 | -------------------------------------------------------------------------------- /23T1/datasets.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/23T1/datasets.zip -------------------------------------------------------------------------------- /24T1/ass1/requirements.txt: -------------------------------------------------------------------------------- 1 | contourpy==1.2.0 2 | cycler==0.12.1 3 | fonttools==4.49.0 4 | kiwisolver==1.4.5 5 | lxml==5.1.0 6 | matplotlib==3.8.2 7 | numpy==1.26.0 8 | packaging==23.2 9 | pandas==2.2.0 10 | pillow==10.2.0 11 | pyparsing==3.1.1 12 | python-dateutil==2.8.2 13 | pytz==2024.1 14 | rapidfuzz==3.6.1 15 | six==1.16.0 16 | thefuzz==0.22.1 17 | tzdata==2024.1 18 | -------------------------------------------------------------------------------- /24T1/ass1/z1234567.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Third-party libraries 5 | # NOTE: You may **only** use the following third-party libraries: 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | from thefuzz import fuzz 10 | from thefuzz import process 11 | # NOTE: It isn't necessary to use all of these to complete the assignment, 12 | # but you are free to do so, should you choose. 13 | 14 | # Standard libraries 15 | # NOTE: You may use **any** of the Python 3.11 or Python 3.12 standard libraries: 16 | # https://docs.python.org/3.11/library/index.html 17 | # https://docs.python.org/3.12/library/index.html 18 | from pathlib import Path 19 | # ... import your standard libraries here ... 20 | 21 | 22 | ###################################################### 23 | # NOTE: DO NOT MODIFY THE LINE BELOW ... 24 | ###################################################### 25 | studentid = Path(__file__).stem 26 | 27 | ###################################################### 28 | # NOTE: DO NOT MODIFY THE FUNCTION BELOW ... 29 | ###################################################### 30 | def log(question, output_df, other): 31 | print(f"--------------- {question}----------------") 32 | 33 | if other is not None: 34 | print(question, other) 35 | if output_df is not None: 36 | df = output_df.head(5).copy(True) 37 | for c in df.columns: 38 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 39 | 40 | df.columns = [a[:10] + "..." for a in df.columns] 41 | print(df.to_string()) 42 | 43 | 44 | ###################################################### 45 | # NOTE: YOU MAY ADD ANY HELPER FUNCTIONS BELOW ... 46 | ###################################################### 47 | 48 | 49 | 50 | ###################################################### 51 | # QUESTIONS TO COMPLETE BELOW ... 52 | ###################################################### 53 | 54 | ###################################################### 55 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 56 | ###################################################### 57 | def question_1(jobs_csv): 58 | """Read the data science jobs CSV file into a DataFrame. 59 | 60 | See the assignment spec for more details. 61 | 62 | Args: 63 | jobs_csv (str): Path to the jobs CSV file. 64 | 65 | Returns: 66 | DataFrame: The jobs DataFrame. 67 | """ 68 | 69 | ###################################################### 70 | # TODO: Your code goes here ... 71 | ###################################################### 72 | 73 | 74 | 75 | ###################################################### 76 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 77 | ###################################################### 78 | log("QUESTION 1", output_df=df, other=df.shape) 79 | return df 80 | 81 | 82 | 83 | ###################################################### 84 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 85 | ###################################################### 86 | def question_2(cost_csv, cost_url): 87 | """Read the cost of living CSV into a DataFrame. If the CSV file does not 88 | exist, scrape it from the specified URL and save it to the CSV file. 89 | 90 | See the assignment spec for more details. 91 | 92 | Args: 93 | cost_csv (str): Path to the cost of living CSV file. 94 | cost_url (str): URL of the cost of living page. 95 | 96 | Returns: 97 | DataFrame: The cost of living DataFrame. 98 | """ 99 | 100 | ###################################################### 101 | # TODO: Your code goes here ... 
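    # --- Illustrative sketch only: added by the editor, not part of the original
    # template, and not necessarily what the assignment spec requires. ---
    # A common "read the CSV if it exists, otherwise scrape it and cache it"
    # pattern, assuming the page exposes a single HTML table that pandas can
    # parse (lxml is already pinned in requirements.txt, and Path is imported
    # at the top of this file), looks roughly like:
    #
    #     if Path(cost_csv).exists():
    #         df = pd.read_csv(cost_csv)
    #     else:
    #         df = pd.read_html(cost_url)[0]
    #         df.to_csv(cost_csv, index=False)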
102 |     ######################################################
103 | 
104 | 
105 | 
106 |     ######################################################
107 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
108 |     ######################################################
109 |     log("QUESTION 2", output_df=df, other=df.shape)
110 |     return df
111 | 
112 | 
113 | ######################################################
114 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
115 | ######################################################
116 | def question_3(currency_csv, currency_url):
117 |     """Read the currency conversion rates CSV into a DataFrame. If the CSV
118 |     file does not exist, scrape it from the specified URL and save it to
119 |     the CSV file.
120 | 
121 |     See the assignment spec for more details.
122 | 
123 |     Args:
124 |         currency_csv (str): Path to the currency conversion rates CSV file.
125 |         currency_url (str): URL of the currency conversion rates page.
126 | 
127 |     Returns:
128 |         DataFrame: The currency conversion rates DataFrame.
129 |     """
130 | 
131 |     ######################################################
132 |     # TODO: Your code goes here ...
133 |     ######################################################
134 | 
135 | 
136 | 
137 |     ######################################################
138 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
139 |     ######################################################
140 |     log("QUESTION 3", output_df=df, other=df.shape)
141 |     return df
142 | 
143 | 
144 | ######################################################
145 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
146 | ######################################################
147 | def question_4(country_csv, country_url):
148 |     """Read the country codes CSV into a DataFrame. If the CSV file does not
149 |     exist, scrape the data from the specified URL and save it to the
150 |     CSV file.
151 | 
152 |     See the assignment spec for more details.
153 | 
154 |     Args:
155 |         country_csv (str): Path to the country codes CSV file.
156 |         country_url (str): URL of the country codes page.
157 | 
158 |     Returns:
159 |         DataFrame: The country codes DataFrame.
160 |     """
161 | 
162 |     ######################################################
163 |     # TODO: Your code goes here ...
164 |     ######################################################
165 | 
166 | 
167 | 
168 |     ######################################################
169 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
170 |     ######################################################
171 |     log("QUESTION 4", output_df=df, other=df.shape)
172 |     return df
173 | 
174 | 
175 | ######################################################
176 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
177 | ######################################################
178 | def question_5(jobs_df):
179 |     """Summarise some dimensions of the jobs DataFrame.
180 | 
181 |     See the assignment spec for more details.
182 | 
183 |     Args:
184 |         jobs_df (DataFrame): The jobs DataFrame returned in question 1.
185 | 
186 |     Returns:
187 |         DataFrame: The summary DataFrame.
188 |     """
189 | 
190 |     ######################################################
191 |     # TODO: Your code goes here ...
192 |     ######################################################
193 | 
194 | 
195 | 
196 |     ######################################################
197 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
198 | ###################################################### 199 | log("QUESTION 5", output_df=df, other=df.shape) 200 | return df 201 | 202 | 203 | ###################################################### 204 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 205 | ###################################################### 206 | def question_6(jobs_df): 207 | """Add an experience rating column to the jobs DataFrame. 208 | 209 | See the assignment spec for more details. 210 | 211 | Args: 212 | jobs_df (DataFrame): The jobs DataFrame returned in question 1. 213 | 214 | Returns: 215 | DataFrame: The jobs DataFrame with the experience rating column added. 216 | """ 217 | 218 | ###################################################### 219 | # TODO: Your code goes here ... 220 | ###################################################### 221 | 222 | 223 | 224 | ###################################################### 225 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 226 | ###################################################### 227 | log("QUESTION 6", output_df=df, other=df.shape) 228 | return df 229 | 230 | 231 | ###################################################### 232 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 233 | ###################################################### 234 | def question_7(jobs_df, country_df): 235 | """Merge the jobs and country codes DataFrames. 236 | 237 | See the assignment spec for more details. 238 | 239 | Args: 240 | jobs_df (DataFrame): The jobs DataFrame returned in question 6. 241 | country_df (DataFrame): The country codes DataFrame returned in 242 | question 4. 243 | 244 | Returns: 245 | DataFrame: The merged DataFrame. 246 | """ 247 | 248 | ###################################################### 249 | # TODO: Your code goes here ... 250 | ###################################################### 251 | 252 | 253 | 254 | ###################################################### 255 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 256 | ###################################################### 257 | log("QUESTION 7", output_df=df, other=df.shape) 258 | return df 259 | 260 | 261 | ###################################################### 262 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 263 | ###################################################### 264 | def question_8(jobs_df, currency_df): 265 | """Add an Australian dollar salary column to the jobs DataFrame. 266 | 267 | See the assignment spec for more details. 268 | 269 | Args: 270 | jobs_df (DataFrame): The jobs DataFrame returned in question 7. 271 | currency_df (DataFrame): The currency conversion rates DataFrame 272 | returned in question 3. 273 | 274 | Returns: 275 | DataFrame: The jobs DataFrame with the Australian dollar salary column 276 | added. 277 | """ 278 | 279 | ###################################################### 280 | # TODO: Your code goes here ... 281 | ###################################################### 282 | 283 | 284 | 285 | 286 | ###################################################### 287 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 288 | ###################################################### 289 | log("QUESTION 8", output_df=df, other=df.shape) 290 | return df 291 | 292 | 293 | ###################################################### 294 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 295 | ###################################################### 296 | def question_9(cost_df): 297 | """Re-scale the cost of living DataFrame to be relative to Australia. 
298 | 299 | See the assignment spec for more details. 300 | 301 | Args: 302 | cost_df (DataFrame): The cost of living DataFrame returned in question 2. 303 | 304 | Returns: 305 | DataFrame: The re-scaled cost of living DataFrame. 306 | """ 307 | 308 | ###################################################### 309 | # TODO: Your code goes here ... 310 | ###################################################### 311 | 312 | 313 | 314 | ###################################################### 315 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 316 | ###################################################### 317 | log("QUESTION 9", output_df=df, other=df.shape) 318 | return df 319 | 320 | 321 | ###################################################### 322 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 323 | ###################################################### 324 | def question_10(jobs_df, cost_df): 325 | """Merge the jobs and cost of living DataFrames. 326 | 327 | See the assignment spec for more details. 328 | 329 | Args: 330 | jobs_df (DataFrame): The jobs DataFrame returned in question 8. 331 | cost_df (DataFrame): The cost of living DataFrame returned in question 9. 332 | 333 | Returns: 334 | DataFrame: The merged DataFrame. 335 | """ 336 | 337 | ###################################################### 338 | # TODO: Your code goes here ... 339 | ###################################################### 340 | 341 | 342 | 343 | ###################################################### 344 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 345 | ###################################################### 346 | log("QUESTION 10", output_df=df, other=df.shape) 347 | return df 348 | 349 | 350 | ###################################################### 351 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 352 | ###################################################### 353 | def question_11(jobs_df): 354 | """Create a pivot table of the average salary in AUD by country and 355 | experience rating. 356 | 357 | See the assignment spec for more details. 358 | 359 | Args: 360 | jobs_df (DataFrame): The jobs DataFrame returned in question 10. 361 | 362 | Returns: 363 | DataFrame: The pivot table. 364 | """ 365 | 366 | ###################################################### 367 | # TODO: Your code goes here ... 368 | ###################################################### 369 | 370 | 371 | 372 | ###################################################### 373 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 374 | ###################################################### 375 | log("QUESTION 11", output_df=None, other=df) 376 | return df 377 | 378 | 379 | ###################################################### 380 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 381 | ###################################################### 382 | def question_12(jobs_df): 383 | """Create a visualisation of data science jobs to help inform a decision 384 | about where to live, based (minimally) on salary and cost of living. 385 | 386 | See the assignment spec for more details. 387 | 388 | Args: 389 | jobs_df (DataFrame): The jobs DataFrame returned in question 10. 390 | """ 391 | 392 | ###################################################### 393 | # TODO: Your code goes here ... 394 | ###################################################### 395 | 396 | 397 | ###################################################### 398 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 
399 |     ######################################################
400 |     plt.savefig(f"{studentid}-Q12.png")
401 | 
402 | 
403 | ######################################################
404 | # NOTE: DO NOT MODIFY THE MAIN FUNCTION BELOW ...
405 | ######################################################
406 | if __name__ == "__main__":
407 |     # data ingestion and cleaning
408 |     df1 = question_1("ds_jobs.csv")
409 |     df2 = question_2("cost_of_living.csv",
410 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/cost_of_living.html")
411 |     df3 = question_3("exchange_rates.csv",
412 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/exchange_rates.html")
413 |     df4 = question_4("country_codes.csv",
414 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/country_codes.html")
415 | 
416 |     # data exploration
417 |     df5 = question_5(df1.copy(True))
418 | 
419 |     # data manipulation
420 |     df6 = question_6(df1.copy(True))
421 |     df7 = question_7(df6.copy(True), df4.copy(True))
422 |     df8 = question_8(df7.copy(True), df3.copy(True))
423 |     df9 = question_9(df2.copy(True))
424 |     df10 = question_10(df8.copy(True), df9.copy(True))
425 |     df11 = question_11(df10.copy(True))
426 | 
427 |     # data visualisation
428 |     question_12(df10.copy(True))
429 | 
--------------------------------------------------------------------------------
/24T1/ass2/zXXXXXXX.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | COMP9321 24T1 Assignment 2
6 | Data publication as a RESTful service API
7 | 
8 | Getting Started
9 | ---------------
10 | 
11 | 1. You MUST rename this file according to your zID, e.g., z1234567.py.
12 | 
13 | 2. To ensure your submission can be marked correctly, you're strongly encouraged
14 |    to create a new virtual environment for this assignment. Please see the
15 |    instructions in the assignment 1 specification to create and activate a
16 |    virtual environment.
17 | 
18 | 3. Once you have activated your virtual environment, you need to install the
19 |    following required packages:
20 | 
21 |    pip install python-dotenv==1.0.1
22 |    pip install google-generativeai==0.4.1
23 | 
24 |    You may also use any of the packages we've used in the weekly labs.
25 |    The most likely ones you'll want to install are:
26 | 
27 |    pip install flask==3.0.2
28 |    pip install flask_restx==1.3.0
29 |    pip install requests==2.31.0
30 | 
31 | 4. Create a file called `.env` in the same directory as this file. This file
32 |    will contain the Google API key you generate in the next step.
33 | 
34 | 5. Go to the following page, click on the link to "Get an API key", and follow
35 |    the instructions to generate an API key:
36 | 
37 |    https://ai.google.dev/tutorials/python_quickstart
38 | 
39 | 6. Add the following line to your `.env` file, replacing `your-api-key` with
40 |    the API key you generated, and save the file:
41 | 
42 |    GOOGLE_API_KEY=your-api-key
43 | 
44 | 7. You can now start implementing your solution. You are free to edit this file however you like, but keep it readable
45 |    so that a marker can read and understand your code if necessary for partial marks.
46 | 
47 | Submission
48 | ----------
49 | 
50 | You need to submit this Python file and a `requirements.txt` file.
51 | 
52 | The `requirements.txt` file should list all the Python packages your code relies
53 | on, and their versions.
You can generate this file by running the following 54 | command while your virtual environment is active: 55 | 56 | pip freeze > requirements.txt 57 | 58 | You can submit the two files using the following command when connected to CSE, 59 | and assuming the files are in the current directory (remember to replace `zid` 60 | with your actual zID, i.e. the name of this file after renaming it): 61 | 62 | give cs9321 assign2 zid.py requirements.txt 63 | 64 | You can also submit through WebCMS3, using the tab at the top of the assignment 65 | page. 66 | 67 | """ 68 | 69 | # You can import more modules from the standard library here if you need them 70 | # (which you will, e.g. sqlite3). 71 | import os 72 | from pathlib import Path 73 | 74 | # You can import more third-party packages here if you need them, provided 75 | # that they've been used in the weekly labs, or specified in this assignment, 76 | # and their versions match. 77 | from dotenv import load_dotenv # Needed to load the environment variables from the .env file 78 | import google.generativeai as genai # Needed to access the Generative AI API 79 | 80 | 81 | studentid = Path(__file__).stem # Will capture your zID from the filename. 82 | db_file = f"{studentid}.db" # Use this variable when referencing the SQLite database file. 83 | txt_file = f"{studentid}.txt" # Use this variable when referencing the txt file for Q7. 84 | 85 | 86 | # Load the environment variables from the .env file 87 | load_dotenv() 88 | 89 | # Configure the API key 90 | genai.configure(api_key=os.environ["GOOGLE_API_KEY"]) 91 | 92 | # Create a Gemini Pro model 93 | gemini = genai.GenerativeModel('gemini-pro') 94 | 95 | if __name__ == "__main__": 96 | # Here's a quick example of using the Generative AI API: 97 | question = "Give me some facts about UNSW!" 
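    # generate_content() sends the prompt to the Gemini model and returns a
    # response object; its .text attribute holds the generated answer.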
98 | response = gemini.generate_content(question) 99 | print(question) 100 | print(response.text) 101 | -------------------------------------------------------------------------------- /24T1/ass3/requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==4.3.0 2 | appnope==0.1.4 3 | argon2-cffi==23.1.0 4 | argon2-cffi-bindings==21.2.0 5 | arrow==1.3.0 6 | asttokens==2.4.1 7 | async-lru==2.0.4 8 | attrs==23.2.0 9 | Babel==2.14.0 10 | beautifulsoup4==4.12.3 11 | bleach==6.1.0 12 | certifi==2024.2.2 13 | cffi==1.16.0 14 | charset-normalizer==3.3.2 15 | comm==0.2.2 16 | contourpy==1.2.0 17 | cycler==0.12.1 18 | debugpy==1.8.1 19 | decorator==5.1.1 20 | defusedxml==0.7.1 21 | dnspython==2.5.0 22 | executing==2.0.1 23 | fastjsonschema==2.19.1 24 | fonttools==4.49.0 25 | fqdn==1.5.1 26 | h11==0.14.0 27 | httpcore==1.0.4 28 | httpx==0.27.0 29 | idna==3.6 30 | imbalanced-learn==0.12.0 31 | imblearn==0.0 32 | ipykernel==6.29.3 33 | ipython==8.22.2 34 | isoduration==20.11.0 35 | jedi==0.19.1 36 | Jinja2==3.1.3 37 | joblib==1.3.2 38 | json5==0.9.24 39 | jsonpointer==2.4 40 | jsonschema==4.21.1 41 | jsonschema-specifications==2023.12.1 42 | jupyter-events==0.9.1 43 | jupyter-lsp==2.2.4 44 | jupyter_client==8.6.1 45 | jupyter_core==5.7.2 46 | jupyter_server==2.13.0 47 | jupyter_server_terminals==0.5.3 48 | jupyterlab==4.1.5 49 | jupyterlab_pygments==0.3.0 50 | jupyterlab_server==2.25.4 51 | kiwisolver==1.4.5 52 | lightgbm==4.3.0 53 | lxml==5.1.0 54 | MarkupSafe==2.1.5 55 | matplotlib==3.8.2 56 | matplotlib-inline==0.1.6 57 | mistune==3.0.2 58 | nbclient==0.10.0 59 | nbconvert==7.16.2 60 | nbformat==5.10.3 61 | nest-asyncio==1.6.0 62 | notebook==7.1.2 63 | notebook_shim==0.2.4 64 | numpy==1.26.0 65 | overrides==7.7.0 66 | packaging==23.2 67 | pandas==2.2.0 68 | pandocfilters==1.5.1 69 | parso==0.8.3 70 | pexpect==4.9.0 71 | pillow==10.2.0 72 | platformdirs==4.2.0 73 | prometheus_client==0.20.0 74 | prompt-toolkit==3.0.43 75 | psutil==5.9.8 76 | ptyprocess==0.7.0 77 | pure-eval==0.2.2 78 | pyarrow==15.0.1 79 | pycparser==2.21 80 | Pygments==2.17.2 81 | pymongo==4.6.1 82 | pyparsing==3.1.1 83 | python-dateutil==2.8.2 84 | python-json-logger==2.0.7 85 | pytz==2024.1 86 | PyYAML==6.0.1 87 | pyzmq==25.1.2 88 | rapidfuzz==3.6.1 89 | referencing==0.34.0 90 | requests==2.31.0 91 | rfc3339-validator==0.1.4 92 | rfc3986-validator==0.1.1 93 | rpds-py==0.18.0 94 | scikit-learn==1.4.1.post1 95 | scipy==1.12.0 96 | seaborn==0.13.2 97 | Send2Trash==1.8.2 98 | six==1.16.0 99 | sniffio==1.3.1 100 | soupsieve==2.5 101 | stack-data==0.6.3 102 | terminado==0.18.1 103 | thefuzz==0.22.1 104 | threadpoolctl==3.3.0 105 | tinycss2==1.2.1 106 | tornado==6.4 107 | traitlets==5.14.2 108 | types-python-dateutil==2.9.0.20240316 109 | tzdata==2024.1 110 | uri-template==1.3.0 111 | urllib3==2.2.1 112 | wcwidth==0.2.13 113 | webcolors==1.13 114 | webencodings==0.5.1 115 | websocket-client==1.7.0 116 | xgboost==2.0.3 117 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/Olympics_dataset.csv: -------------------------------------------------------------------------------- 1 | Country,Num_games_s,Gold_s,Silver_s,Bronze_s,Total_s,Num_games_w,Gold_w,Silver_w,Bronze_w,Total_w,Num_games_t,Gold_t,Silver_t,Bronze_t,Total_t 2 | Afghanistan,14.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,2.0,2.0 3 | Algeria,13.0,5.0,4.0,8.0,17.0,3.0,0.0,0.0,0.0,0.0,16.0,5.0,4.0,8.0,17.0 4 | 
Argentina,24.0,21.0,25.0,28.0,74.0,19.0,0.0,0.0,0.0,0.0,43.0,21.0,25.0,28.0,74.0 5 | Armenia,6.0,2.0,6.0,6.0,14.0,7.0,0.0,0.0,0.0,0.0,13.0,2.0,6.0,6.0,14.0 6 | Australasia,2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0 7 | Australia,26.0,147.0,163.0,187.0,497.0,19.0,5.0,5.0,5.0,15.0,45.0,152.0,168.0,192.0,512.0 8 | Austria,27.0,18.0,33.0,36.0,87.0,23.0,64.0,81.0,87.0,232.0,50.0,82.0,114.0,123.0,319.0 9 | Azerbaijan,6.0,7.0,11.0,25.0,43.0,6.0,0.0,0.0,0.0,0.0,12.0,7.0,11.0,25.0,43.0 10 | Bahamas,16.0,6.0,2.0,6.0,14.0,0.0,0.0,0.0,0.0,0.0,16.0,6.0,2.0,6.0,14.0 11 | Bahrain,9.0,2.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,1.0,0.0,3.0 12 | Barbados,12.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 13 | Belarus,6.0,12.0,27.0,39.0,78.0,7.0,8.0,5.0,5.0,18.0,13.0,20.0,32.0,44.0,96.0 14 | Belgium,26.0,40.0,53.0,55.0,148.0,21.0,1.0,2.0,3.0,6.0,47.0,41.0,55.0,58.0,154.0 15 | Bermuda,18.0,0.0,0.0,1.0,1.0,8.0,0.0,0.0,0.0,0.0,26.0,0.0,0.0,1.0,1.0 16 | Bohemia,3.0,0.0,1.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,3.0,4.0 17 | Botswana,10.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,1.0,0.0,1.0 18 | Brazil,22.0,30.0,36.0,62.0,128.0,8.0,0.0,0.0,0.0,0.0,30.0,30.0,36.0,62.0,128.0 19 | British West Indies,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0 20 | Bulgaria,20.0,51.0,87.0,80.0,218.0,20.0,1.0,2.0,3.0,6.0,40.0,52.0,89.0,83.0,224.0 21 | Burundi,6.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,1.0,0.0,2.0 22 | Cameroon,14.0,3.0,1.0,2.0,6.0,1.0,0.0,0.0,0.0,0.0,15.0,3.0,1.0,2.0,6.0 23 | Canada,26.0,64.0,102.0,136.0,302.0,23.0,73.0,64.0,62.0,199.0,49.0,137.0,166.0,198.0,501.0 24 | Chile,23.0,2.0,7.0,4.0,13.0,17.0,0.0,0.0,0.0,0.0,40.0,2.0,7.0,4.0,13.0 25 | China,10.0,224.0,167.0,155.0,546.0,11.0,13.0,28.0,21.0,62.0,21.0,237.0,195.0,176.0,608.0 26 | Colombia,19.0,5.0,9.0,14.0,28.0,2.0,0.0,0.0,0.0,0.0,21.0,5.0,9.0,14.0,28.0 27 | Costa Rica,15.0,1.0,1.0,2.0,4.0,6.0,0.0,0.0,0.0,0.0,21.0,1.0,1.0,2.0,4.0 28 | Ivory Coast,13.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,13.0,1.0,1.0,1.0,3.0 29 | Croatia,7.0,11.0,10.0,12.0,33.0,8.0,4.0,6.0,1.0,11.0,15.0,15.0,16.0,13.0,44.0 30 | Cuba,20.0,78.0,68.0,79.0,225.0,0.0,0.0,0.0,0.0,0.0,20.0,78.0,68.0,79.0,225.0 31 | Cyprus,10.0,0.0,1.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,21.0,0.0,1.0,0.0,1.0 32 | Czech Republic,6.0,15.0,17.0,24.0,56.0,7.0,9.0,11.0,11.0,31.0,13.0,24.0,28.0,35.0,87.0 33 | Czechoslovakia,16.0,49.0,49.0,45.0,143.0,16.0,2.0,8.0,15.0,25.0,32.0,51.0,57.0,60.0,168.0 34 | Denmark,27.0,45.0,74.0,75.0,194.0,14.0,0.0,1.0,0.0,1.0,41.0,45.0,75.0,75.0,195.0 35 | Djibouti,8.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,1.0,1.0 36 | Dominican Republic,14.0,3.0,2.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,14.0,3.0,2.0,2.0,7.0 37 | Ecuador,14.0,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,15.0,1.0,1.0,0.0,2.0 38 | Egypt,22.0,7.0,10.0,15.0,32.0,1.0,0.0,0.0,0.0,0.0,23.0,7.0,10.0,15.0,32.0 39 | Eritrea,5.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0 40 | Estonia,12.0,9.0,9.0,16.0,34.0,10.0,4.0,2.0,1.0,7.0,22.0,13.0,11.0,17.0,41.0 41 | Ethiopia,13.0,22.0,11.0,20.0,53.0,2.0,0.0,0.0,0.0,0.0,15.0,22.0,11.0,20.0,53.0 42 | Fiji,14.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,0.0,1.0 43 | Finland,25.0,101.0,85.0,117.0,303.0,23.0,43.0,63.0,61.0,167.0,48.0,144.0,148.0,178.0,470.0 44 | France,28.0,212.0,241.0,263.0,716.0,23.0,36.0,35.0,53.0,124.0,51.0,248.0,276.0,316.0,840.0 45 | Gabon,10.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,1.0,0.0,1.0 46 | Georgia,6.0,8.0,8.0,17.0,33.0,7.0,0.0,0.0,0.0,0.0,13.0,8.0,8.0,17.0,33.0 47 | 
Germany,16.0,191.0,194.0,230.0,615.0,12.0,92.0,88.0,60.0,240.0,28.0,283.0,282.0,290.0,855.0 48 | United Team of Germany,3.0,28.0,54.0,36.0,118.0,3.0,8.0,6.0,5.0,19.0,6.0,36.0,60.0,41.0,137.0 49 | East Germany,5.0,153.0,129.0,127.0,409.0,6.0,39.0,36.0,35.0,110.0,11.0,192.0,165.0,162.0,519.0 50 | West Germany,5.0,56.0,67.0,81.0,204.0,6.0,11.0,15.0,13.0,39.0,11.0,67.0,82.0,94.0,243.0 51 | Ghana,14.0,0.0,1.0,3.0,4.0,2.0,0.0,0.0,0.0,0.0,16.0,0.0,1.0,3.0,4.0 52 | Great Britain,28.0,263.0,295.0,291.0,849.0,23.0,11.0,4.0,16.0,31.0,51.0,274.0,299.0,307.0,880.0 53 | Greece,28.0,33.0,43.0,40.0,116.0,19.0,0.0,0.0,0.0,0.0,47.0,33.0,43.0,40.0,116.0 54 | Grenada,9.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,1.0,1.0,0.0,2.0 55 | Guatemala,14.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,0.0,1.0 56 | Guyana,17.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,1.0,1.0 57 | Haiti,15.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,1.0,2.0 58 | Hong Kong,16.0,1.0,1.0,1.0,3.0,5.0,0.0,0.0,0.0,0.0,21.0,1.0,1.0,1.0,3.0 59 | Hungary,26.0,175.0,147.0,169.0,491.0,23.0,1.0,2.0,4.0,7.0,49.0,176.0,149.0,173.0,498.0 60 | Iceland,20.0,0.0,2.0,2.0,4.0,18.0,0.0,0.0,0.0,0.0,38.0,0.0,2.0,2.0,4.0 61 | India,24.0,9.0,7.0,12.0,28.0,10.0,0.0,0.0,0.0,0.0,34.0,9.0,7.0,12.0,28.0 62 | Indonesia,15.0,7.0,13.0,12.0,32.0,0.0,0.0,0.0,0.0,0.0,15.0,7.0,13.0,12.0,32.0 63 | Iran,16.0,19.0,22.0,28.0,69.0,11.0,0.0,0.0,0.0,0.0,27.0,19.0,22.0,28.0,69.0 64 | Iraq,14.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,1.0,1.0 65 | Ireland,21.0,9.0,10.0,12.0,31.0,7.0,0.0,0.0,0.0,0.0,28.0,9.0,10.0,12.0,31.0 66 | Israel,16.0,1.0,1.0,7.0,9.0,7.0,0.0,0.0,0.0,0.0,23.0,1.0,1.0,7.0,9.0 67 | Italy,27.0,206.0,178.0,193.0,577.0,23.0,40.0,36.0,48.0,124.0,50.0,246.0,214.0,241.0,701.0 68 | Jamaica,17.0,22.0,35.0,20.0,77.0,8.0,0.0,0.0,0.0,0.0,25.0,22.0,35.0,20.0,77.0 69 | Japan,22.0,142.0,135.0,162.0,439.0,21.0,14.0,22.0,22.0,58.0,43.0,156.0,157.0,184.0,497.0 70 | Jordan,10.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,1.0,0.0,0.0,1.0 71 | Kazakhstan,6.0,15.0,20.0,27.0,62.0,7.0,1.0,3.0,4.0,8.0,13.0,16.0,23.0,31.0,70.0 72 | Kenya,14.0,31.0,38.0,33.0,102.0,4.0,0.0,0.0,0.0,0.0,18.0,31.0,38.0,33.0,102.0 73 | Kosovo,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0 74 | North Korea,10.0,16.0,16.0,22.0,54.0,9.0,0.0,1.0,1.0,2.0,19.0,16.0,17.0,23.0,56.0 75 | South Korea,17.0,90.0,87.0,90.0,267.0,18.0,31.0,25.0,14.0,70.0,35.0,121.0,112.0,104.0,337.0 76 | Kuwait,12.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,2.0,2.0 77 | Kyrgyzstan,6.0,0.0,1.0,3.0,4.0,7.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,3.0,4.0 78 | Latvia,11.0,3.0,11.0,5.0,19.0,11.0,0.0,4.0,4.0,8.0,22.0,3.0,15.0,9.0,27.0 79 | Lebanon,17.0,0.0,2.0,2.0,4.0,17.0,0.0,0.0,0.0,0.0,34.0,0.0,2.0,2.0,4.0 80 | Liechtenstein,17.0,0.0,0.0,0.0,0.0,19.0,2.0,2.0,6.0,10.0,36.0,2.0,2.0,6.0,10.0 81 | Lithuania,9.0,6.0,7.0,12.0,25.0,9.0,0.0,0.0,0.0,0.0,18.0,6.0,7.0,12.0,25.0 82 | Luxembourg,23.0,1.0,1.0,0.0,2.0,9.0,0.0,2.0,0.0,2.0,32.0,1.0,3.0,0.0,4.0 83 | Macedonia,6.0,0.0,0.0,1.0,1.0,6.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 84 | Malaysia,13.0,0.0,7.0,4.0,11.0,1.0,0.0,0.0,0.0,0.0,14.0,0.0,7.0,4.0,11.0 85 | Mauritius,9.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,1.0,1.0 86 | Mexico,23.0,13.0,24.0,32.0,69.0,9.0,0.0,0.0,0.0,0.0,32.0,13.0,24.0,32.0,69.0 87 | Moldova,6.0,0.0,2.0,3.0,5.0,7.0,0.0,0.0,0.0,0.0,13.0,0.0,2.0,3.0,5.0 88 | Mongolia,13.0,2.0,10.0,14.0,26.0,14.0,0.0,0.0,0.0,0.0,27.0,2.0,10.0,14.0,26.0 89 | Montenegro,3.0,0.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,6.0,0.0,1.0,0.0,1.0 90 | 
Morocco,14.0,6.0,5.0,12.0,23.0,7.0,0.0,0.0,0.0,0.0,21.0,6.0,5.0,12.0,23.0 91 | Mozambique,10.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,10.0,1.0,0.0,1.0,2.0 92 | Namibia,7.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,4.0,0.0,4.0 93 | Netherlands,26.0,85.0,92.0,108.0,285.0,21.0,45.0,44.0,41.0,130.0,47.0,130.0,136.0,149.0,415.0 94 | Netherlands Antilles,13.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,0.0,1.0 95 | New Zealand,23.0,46.0,27.0,44.0,117.0,16.0,0.0,1.0,2.0,3.0,39.0,46.0,28.0,46.0,120.0 96 | Niger,12.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,1.0,1.0,2.0 97 | Nigeria,16.0,3.0,10.0,12.0,25.0,1.0,0.0,0.0,0.0,0.0,17.0,3.0,10.0,12.0,25.0 98 | Norway,25.0,56.0,49.0,47.0,152.0,23.0,132.0,125.0,111.0,368.0,48.0,188.0,174.0,158.0,520.0 99 | Pakistan,17.0,3.0,3.0,4.0,10.0,3.0,0.0,0.0,0.0,0.0,20.0,3.0,3.0,4.0,10.0 100 | Panama,17.0,1.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,2.0,3.0 101 | Paraguay,12.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,0.0,1.0 102 | Peru,18.0,1.0,3.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,20.0,1.0,3.0,0.0,4.0 103 | Philippines,21.0,0.0,3.0,7.0,10.0,5.0,0.0,0.0,0.0,0.0,26.0,0.0,3.0,7.0,10.0 104 | Poland,21.0,68.0,83.0,133.0,284.0,23.0,7.0,7.0,8.0,22.0,44.0,75.0,90.0,141.0,306.0 105 | Portugal,24.0,4.0,8.0,12.0,24.0,8.0,0.0,0.0,0.0,0.0,32.0,4.0,8.0,12.0,24.0 106 | Puerto Rico,18.0,1.0,2.0,6.0,9.0,7.0,0.0,0.0,0.0,0.0,25.0,1.0,2.0,6.0,9.0 107 | Qatar,9.0,0.0,1.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,1.0,4.0,5.0 108 | Romania,21.0,89.0,95.0,122.0,306.0,21.0,0.0,0.0,1.0,1.0,42.0,89.0,95.0,123.0,307.0 109 | Russia,6.0,149.0,124.0,153.0,426.0,6.0,47.0,38.0,35.0,120.0,12.0,196.0,162.0,188.0,546.0 110 | Russian Empire,3.0,1.0,4.0,3.0,8.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,4.0,3.0,8.0 111 | Soviet Union,9.0,395.0,319.0,296.0,1010.0,9.0,78.0,57.0,59.0,194.0,18.0,473.0,376.0,355.0,1204.0 112 | Saudi Arabia,11.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,2.0,3.0 113 | Samoa,9.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,1.0,0.0,1.0 114 | Senegal,14.0,0.0,1.0,0.0,1.0,5.0,0.0,0.0,0.0,0.0,19.0,0.0,1.0,0.0,1.0 115 | Serbia,4.0,3.0,6.0,6.0,15.0,3.0,0.0,0.0,0.0,0.0,7.0,3.0,6.0,6.0,15.0 116 | Serbia and Montenegro,1.0,0.0,2.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,2.0 117 | Singapore,16.0,1.0,2.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,17.0,1.0,2.0,2.0,5.0 118 | Slovakia,6.0,9.0,12.0,7.0,28.0,7.0,3.0,4.0,1.0,8.0,13.0,12.0,16.0,8.0,36.0 119 | Slovenia,7.0,5.0,8.0,10.0,23.0,8.0,2.0,5.0,10.0,17.0,15.0,7.0,13.0,20.0,40.0 120 | South Africa,19.0,26.0,31.0,29.0,86.0,7.0,0.0,0.0,0.0,0.0,26.0,26.0,31.0,29.0,86.0 121 | Spain,23.0,45.0,64.0,41.0,150.0,20.0,1.0,0.0,3.0,4.0,43.0,46.0,64.0,44.0,154.0 122 | Sri Lanka,17.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,2.0,0.0,2.0 123 | Sudan,12.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,1.0,0.0,1.0 124 | Suriname,12.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,1.0,0.0,1.0,2.0 125 | Sweden,27.0,145.0,170.0,179.0,494.0,23.0,57.0,46.0,55.0,158.0,50.0,202.0,216.0,234.0,652.0 126 | Switzerland,28.0,50.0,75.0,67.0,192.0,23.0,55.0,46.0,52.0,153.0,51.0,105.0,121.0,119.0,345.0 127 | Syria,13.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,13.0,1.0,1.0,1.0,3.0 128 | Chinese Taipei,14.0,5.0,7.0,12.0,24.0,12.0,0.0,0.0,0.0,0.0,26.0,5.0,7.0,12.0,24.0 129 | Tajikistan,6.0,1.0,1.0,2.0,4.0,4.0,0.0,0.0,0.0,0.0,10.0,1.0,1.0,2.0,4.0 130 | Tanzania,13.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,2.0,0.0,2.0 131 | Thailand,16.0,9.0,8.0,16.0,33.0,4.0,0.0,0.0,0.0,0.0,20.0,9.0,8.0,16.0,33.0 132 | Togo,10.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 133 | 
Tonga,9.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,0.0,1.0 134 | Trinidad and Tobago,17.0,2.0,6.0,11.0,19.0,3.0,0.0,0.0,0.0,0.0,20.0,2.0,6.0,11.0,19.0 135 | Tunisia,14.0,4.0,2.0,7.0,13.0,0.0,0.0,0.0,0.0,0.0,14.0,4.0,2.0,7.0,13.0 136 | Turkey,22.0,39.0,24.0,28.0,91.0,17.0,0.0,0.0,0.0,0.0,39.0,39.0,24.0,28.0,91.0 137 | Uganda,15.0,2.0,3.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,15.0,2.0,3.0,2.0,7.0 138 | Ukraine,6.0,35.0,30.0,56.0,121.0,7.0,3.0,1.0,4.0,8.0,13.0,38.0,31.0,60.0,129.0 139 | United Arab Emirates,9.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,1.0,0.0,1.0,2.0 140 | United States,27.0,1022.0,795.0,705.0,2522.0,23.0,105.0,110.0,90.0,305.0,50.0,1127.0,905.0,795.0,2827.0 141 | Uruguay,21.0,2.0,2.0,6.0,10.0,1.0,0.0,0.0,0.0,0.0,22.0,2.0,2.0,6.0,10.0 142 | Uzbekistan,6.0,8.0,6.0,17.0,31.0,7.0,1.0,0.0,0.0,1.0,13.0,9.0,6.0,17.0,32.0 143 | Venezuela,18.0,2.0,3.0,10.0,15.0,4.0,0.0,0.0,0.0,0.0,22.0,2.0,3.0,10.0,15.0 144 | Vietnam,15.0,1.0,3.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,15.0,1.0,3.0,0.0,4.0 145 | Virgin Islands,12.0,0.0,1.0,0.0,1.0,7.0,0.0,0.0,0.0,0.0,19.0,0.0,1.0,0.0,1.0 146 | Yugoslavia,18.0,28.0,31.0,31.0,90.0,16.0,0.0,3.0,1.0,4.0,34.0,28.0,34.0,32.0,94.0 147 | Zambia,13.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,1.0,2.0 148 | Zimbabwe,13.0,3.0,4.0,1.0,8.0,1.0,0.0,0.0,0.0,0.0,14.0,3.0,4.0,1.0,8.0 149 | Unified Team,1.0,45.0,38.0,29.0,112.0,1.0,9.0,6.0,8.0,23.0,2.0,54.0,44.0,37.0,135.0 150 | Independent Olympic Athletes,3.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,1.0,2.0 151 | Independent Olympic Participants,1.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,3.0 152 | Olympic Athletes from Russia,0.0,0.0,0.0,0.0,0.0,1.0,2.0,6.0,9.0,17.0,1.0,2.0,6.0,9.0,17.0 153 | Mixed team,3.0,8.0,5.0,4.0,17.0,0.0,0.0,0.0,0.0,0.0,3.0,8.0,5.0,4.0,17.0 154 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/README.md: -------------------------------------------------------------------------------- 1 | # Choropleth Map of Olympics Medals 2 | 3 | In this activity, we will use the [Plotly](https://plot.ly/) library to create an 4 | interactive choropleth map of Olympics medals. We will then make use of the 5 | [Dash](https://plot.ly/products/dash/) library to create a very simple web app to 6 | display the map. 7 | 8 | ## Getting Started 9 | 10 | Install the required packages using the following command 11 | 12 | ``` 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | ## Creating an interactive choropleth map 17 | 18 | Here we use the Olympics dataset that has already been tidied up. 
19 | We generally pass two arguments to the Plotly plot function 20 | 21 | * `data` is used to pass in the data to be plotted 22 | * `layout` is used to customise the layout such as setting the title and font 23 | 24 | ```python 25 | import pandas as pd 26 | import plotly.offline as py 27 | 28 | df = pd.read_csv("Olympics_dataset.csv", thousands=",") 29 | 30 | data = [dict( 31 | type="choropleth", 32 | locations=df["Country"], 33 | locationmode="country names", 34 | z=df["Total_t"], 35 | colorbar=dict(title="Number of medals") 36 | )] 37 | 38 | layout = dict( 39 | title="All-time Olympic Games medal" 40 | ) 41 | 42 | fig = dict(data=data, layout=layout) 43 | py.plot(fig, filename='choropleth_map.html') 44 | ``` 45 | 46 | ### Customising the choropleth map 47 | 48 | We can change the colour scale by passing in a custom `colorscale` as a list of [value, colour] pairs 49 | 50 | ```python 51 | data = [dict( 52 | type="choropleth", 53 | locations=df["Country"], 54 | locationmode="country names", 55 | z=df["Total_t"], 56 | colorscale=[[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"], 57 | [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]], 58 | autocolorscale=False, 59 | reversescale=True, 60 | )] 61 | ``` 62 | 63 | You can find more customisation options on the [reference page](https://plot.ly/python/reference/#choropleth) for choropleth maps. 64 | 65 | ## Creating a simple web app 66 | 67 | Using the `data` and `layout` variables from above, we can create a very basic web app with a few lines of code. 68 | Create a file named `app.py` with the following code 69 | 70 | ```python 71 | import dash 72 | import dash_core_components as dcc 73 | import dash_html_components as html 74 | 75 | app = dash.Dash(__name__) 76 | app.layout = html.Div([ 77 | dcc.Graph( 78 | id="medals_graph", 79 | figure={ 80 | "data": data, 81 | "layout": layout 82 | } 83 | ), 84 | ]) 85 | 86 | if __name__ == '__main__': 87 | app.run_server(debug=True) 88 | ``` 89 | 90 | Run it with `python app.py` and visit [http://127.0.0.1:8050/](http://127.0.0.1:8050/) in your web browser. 91 | You should be able to see your app. 92 | 93 | ### Adding a basic callback function 94 | 95 | Let's add some radio buttons to show the medals for the different Olympic Games using `RadioItems`. 96 | Create a new file or replace `app.py` as follows 97 | 98 | ```python 99 | import pandas as pd 100 | import dash 101 | import dash_core_components as dcc 102 | import dash_html_components as html 103 | 104 | df = pd.read_csv("Olympics_dataset.csv", skipinitialspace=True, thousands=",") 105 | 106 | app = dash.Dash(__name__) 107 | app.layout = html.Div([ 108 | dcc.Graph(id="medals_graph"), 109 | 110 | html.Div([ 111 | html.H4("Games"), 112 | 113 | dcc.RadioItems( 114 | id="game_type", 115 | options=[ 116 | {"label": "Combined Total", "value": "combined"}, 117 | {"label": "Summer Games", "value": "summer"}, 118 | {"label": "Winter Games", "value": "winter"}], 119 | value="combined" 120 | )] 121 | ) 122 | ]) 123 | 124 | if __name__ == '__main__': 125 | app.run_server(debug=True) 126 | ``` 127 | 128 | Next, we need to create a callback function to handle the input from the radio buttons. 129 | We set the column name based on the radio button input to extract the relevant data from the dataframe.
130 | 131 | ```python 132 | @app.callback( 133 | dash.dependencies.Output("medals_graph", "figure"), 134 | [dash.dependencies.Input("game_type", "value")]) 135 | def update_figure(game_type): 136 | if game_type == "summer": 137 | column_name = "Total_s" 138 | elif game_type == "winter": 139 | column_name = "Total_w" 140 | else: 141 | column_name = "Total_t" 142 | 143 | data = [dict( 144 | type="choropleth", 145 | locations=df["Country"], 146 | locationmode="country names", 147 | z=df[column_name], 148 | colorbar=dict(title="Number of medals") 149 | )] 150 | 151 | layout = dict( 152 | title="All-time Olympic Games medal" 153 | ) 154 | 155 | return {"data": data, "layout": layout} 156 | ``` 157 | 158 | ### Challenge 159 | 160 | Add another set of radio buttons of medal types to choose from. 161 | [Here](https://comp9321-ass1-extra.herokuapp.com/) is an example of the final web app. 162 | 163 | ## References 164 | 165 | * https://plot.ly/python/choropleth-maps/ 166 | * https://dash.plot.ly/getting-started-part-2 -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/app.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import dash 3 | import dash_core_components as dcc 4 | import dash_html_components as html 5 | import pandas as pd 6 | 7 | df = pd.read_csv("Olympics_dataset.csv", skipinitialspace=True, thousands=",") 8 | 9 | app = dash.Dash(__name__) 10 | server = app.server 11 | 12 | app.layout = html.Div([ 13 | dcc.Graph(id="medals_graph"), 14 | 15 | html.Div([ 16 | html.H4("Games"), 17 | 18 | dcc.RadioItems( 19 | id="count_type", 20 | options=[ 21 | {"label": "Combined Total", "value": "combined"}, 22 | {"label": "Summer Games", "value": "summer"}, 23 | {"label": "Winter Games", "value": "winter"}], 24 | value="combined" 25 | )], 26 | 27 | style={'width': '48%', 'display': 'inline-block'} 28 | ), 29 | 30 | html.Div([ 31 | html.H4("Medals"), 32 | 33 | dcc.RadioItems( 34 | id="medal_type", 35 | options=[ 36 | {"label": "All medals", "value": "all"}, 37 | {"label": "Gold", "value": "gold"}, 38 | {"label": "Silver", "value": "silver"}, 39 | {"label": "Bronze", "value": "bronze"} 40 | ], 41 | value="all" 42 | )], 43 | 44 | style={'width': '48%', 'float': 'right', 'display': 'inline-block'} 45 | ) 46 | ]) 47 | 48 | 49 | @app.callback( 50 | dash.dependencies.Output("medals_graph", "figure"), 51 | [dash.dependencies.Input("count_type", "value"), 52 | dash.dependencies.Input("medal_type", "value")]) 53 | def update_figure(count_type, medal_type): 54 | if medal_type == "all": 55 | column_name = "Total_" 56 | elif medal_type == "gold": 57 | column_name = "Gold_" 58 | elif medal_type == "silver": 59 | column_name = "Silver_" 60 | else: 61 | column_name = "Bronze_" 62 | 63 | if count_type == "summer": 64 | column_name += "s" 65 | elif count_type == "winter": 66 | column_name += "w" 67 | else: 68 | column_name += "t" 69 | 70 | data = [dict( 71 | type="choropleth", 72 | locations=df["Country"], 73 | locationmode="country names", 74 | z=df[column_name], 75 | colorscale=[[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"], 76 | [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]], 77 | autocolorscale=False, 78 | reversescale=True, 79 | marker=dict( 80 | line=dict( 81 | color="rgb(180,180,180)", 82 | width=0.5 83 | )), 84 | colorbar=dict( 85 | autotick=False, 86 | title="Number of medals"), 87 | )] 88 | 89 | layout = dict( 90 | 
title="All-time Olympic Games medal", 91 | geo=dict( 92 | showframe=False, 93 | showcoastlines=False, 94 | projection=dict( 95 | type="Mercator" 96 | ) 97 | ) 98 | ) 99 | 100 | return {"data": data, "layout": layout} 101 | 102 | 103 | if __name__ == "__main__": 104 | app.run_server(debug=True) 105 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/requirements.txt: -------------------------------------------------------------------------------- 1 | dash 2 | dash-renderer 3 | dash-core-components 4 | dash-html-components 5 | plotly 6 | pandas -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent any 3 | triggers { 4 | pollSCM('') //Empty quotes tells it to build on a push 5 | } 6 | stages { 7 | stage('Build') { 8 | steps { 9 | echo 'Building..' 10 | } 11 | } 12 | stage('Test') { 13 | steps { 14 | echo 'Testing..' 15 | } 16 | } 17 | stage('Deploy') { 18 | steps { 19 | echo 'Deploying....' 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code Repository for COMP9321, Data-Services-Engineering UNSW 2 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn import linear_model 3 | from sklearn.metrics import mean_squared_error 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_diet(diet_path, split_percentage): 8 | df = pd.read_csv(diet_path, index_col=0) 9 | 10 | df = shuffle(df) 11 | diet_x = df.drop('weight6weeks', axis=1).values 12 | diet_y = df['weight6weeks'].values 13 | 14 | # Split the dataset in train and test data 15 | # A random permutation, to split the data randomly 16 | 17 | split_point = int(len(diet_x) * split_percentage) 18 | diet_X_train = diet_x[:split_point] 19 | diet_y_train = diet_y[:split_point] 20 | diet_X_test = diet_x[split_point:] 21 | diet_y_test = diet_y[split_point:] 22 | 23 | return diet_X_train, diet_y_train, diet_X_test, diet_y_test 24 | 25 | 26 | if __name__ == "__main__": 27 | diet_X_train, diet_y_train, diet_X_test, diet_y_test = load_diet("diet.csv", split_percentage=0.7) 28 | model = linear_model.LinearRegression() 29 | # model = linear_model.BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True, 30 | # fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300, 31 | # normalize=False, tol=0.001, verbose=False) 32 | model.fit(diet_X_train, diet_y_train) 33 | 34 | y_pred = model.predict(diet_X_test) 35 | 36 | for i in range(len(diet_y_test)): 37 | print("Expected:", diet_y_test[i], "Predicted:", y_pred[i]) 38 | 39 | # The mean squared error 40 | print("Mean squared error: %.2f" 41 | % mean_squared_error(diet_y_test, y_pred)) 42 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from sklearn.cluster import KMeans 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_iris(iris_path): 8 | df = pd.read_csv(iris_path) 9 | 10 | df = shuffle(df) 11 | 
df_without_label = df.drop('species', axis=1) 12 | return df, df_without_label 13 | 14 | 15 | if __name__ == '__main__': 16 | csv_file = 'iris.csv' 17 | 18 | # Load the data, with and without the class label 19 | df, df_without_label = load_iris(csv_file) 20 | # Fit a k-means estimator 21 | estimator = KMeans(n_clusters=3) 22 | estimator.fit(df_without_label) 23 | # Clusters are given in the labels_ attribute 24 | labels = estimator.labels_ 25 | df['cluster'] = pd.Series(labels, index=df.index) 26 | 27 | print(labels) 28 | # divide the dataset into three dataframes based on the assigned cluster 29 | cluster_0_df = df.query('cluster == 0') 30 | cluster_1_df = df.query('cluster == 1') 31 | cluster_2_df = df.query('cluster == 2') 32 | 33 | fig, axes = plt.subplots(nrows=1, ncols=1) 34 | fig.set_size_inches(18.5, 10.5) 35 | fig.tight_layout() 36 | 37 | ax = cluster_0_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-0', color='blue', ax=axes) 38 | ax = cluster_1_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-1', color='red', ax=ax) 39 | ax = cluster_2_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-2', color='green', ax=ax) 40 | 41 | for i, label in enumerate(df['species']): 42 | 43 | label = label[0:4] 44 | ax.annotate(label, (list(df['petal_length'])[i], list(df['petal_width'])[i]), color='gray', fontsize=9, 45 | horizontalalignment='left', 46 | verticalalignment='bottom') 47 | 48 | plt.show() 49 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_3.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from sklearn.cluster import KMeans, SpectralClustering, AgglomerativeClustering 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_dataset(dataset_path): 8 | df = pd.read_csv(dataset_path) 9 | 10 | df = shuffle(df) 11 | df_without_label = df.drop('Diet', axis=1) 12 | return df, df_without_label 13 | 14 | 15 | if __name__ == '__main__': 16 | csv_file = 'diet.csv' 17 | 18 | # Load the data, with and without the class label 19 | df, df_without_label = load_dataset(csv_file) 20 | # Fit an agglomerative clustering estimator 21 | estimator = AgglomerativeClustering(n_clusters=3) 22 | estimator.fit(df_without_label) 23 | # Clusters are given in the labels_ attribute 24 | labels = estimator.labels_ 25 | df['cluster'] = pd.Series(labels, index=df.index) 26 | 27 | print(labels) 28 | # divide the dataset into three dataframes based on the assigned cluster 29 | cluster_0_df = df.query('cluster == 0') 30 | cluster_1_df = df.query('cluster == 1') 31 | cluster_2_df = df.query('cluster == 2') 32 | 33 | fig, axes = plt.subplots(nrows=1, ncols=1) 34 | fig.set_size_inches(18.5, 10.5) 35 | fig.tight_layout() 36 | 37 | ax = cluster_0_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-0', color='blue', ax=axes) 38 | ax = cluster_1_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-1', color='red', ax=ax) 39 | ax = cluster_2_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-2', color='green', ax=ax) 40 | 41 | for i, label in enumerate(df['Diet']): 42 | 43 | label = "Diet_" + str(label) 44 | ax.annotate(label, (list(df['pre.weight'])[i], list(df['weight6weeks'])[i]), color='gray', fontsize=9, 45 | horizontalalignment='left', 46 | verticalalignment='bottom') 47 | 48 | plt.show() 49 | --------------------------------------------------------------------------------
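The two clustering activities above judge the grouping purely by eye: the scatter plots are coloured by cluster and each point is annotated with its known species or diet label. As an optional follow-up (not part of the original activities), the agreement between an estimator's `labels_` and the known labels can also be measured numerically. The sketch below assumes the same `iris.csv` with a `species` column used in `activity_2.py`, and uses scikit-learn's `adjusted_rand_score`, which ignores the arbitrary numbering of the clusters.

```python
# Optional sketch (not part of the original activities): score the clusters
# found by two estimators against the known species labels in iris.csv.
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score
from sklearn.utils import shuffle

df = shuffle(pd.read_csv("iris.csv"))      # same dataset as activity_2.py
features = df.drop("species", axis=1)      # cluster on the measurements only

for estimator in (KMeans(n_clusters=3), AgglomerativeClustering(n_clusters=3)):
    labels = estimator.fit(features).labels_
    # 1.0 means the clusters match the species exactly, ~0.0 means no better
    # than random assignment; the score is invariant to cluster renumbering.
    print(type(estimator).__name__, adjusted_rand_score(df["species"], labels))
```

On the iris measurements both estimators usually recover the species reasonably well, which matches what the annotated scatter plot suggests.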
/Week10_Regression_and_Clustering/diet.csv: -------------------------------------------------------------------------------- 1 | Person,gender,Age,Height,pre.weight,Diet,weight6weeks 2 | 25,0,41,171,60,2,60 3 | 26,0,32,174,103,2,103 4 | 1,0,22,159,58,1,54.2 5 | 2,0,46,192,60,1,54 6 | 3,0,55,170,64,1,63.3 7 | 4,0,33,171,64,1,61.1 8 | 5,0,50,170,65,1,62.2 9 | 6,0,50,201,66,1,64 10 | 7,0,37,174,67,1,65 11 | 8,0,28,176,69,1,60.5 12 | 9,0,28,165,70,1,68.1 13 | 10,0,45,165,70,1,66.9 14 | 11,0,60,173,72,1,70.5 15 | 12,0,48,156,72,1,69 16 | 13,0,41,163,72,1,68.4 17 | 14,0,37,167,82,1,81.1 18 | 27,0,44,174,58,2,60.1 19 | 28,0,37,172,58,2,56 20 | 29,0,41,165,59,2,57.3 21 | 30,0,43,171,61,2,56.7 22 | 31,0,20,169,62,2,55 23 | 32,0,51,174,63,2,62.4 24 | 33,0,31,163,63,2,60.3 25 | 34,0,54,173,63,2,59.4 26 | 35,0,50,166,65,2,62 27 | 36,0,48,163,66,2,64 28 | 37,0,16,165,68,2,63.8 29 | 38,0,37,167,68,2,63.3 30 | 39,0,30,161,76,2,72.7 31 | 40,0,29,169,77,2,77.5 32 | 52,0,51,165,60,3,53 33 | 53,0,35,169,62,3,56.4 34 | 54,0,21,159,64,3,60.6 35 | 55,0,22,169,65,3,58.2 36 | 56,0,36,160,66,3,58.2 37 | 57,0,20,169,67,3,61.6 38 | 58,0,35,163,67,3,60.2 39 | 59,0,45,155,69,3,61.8 40 | 60,0,58,141,70,3,63 41 | 61,0,37,170,70,3,62.7 42 | 62,0,31,170,72,3,71.1 43 | 63,0,35,171,72,3,64.4 44 | 64,0,56,171,73,3,68.9 45 | 65,0,48,153,75,3,68.7 46 | 66,0,41,157,76,3,71 47 | 15,1,39,168,71,1,71.6 48 | 16,1,31,158,72,1,70.9 49 | 17,1,40,173,74,1,69.5 50 | 18,1,50,160,78,1,73.9 51 | 19,1,43,162,80,1,71 52 | 20,1,25,165,80,1,77.6 53 | 21,1,52,177,83,1,79.1 54 | 22,1,42,166,85,1,81.5 55 | 23,1,39,166,87,1,81.9 56 | 24,1,40,190,88,1,84.5 57 | 41,1,51,191,71,2,66.8 58 | 42,1,38,199,75,2,72.6 59 | 43,1,54,196,75,2,69.2 60 | 44,1,33,190,76,2,72.5 61 | 45,1,45,160,78,2,72.7 62 | 46,1,37,194,78,2,76.3 63 | 47,1,44,163,79,2,73.6 64 | 48,1,40,171,79,2,72.9 65 | 49,1,37,198,79,2,71.1 66 | 50,1,39,180,80,2,81.4 67 | 51,1,31,182,80,2,75.7 68 | 67,1,36,155,71,3,68.5 69 | 68,1,47,179,73,3,72.1 70 | 69,1,29,166,76,3,72.5 71 | 70,1,37,173,78,3,77.5 72 | 71,1,31,177,78,3,75.2 73 | 72,1,26,179,78,3,69.4 74 | 73,1,40,179,79,3,74.5 75 | 74,1,35,183,83,3,80.2 76 | 75,1,49,177,84,3,79.9 77 | 76,1,28,164,85,3,79.7 78 | 77,1,40,167,87,3,77.8 79 | 78,1,51,175,88,3,81.9 80 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 
4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 
5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_1.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.metrics import precision_score, accuracy_score, recall_score 4 | from sklearn.preprocessing import OrdinalEncoder 5 | from sklearn.tree import DecisionTreeClassifier 6 | 7 | 8 | def load_exposure(exposure_path, split_percentage): 9 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 10 | 11 | df = df[[ 12 | 'GHRP', 13 | 'Aid dependence', 14 | 'Remittances', 15 | 'food import dependence ', 16 | 'primary commodity export dependence', 17 | 'tourism as percentage of GDP', 18 | 'tourism dependence', 19 | 'Foreign currency reserves', 20 | 'Foreign direct investment, net inflows percent of GDP', 21 | 'Foreign direct investment', 22 | 'Covid_19_Economic_exposure_index', 23 | 'Income classification according to WB' 24 | ]] 25 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 26 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 27 | 'tourism as percentage of GDP', 'food import dependence ', 28 | 'primary commodity export dependence', 29 | 'Covid_19_Economic_exposure_index', ] 30 | 31 | df = df[df['Income classification according to WB'].notna()] 32 | 33 | for column in columns: 34 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 35 | 36 | # Ordinal Encoders 37 | encGhrp = OrdinalEncoder() 38 | df['GHRP'] = df['GHRP'].fillna("Unknown") 39 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 40 | df = df.fillna(0) 41 | 42 | exposure_x = df.drop('Income classification according to WB', axis=1).values 43 | exposure_y = df['Income classification according to WB'].values 44 | 45 | # Split exposure data in train and test data 46 | split_point = int(len(exposure_x) * split_percentage) 47 | exposure_X_train = exposure_x[:split_point] 48 | exposure_y_train = exposure_y[:split_point] 49 | exposure_X_test = exposure_x[split_point:] 50 | exposure_y_test = exposure_y[split_point:] 51 | 52 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 53 | 54 | 55 | if __name__ == '__main__': 56 | csv_file = 'exposure.csv' 57 | 58 | # Split the data into test and train parts 59 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test = load_exposure(csv_file, split_percentage=0.7) 60 | 61 | # train a classifier 62 | dt = DecisionTreeClassifier() 63 | dt.fit(exposure_X_train, exposure_y_train) 64 | 65 | # predict the test set 66 | predictions = dt.predict(exposure_X_test) 67 | 68 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 69 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 70 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 71 | -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_2.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.impute import SimpleImputer 4 | from sklearn.metrics import precision_score, accuracy_score, recall_score 5 | from sklearn.preprocessing import OrdinalEncoder 6 | from sklearn.tree import DecisionTreeClassifier 7 | 8 | 9 | def load_exposure(exposure_path, split_percentage, 
strategy): 10 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 11 | 12 | df = df[[ 13 | 'GHRP', 14 | 'Aid dependence', 15 | 'Remittances', 16 | 'food import dependence ', 17 | 'primary commodity export dependence', 18 | 'tourism as percentage of GDP', 19 | 'tourism dependence', 20 | 'Foreign currency reserves', 21 | 'Foreign direct investment, net inflows percent of GDP', 22 | 'Foreign direct investment', 23 | 'Covid_19_Economic_exposure_index', 24 | 'Income classification according to WB' 25 | ]] 26 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 27 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 28 | 'tourism as percentage of GDP', 'food import dependence ', 29 | 'primary commodity export dependence', 30 | 'Covid_19_Economic_exposure_index', ] 31 | 32 | df = df[df['Income classification according to WB'].notna()] 33 | 34 | for column in columns: 35 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 36 | 37 | # Ordinal Encoders 38 | encGhrp = OrdinalEncoder() 39 | df['GHRP'] = df['GHRP'].fillna("Unknown") 40 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 41 | 42 | for column in columns: 43 | imputer = SimpleImputer(missing_values=numpy.nan, strategy=strategy) 44 | df[column] = imputer.fit_transform(df[[column]]) 45 | 46 | exposure_x = df.drop('Income classification according to WB', axis=1).values 47 | exposure_y = df['Income classification according to WB'].values 48 | 49 | # Split exposure data in train and test data 50 | split_point = int(len(exposure_x) * split_percentage) 51 | exposure_X_train = exposure_x[:split_point] 52 | exposure_y_train = exposure_y[:split_point] 53 | exposure_X_test = exposure_x[split_point:] 54 | exposure_y_test = exposure_y[split_point:] 55 | 56 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 57 | 58 | 59 | if __name__ == '__main__': 60 | csv_file = 'exposure.csv' 61 | 62 | # Split the data into test and train parts 63 | for strategy in ["mean", "median", "most_frequent", "constant"]: 64 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test \ 65 | = load_exposure(csv_file, split_percentage=0.7, strategy=strategy) 66 | 67 | # train a classifier 68 | dt = DecisionTreeClassifier() 69 | dt.fit(exposure_X_train, exposure_y_train) 70 | 71 | # predict the test set 72 | predictions = dt.predict(exposure_X_test) 73 | 74 | print("*************************** "+strategy+" ***********************************") 75 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 76 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 77 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 78 | 79 | -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_3.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.impute import SimpleImputer 4 | from sklearn.metrics import precision_score, accuracy_score, recall_score 5 | from sklearn.preprocessing import OrdinalEncoder, PolynomialFeatures 6 | from sklearn.tree import DecisionTreeClassifier 7 | 8 | 9 | def load_exposure(exposure_path, split_percentage, strategy): 10 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 11 | 12 | df = df[[ 13 | 'GHRP', 14 | 'Aid dependence', 
15 | 'Remittances', 16 | 'food import dependence ', 17 | 'primary commodity export dependence', 18 | 'tourism as percentage of GDP', 19 | 'tourism dependence', 20 | 'Foreign currency reserves', 21 | 'Foreign direct investment, net inflows percent of GDP', 22 | 'Foreign direct investment', 23 | 'Covid_19_Economic_exposure_index', 24 | 'Income classification according to WB' 25 | ]] 26 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 27 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 28 | 'tourism as percentage of GDP', 'food import dependence ', 29 | 'primary commodity export dependence', 30 | 'Covid_19_Economic_exposure_index', ] 31 | 32 | df = df[df['Income classification according to WB'].notna()] 33 | 34 | for column in columns: 35 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 36 | 37 | # Ordinal Encoders 38 | encGhrp = OrdinalEncoder() 39 | df['GHRP'] = df['GHRP'].fillna("Unknown") 40 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 41 | 42 | for column in columns: 43 | imputer = SimpleImputer(missing_values=numpy.nan, strategy=strategy) 44 | df[column] = imputer.fit_transform(df[[column]]) 45 | 46 | exposure_x = df.drop('Income classification according to WB', axis=1).values 47 | exposure_y = df['Income classification according to WB'].values 48 | 49 | # Split exposure data in train and test data 50 | split_point = int(len(exposure_x) * split_percentage) 51 | exposure_X_train = exposure_x[:split_point] 52 | exposure_y_train = exposure_y[:split_point] 53 | exposure_X_test = exposure_x[split_point:] 54 | exposure_y_test = exposure_y[split_point:] 55 | 56 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 57 | 58 | 59 | if __name__ == '__main__': 60 | csv_file = 'exposure.csv' 61 | 62 | # Split the data into test and train parts 63 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test \ 64 | = load_exposure(csv_file, split_percentage=0.7, strategy="most_frequent") 65 | 66 | # train a classifier 67 | dt = DecisionTreeClassifier() 68 | dt.fit(exposure_X_train, exposure_y_train) 69 | 70 | # predict the test set 71 | predictions = dt.predict(exposure_X_test) 72 | 73 | print("*************************** without PolynomialFeatures ***********************************") 74 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 75 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 76 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 77 | 78 | poly = PolynomialFeatures(1) 79 | exposure_X_train_scaled = poly.fit_transform(exposure_X_train) 80 | exposure_X_test_scaled = poly.fit_transform(exposure_X_test) 81 | dt = DecisionTreeClassifier() 82 | dt.fit(exposure_X_train_scaled, exposure_y_train) 83 | predictions = dt.predict(exposure_X_test_scaled) 84 | 85 | print("*************************** with PolynomialFeatures ***********************************") 86 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 87 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 88 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) -------------------------------------------------------------------------------- /Week2_DataAccess/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def 
read_csv(csv_file): 5 | """ 6 | :param csv_file: the path of csv file 7 | :return: A dataframe out of the csv file 8 | """ 9 | return pd.read_csv(csv_file) 10 | 11 | 12 | def write_in_csv(dataframe, file): 13 | """ 14 | :param dataframe: The dataframe which must be written into a csv file 15 | :param file: where the csv must be stored 16 | """ 17 | dataframe.to_csv(file, sep=',', encoding='utf-8') 18 | 19 | 20 | def print_dataframe(dataframe, print_column=True, print_rows=True): 21 | # print column names 22 | if print_column: 23 | print(','.join(dataframe.columns)) 24 | 25 | # print rows one by one 26 | if print_rows: 27 | for row in dataframe.itertuples(index=False, name=None): 28 | row = ','.join(str(col) for col in row) 29 | print(row) 30 | 31 | if __name__ == '__main__': 32 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 33 | dataframe = read_csv(csv_file) 34 | 35 | print("Loading the csv file") 36 | print_dataframe(dataframe) 37 | 38 | print("Write the dataframe as a csv file") 39 | write_in_csv(dataframe, "Demographic_Statistics_New.csv") # path where the new csv file is stored 40 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_2.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import pandas as pd 3 | from pandas.io import sql 4 | 5 | 6 | def read_csv(csv_file): 7 | """ 8 | :param csv_file: the path of csv file 9 | :return: A dataframe out of the csv file 10 | """ 11 | return pd.read_csv(csv_file) 12 | 13 | 14 | def write_in_sqlite(dataframe, database_file, table_name): 15 | """ 16 | :param dataframe: The dataframe which must be written into the database 17 | :param database_file: where the database is stored 18 | :param table_name: the name of the table 19 | """ 20 | 21 | cnx = sqlite3.connect(database_file) 22 | sql.to_sql(dataframe, name=table_name, con=cnx) 23 | 24 | 25 | def read_from_sqlite(database_file, table_name): 26 | """ 27 | :param database_file: where the database is stored 28 | :param table_name: the name of the table 29 | :return: A Dataframe 30 | """ 31 | cnx = sqlite3.connect(database_file) 32 | return sql.read_sql('select * from ' + table_name, cnx) 33 | 34 | 35 | if __name__ == '__main__': 36 | table_name = "Demographic_Statistics" 37 | database_file = 'Demographic_Statistics.db' # name of sqlite db file that will be created 38 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 39 | loaded_df = read_csv(csv_file) 40 | 41 | print("Creating database") 42 | write_in_sqlite(loaded_df, database_file, table_name) 43 | 44 | print("Querying the database") 45 | queried_df = read_from_sqlite(database_file, table_name) 46 | 47 | pd.set_option('display.width', 1000) 48 | pd.options.display.max_colwidth = 3 49 | pd.set_option('display.max_columns', 7) 50 | 51 | print(queried_df.head(10)) 52 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_3.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | from pymongo import MongoClient 4 | 5 | 6 | def read_csv(csv_file): 7 | """ 8 | :param csv_file: the path of csv file 9 | :return: A dataframe out of the csv file 10 | """ 11 | return pd.read_csv(csv_file) 12 | 13 | 14 | def print_dataframe(dataframe, print_column=True, print_rows=True): 15 | # print column names 16 | if print_column: 17 | 
print(','.join(dataframe.columns)) 18 | 19 | # print rows one by one 20 | if print_rows: 21 | for row in dataframe.itertuples(index=False, name=None): 22 | row = ','.join(str(col) for col in row) 23 | print(row) 24 | 25 | def write_in_mongodb(dataframe, mongo_host, mongo_port, db_name, collection): 26 | """ 27 | :param dataframe: The dataframe which must be written into the collection 28 | :param mongo_host: Mongodb server address 29 | :param mongo_port: Mongodb server port number 30 | :param db_name: The name of the database 31 | :param collection: the name of the collection inside the database 32 | """ 33 | client = MongoClient(host=mongo_host, port=mongo_port) 34 | db = client[db_name] 35 | c = db[collection] 36 | # You can only store documents in mongodb; 37 | # so you need to convert rows inside the dataframe into a list of json objects 38 | records = json.loads(dataframe.T.to_json()).values() 39 | c.insert_many(records) 40 | 41 | 42 | 43 | def read_from_mongodb(mongo_host, mongo_port, db_name, collection): 44 | """ 45 | :param mongo_host: Mongodb server address 46 | :param mongo_port: Mongodb server port number 47 | :param db_name: The name of the database 48 | :param collection: the name of the collection inside the database 49 | :return: A dataframe which contains all documents inside the collection 50 | """ 51 | client = MongoClient(host=mongo_host, port=mongo_port) 52 | db = client[db_name] 53 | c = db[collection] 54 | return pd.DataFrame(list(c.find())) 55 | 56 | if __name__ == '__main__': 57 | 58 | db_name = 'comp9321' 59 | mongo_port = 27017 60 | mongo_host = 'localhost' 61 | 62 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 63 | df = read_csv(csv_file) 64 | collection = 'Demographic_Statistics' 65 | 66 | print("Writing into the mongodb") 67 | write_in_mongodb(df, mongo_host, mongo_port, db_name, collection) 68 | 69 | print("Querying the database") 70 | df = read_from_mongodb(mongo_host, mongo_port, db_name, collection) 71 | 72 | print_dataframe(df) 73 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_4.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pandas as pd 3 | 4 | 5 | def get_json(url): 6 | """ 7 | :param url: URL of the resource 8 | :return: JSON 9 | """ 10 | resp = requests.get(url=url) 11 | data = resp.json() 12 | return data 13 | 14 | 15 | def json_to_dataframe(json_obj): 16 | """ 17 | Please open the JSON using the given URL to be familiar with the 18 | structure of the expected JSON object 19 | 20 | Here the JSON is simply an array of objects (one per record), 21 | so it can be passed directly to pandas' from_records to 22 | build a dataframe 23 | :param json_obj: JSON object for the dataset 24 | :return: A dataframe 25 | """ 26 | # the JSON is already a list of records, so no extraction is needed 27 | json_data = json_obj 28 | 29 | return pd.DataFrame.from_records(json_data) 30 | 31 | if __name__ == '__main__': 32 | url = "https://raw.githubusercontent.com/joseluisq/json-datasets/master/json/operating-systems/macosx_releases.json" 33 | 34 | print("Fetch the json") 35 | json_obj = get_json(url) 36 | 37 | print("Convert the json object to a dataframe") 38 | df = json_to_dataframe(json_obj) 39 | print(df.to_string()) 40 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/Books.csv: -------------------------------------------------------------------------------- 1 |
Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. 
By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week3_Data_Cleansing/City.csv: -------------------------------------------------------------------------------- 1 | City,Country 2 | London,England 3 | Puerto Rico,U.S.A 4 | Coventry,England 5 | Christiania,Denmark 6 | Firenze,Italy 7 | Amsterdam,Netherlands 8 | Savona,Italy 9 | Paris,France 10 | New York,U.S.A -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | if __name__ == "__main__": 5 | columns_to_drop = ['Edition Statement', 6 | 'Corporate Author', 7 | 'Corporate Contributors', 8 | 'Former owner', 9 | 'Engraver', 10 | 'Contributors', 11 | 'Issuance type', 12 | 'Shelfmarks' 13 | ] 14 | csv_file = "Books.csv" 15 | df = pd.read_csv(csv_file) 16 | 17 | print("The percentage of NaN in the data per column:") 18 | num_of_rows = df.shape[0] 19 | for column in df: 20 | # df[column].isnull() : returns an array of True/False showing the cell is null or not 21 | percent = 100 * df[column].isnull().sum() / num_of_rows 22 | print(column, str(percent) + '%') 23 | 24 | print("****************************************") 25 | print("Dataframe before dropping the columns") 26 | print(df.to_string()) 27 | 28 | print("****************************************") 29 | print("Dataframe after dropping the columns") 30 | df.drop(columns_to_drop, inplace=True, axis=1) 31 | # Pandas' drop method is used to remove columns of a dataframe 32 | # Inplace=True indicates that the changes should be applied to the given dataframe instead of creating a new one 33 | # axis=1 : Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns'). 34 | 35 | print(df.to_string()) 36 | print("****************************************") 37 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | if __name__ == "__main__": 5 | csv_file = "Books.csv" 6 | df = pd.read_csv(csv_file) 7 | 8 | # Replace the cell value of "Place of Publication" with "London" if it contains "London", 9 | # and replace all '-' characters with space 10 | # We use the apply method which applies a lambda function to the cells of a dataframe 11 | df['Place of Publication'] = df['Place of Publication'].apply( 12 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 13 | 14 | ################################################################################################################ 15 | # Here is also another approach using numpy.where # 16 | # import numpy as np # 17 | # london = df['Place of Publication'].str.contains('London') # 18 | # df['Place of Publication'] = np.where(london, 'London', df['Place of Publication'].str.replace('-', ' ')) # 19 | ################################################################################################################ 20 | print(df['Place of Publication']) 21 | 22 | # We use Pandas' extract method which for each subject string in the Series, 23 | # extracts groups from the first match of regular expression pat. 
24 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 25 | # ^(\d{4}) : matches 4 digit numbers in the beginning of the string 26 | new_date = pd.to_numeric(new_date) 27 | df['Date of Publication'] = new_date 28 | print(df['Date of Publication']) 29 | 30 | # replace all NaN with 0 31 | new_date = new_date.fillna(0) 32 | df['Date of Publication'] = new_date 33 | print(df['Date of Publication']) 34 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def clean(dataframe): 4 | dataframe['Place of Publication'] = dataframe['Place of Publication'].apply( 5 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 6 | 7 | new_date = dataframe['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 8 | new_date = pd.to_numeric(new_date) 9 | new_date = new_date.fillna(0) 10 | dataframe['Date of Publication'] = new_date 11 | 12 | return dataframe 13 | 14 | 15 | if __name__ == "__main__": 16 | csv_file = "Books.csv" 17 | df = pd.read_csv(csv_file) 18 | df = clean(df) 19 | 20 | # Replace the spaces with the underline character ('_') 21 | # Because panda's query method does not work well with column names which contains white spaces 22 | df.columns = [c.replace(' ', '_') for c in df.columns] 23 | 24 | # 25 | df = df.query('Date_of_Publication > 1866 and Place_of_Publication == "London"') 26 | 27 | print(df.to_string()) 28 | 29 | 30 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_4.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | 5 | def clean(dataframe): 6 | dataframe['Place of Publication'] = dataframe['Place of Publication'].apply( 7 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 8 | 9 | new_date = dataframe['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 10 | new_date = pd.to_numeric(new_date) 11 | new_date = new_date.fillna(0) 12 | dataframe['Date of Publication'] = new_date 13 | 14 | return dataframe 15 | 16 | 17 | if __name__ == "__main__": 18 | csv_file = "Books.csv" 19 | books_df = pd.read_csv(csv_file) 20 | books_df = clean(books_df) 21 | # Replace the spaces with the underline character ('_') 22 | # Because panda's query method does not work well with column names which contains white spaces 23 | books_df.columns = [c.replace(' ', '_') for c in books_df.columns] 24 | 25 | city_df = pd.read_csv('City.csv') 26 | 27 | # merge the two dataframes 28 | df = pd.merge(books_df, city_df, how='left', left_on=['Place_of_Publication'], right_on=['City']) 29 | 30 | # Group by Country and keep the country as a column 31 | gb_df = df.groupby(['Country'], as_index=False) 32 | 33 | # Select a column (as far as it has values for all rows, you can select any column) 34 | df = gb_df['Identifier'].count() 35 | 36 | # print the dataframe which shows publication number by country 37 | print(df.to_string()) 38 | -------------------------------------------------------------------------------- /Week4_Visualization/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 
[1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 
13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week4_Visualization/activity_1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | 5 | def clean(df): 6 | # Let's Clean the data to get rid of exceptions 7 | df['Place of Publication'] = df['Place of Publication'].apply( 8 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 9 | return df 10 | 11 | 12 | if __name__ == '__main__': 13 | csv_file = 'Books.csv' 14 | df = pd.read_csv(csv_file) 15 | 16 | # Cleaning is Optional; but it will increase the accuracy of the results 17 | df = clean(df) 18 | 19 | # value_counts: returns a Series containing counts of each category. 20 | unival = df['Place of Publication'].value_counts() 21 | unival.plot.pie(subplots=True) 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | df = df.groupby('species').mean() 9 | df.plot.bar() 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_3.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | # divide the dataset into three dataframes based on the species 9 | setosa_df = df.query('species == "setosa"') 10 | versicolor_df = df.query('species == "versicolor"') 11 | virginica_df = df.query('species == "virginica"') 12 | 13 | # Plot a scatter chart using x='sepal_length', y='sepal_width', and separate colors for each of the three dataframes 14 | ax = setosa_df.plot.scatter(x='sepal_length', y='sepal_width', label='setosa') 15 | ax = versicolor_df.plot.scatter(x='sepal_length', y='sepal_width', label='versicolor', color='green', ax=ax) 16 | ax = virginica_df.plot.scatter(x='sepal_length', y='sepal_width', label='virginica', color='red', ax=ax) 17 | 18 | # Plot a scatter chart using x='petal_length', y='petal_width', and separate colors for each of the three dataframes 19 | ax = setosa_df.plot.scatter(x='petal_length', y='petal_width', label='setosa') 20 | ax = versicolor_df.plot.scatter(x='petal_length', y='petal_width', label='versicolor', color='green', ax=ax) 21 | ax = virginica_df.plot.scatter(x='petal_length', y='petal_width', label='virginica', color='red', ax=ax) 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_4.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | # divide the dataset into three dataframes based on the species 9 | setosa_df = df.query('species == "setosa"') 10 | versicolor_df = df.query('species == "versicolor"') 11 | virginica_df = df.query('species == "virginica"') 12 | 13 | fig, axes = plt.subplots(nrows=1, ncols=2) 14 | 15 | # Plot a scatter chart using x='sepal_length', y='sepal_width', and separate colors for each of the three 
dataframes 16 | ax = setosa_df.plot.scatter(x='sepal_length', y='sepal_width', label='setosa', ax=axes[0]) 17 | ax = versicolor_df.plot.scatter(x='sepal_length', y='sepal_width', label='versicolor', color='green', ax=ax) 18 | ax = virginica_df.plot.scatter(x='sepal_length', y='sepal_width', label='virginica', color='red', ax=ax) 19 | 20 | # Plot a scatter chart using x='petal_length', y='petal_width', and separate colors for each of the three dataframes 21 | ax = setosa_df.plot.scatter(x='petal_length', y='petal_width', label='setosa', ax=axes[1]) 22 | ax = versicolor_df.plot.scatter(x='petal_length', y='petal_width', label='versicolor', color='green', ax=ax) 23 | ax = virginica_df.plot.scatter(x='petal_length', y='petal_width', label='virginica', color='red', ax=ax) 24 | 25 | 26 | plt.show() 27 | -------------------------------------------------------------------------------- /Week4_Visualization/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 
5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /Week5_Flask/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. 
By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. 
W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week5_Flask/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask_restx import Resource, Api 4 | 5 | app = Flask(__name__) 6 | api = Api(app) 7 | 8 | 9 | @api.route('/books/') 10 | class Books(Resource): 11 | def get(self, id): 12 | if id not in df.index: 13 | api.abort(404, "Book {} doesn't exist".format(id)) 14 | 15 | book = dict(df.loc[id]) 16 | return book 17 | 18 | 19 | if __name__ == '__main__': 20 | columns_to_drop = ['Edition Statement', 21 | 'Corporate Author', 22 | 'Corporate Contributors', 23 | 'Former owner', 24 | 'Engraver', 25 | 'Contributors', 26 | 'Issuance type', 27 | 'Shelfmarks' 28 | ] 29 | csv_file = "Books.csv" 30 | df = pd.read_csv(csv_file) 31 | 32 | # drop unnecessary columns 33 | df.drop(columns_to_drop, inplace=True, axis=1) 34 | 35 | # clean the date of publication & convert it to numeric data 36 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 37 | new_date = pd.to_numeric(new_date) 38 | new_date = new_date.fillna(0) 39 | df['Date of Publication'] = new_date 40 | 41 | # replace spaces in the name of columns 42 | df.columns = [c.replace(' ', '_') for c in df.columns] 43 | 44 | # set the index column; this will help us to find books with their ids 45 | df.set_index('Identifier', inplace=True) 46 | 47 | # run the application 48 | app.run(debug=True) 49 | -------------------------------------------------------------------------------- /Week5_Flask/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask_restx import Resource, Api 4 | 5 | app = Flask(__name__) 6 | api = Api(app) 7 | 8 | 9 | @api.route('/books/') 10 | class Books(Resource): 11 | def get(self, id): 12 | if id not in df.index: 13 | api.abort(404, "Book {} doesn't exist".format(id)) 14 | 15 | book = dict(df.loc[id]) 16 | return book 17 | 18 | def delete(self, id): 19 | if id not in df.index: 20 | api.abort(404, "Book {} doesn't exist".format(id)) 21 | 22 | df.drop(id, inplace=True) 23 | return {"message": "Book {} is removed.".format(id)}, 200 24 | 25 | 26 | if __name__ == '__main__': 27 | columns_to_drop = ['Edition Statement', 28 | 'Corporate Author', 29 | 'Corporate Contributors', 30 | 'Former owner', 31 | 'Engraver', 32 | 'Contributors', 33 | 'Issuance type', 34 | 'Shelfmarks' 35 | ] 36 | csv_file = "Books.csv" 37 | df = pd.read_csv(csv_file) 38 | 39 | # drop unnecessary columns 40 | df.drop(columns_to_drop, inplace=True, axis=1) 41 | 42 | # clean the date of publication & convert it to numeric data 43 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 44 | new_date = pd.to_numeric(new_date) 45 | new_date = new_date.fillna(0) 46 | df['Date of Publication'] = new_date 47 | 48 | # replace spaces in the name of columns 49 | df.columns = [c.replace(' ', '_') for c in df.columns] 50 | 51 | # set the index column; this will help us to find books with their ids 52 | df.set_index('Identifier', inplace=True) 53 | 54 | # run the application 55 | app.run(debug=True) 56 | -------------------------------------------------------------------------------- /Week5_Flask/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask import request 4 | from flask_restx import 
Resource, Api 5 | from flask_restx import fields 6 | 7 | app = Flask(__name__) 8 | api = Api(app) 9 | 10 | # The following is the schema of Book 11 | book_model = api.model('Book', { 12 | 'Flickr_URL': fields.String, 13 | 'Publisher': fields.String, 14 | 'Author': fields.String, 15 | 'Title': fields.String, 16 | 'Date_of_Publication': fields.Integer, 17 | 'Identifier': fields.Integer, 18 | 'Place_of_Publication': fields.String 19 | }) 20 | 21 | 22 | @api.route('/books/') 23 | class Books(Resource): 24 | def get(self, id): 25 | if id not in df.index: 26 | api.abort(404, "Book {} doesn't exist".format(id)) 27 | 28 | book = dict(df.loc[id]) 29 | return book 30 | 31 | def delete(self, id): 32 | if id not in df.index: 33 | api.abort(404, "Book {} doesn't exist".format(id)) 34 | 35 | df.drop(id, inplace=True) 36 | return {"message": "Book {} is removed.".format(id)}, 200 37 | 38 | @api.expect(book_model) 39 | def put(self, id): 40 | 41 | if id not in df.index: 42 | api.abort(404, "Book {} doesn't exist".format(id)) 43 | 44 | # get the payload and convert it to a JSON 45 | book = request.json 46 | 47 | # Book ID cannot be changed 48 | if 'Identifier' in book and id != book['Identifier']: 49 | return {"message": "Identifier cannot be changed".format(id)}, 400 50 | 51 | # Update the values 52 | for key in book: 53 | if key not in book_model.keys(): 54 | # unexpected column 55 | return {"message": "Property {} is invalid".format(key)}, 400 56 | df.loc[id, key] = book[key] 57 | 58 | # df.append(book, ignore_index=True) 59 | return {"message": "Book {} has been successfully updated".format(id)}, 200 60 | 61 | 62 | if __name__ == '__main__': 63 | columns_to_drop = ['Edition Statement', 64 | 'Corporate Author', 65 | 'Corporate Contributors', 66 | 'Former owner', 67 | 'Engraver', 68 | 'Contributors', 69 | 'Issuance type', 70 | 'Shelfmarks' 71 | ] 72 | csv_file = "Books.csv" 73 | df = pd.read_csv(csv_file) 74 | 75 | # drop unnecessary columns 76 | df.drop(columns_to_drop, inplace=True, axis=1) 77 | 78 | # clean the date of publication & convert it to numeric data 79 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 80 | new_date = pd.to_numeric(new_date) 81 | new_date = new_date.fillna(0) 82 | df['Date of Publication'] = new_date 83 | 84 | # replace spaces in the name of columns 85 | df.columns = [c.replace(' ', '_') for c in df.columns] 86 | 87 | # set the index column; this will help us to find books with their ids 88 | df.set_index('Identifier', inplace=True) 89 | 90 | # run the application 91 | app.run(debug=True) 92 | -------------------------------------------------------------------------------- /Week6_Flask2/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 
4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... 
a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
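With the Week5 endpoints above in place (GET, DELETE and PUT on a single book), the service can be exercised from a separate process. A minimal client sketch, assuming the Flask app is running locally on its default port and that the Books resource is routed with the book id as a URL parameter; it mirrors what the Week7_Client scripts later in the repository do against http://127.0.0.1:5000:

import requests

BASE = 'http://127.0.0.1:5000'

# Fetch one record.
r = requests.get(f'{BASE}/books/206')
print(r.status_code, r.json())

# Update a couple of fields; the server rejects changes to 'Identifier'
# and any key that is not part of the book_model schema.
r = requests.put(f'{BASE}/books/206',
                 json={'Author': 'Nobody', 'Date_of_Publication': 1879})
print(r.status_code, r.json()['message'])

# Remove the record; a follow-up GET is expected to return 404.
r = requests.delete(f'{BASE}/books/206')
print(r.status_code, r.json()['message'])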
-------------------------------------------------------------------------------- /Week6_Flask2/activity_1.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app) 13 | 14 | # The following is the schema of Book 15 | book_model = api.model('Book', { 16 | 'Flickr_URL': fields.String, 17 | 'Publisher': fields.String, 18 | 'Author': fields.String, 19 | 'Title': fields.String, 20 | 'Date_of_Publication': fields.Integer, 21 | 'Identifier': fields.Integer, 22 | 'Place_of_Publication': fields.String 23 | }) 24 | 25 | parser = reqparse.RequestParser() 26 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 27 | parser.add_argument('ascending', type=inputs.boolean) 28 | 29 | 30 | @api.route('/books') 31 | class BooksList(Resource): 32 | 33 | @api.expect(parser) 34 | def get(self): 35 | # get books as JSON string 36 | args = parser.parse_args() 37 | 38 | # retrieve the query parameters 39 | order_by = args.get('order') 40 | ascending = args.get('ascending', True) 41 | 42 | if order_by: 43 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 44 | 45 | json_str = df.to_json(orient='index') 46 | 47 | # convert the string JSON to a real JSON 48 | ds = json.loads(json_str) 49 | ret = [] 50 | 51 | for idx in ds: 52 | book = ds[idx] 53 | book['Identifier'] = int(idx) 54 | ret.append(book) 55 | 56 | return ret 57 | 58 | 59 | @api.route('/books/') 60 | class Books(Resource): 61 | 62 | def get(self, id): 63 | if id not in df.index: 64 | api.abort(404, "Book {} doesn't exist".format(id)) 65 | 66 | book = dict(df.loc[id]) 67 | return book 68 | 69 | def delete(self, id): 70 | if id not in df.index: 71 | api.abort(404, "Book {} doesn't exist".format(id)) 72 | 73 | df.drop(id, inplace=True) 74 | return {"message": "Book {} is removed.".format(id)}, 200 75 | 76 | @api.expect(book_model) 77 | def put(self, id): 78 | 79 | if id not in df.index: 80 | api.abort(404, "Book {} doesn't exist".format(id)) 81 | 82 | # get the payload and convert it to a JSON 83 | book = request.json 84 | 85 | # Book ID cannot be changed 86 | if 'Identifier' in book and id != book['Identifier']: 87 | return {"message": "Identifier cannot be changed".format(id)}, 400 88 | 89 | # Update the values 90 | for key in book: 91 | if key not in book_model.keys(): 92 | # unexpected column 93 | return {"message": "Property {} is invalid".format(key)}, 400 94 | df.loc[id, key] = book[key] 95 | 96 | df.append(book, ignore_index=True) 97 | return {"message": "Book {} has been successfully updated".format(id)}, 200 98 | 99 | 100 | if __name__ == '__main__': 101 | columns_to_drop = ['Edition Statement', 102 | 'Corporate Author', 103 | 'Corporate Contributors', 104 | 'Former owner', 105 | 'Engraver', 106 | 'Contributors', 107 | 'Issuance type', 108 | 'Shelfmarks' 109 | ] 110 | csv_file = "Books.csv" 111 | df = pd.read_csv(csv_file) 112 | 113 | # drop unnecessary columns 114 | df.drop(columns_to_drop, inplace=True, axis=1) 115 | 116 | # clean the date of publication & convert it to numeric data 117 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 118 | new_date = pd.to_numeric(new_date) 119 | new_date = new_date.fillna(0) 120 | df['Date of Publication'] = new_date 
121 | 122 | # replace spaces in the name of columns 123 | df.columns = [c.replace(' ', '_') for c in df.columns] 124 | 125 | # set the index column; this will help us to find books with their ids 126 | df.set_index('Identifier', inplace=True) 127 | 128 | # run the application 129 | app.run(debug=True) 130 | -------------------------------------------------------------------------------- /Week6_Flask2/activity_2.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app) 13 | 14 | # The following is the schema of Book 15 | book_model = api.model('Book', { 16 | 'Flickr_URL': fields.String, 17 | 'Publisher': fields.String, 18 | 'Author': fields.String, 19 | 'Title': fields.String, 20 | 'Date_of_Publication': fields.Integer, 21 | 'Identifier': fields.Integer, 22 | 'Place_of_Publication': fields.String 23 | }) 24 | 25 | parser = reqparse.RequestParser() 26 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 27 | parser.add_argument('ascending', type=inputs.boolean) 28 | 29 | 30 | @api.route('/books') 31 | class BooksList(Resource): 32 | 33 | @api.expect(parser) 34 | def get(self): 35 | # get books as JSON string 36 | args = parser.parse_args() 37 | 38 | # retrieve the query parameters 39 | order_by = args.get('order') 40 | ascending = args.get('ascending', True) 41 | 42 | if order_by: 43 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 44 | 45 | json_str = df.to_json(orient='index') 46 | 47 | # convert the string JSON to a real JSON 48 | ds = json.loads(json_str) 49 | ret = [] 50 | 51 | for idx in ds: 52 | book = ds[idx] 53 | book['Identifier'] = int(idx) 54 | ret.append(book) 55 | 56 | return ret 57 | 58 | @api.expect(book_model, validate=True) 59 | def post(self): 60 | book = request.json 61 | 62 | if 'Identifier' not in book: 63 | return {"message": "Missing Identifier"}, 400 64 | 65 | id = book['Identifier'] 66 | 67 | # check if the given identifier does not exist 68 | if id in df.index: 69 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 70 | 71 | # Put the values into the dataframe 72 | for key in book: 73 | if key not in book_model.keys(): 74 | # unexpected column 75 | return {"message": "Property {} is invalid".format(key)}, 400 76 | df.loc[id, key] = book[key] 77 | 78 | # df.append(book, ignore_index=True) 79 | return {"message": "Book {} is created".format(id)}, 201 80 | 81 | 82 | @api.route('/books/') 83 | class Books(Resource): 84 | def get(self, id): 85 | if id not in df.index: 86 | api.abort(404, "Book {} doesn't exist".format(id)) 87 | 88 | book = dict(df.loc[id]) 89 | return book 90 | 91 | def delete(self, id): 92 | if id not in df.index: 93 | api.abort(404, "Book {} doesn't exist".format(id)) 94 | 95 | df.drop(id, inplace=True) 96 | return {"message": "Book {} is removed.".format(id)}, 200 97 | 98 | @api.expect(book_model) 99 | def put(self, id): 100 | 101 | if id not in df.index: 102 | api.abort(404, "Book {} doesn't exist".format(id)) 103 | 104 | # get the payload and convert it to a JSON 105 | book = request.json 106 | 107 | # Book ID cannot be changed 108 | if 'Identifier' in book and id != book['Identifier']: 109 | return {"message": "Identifier cannot be 
changed".format(id)}, 400 110 | 111 | # Update the values 112 | for key in book: 113 | if key not in book_model.keys(): 114 | # unexpected column 115 | return {"message": "Property {} is invalid".format(key)}, 400 116 | df.loc[id, key] = book[key] 117 | 118 | df.append(book, ignore_index=True) 119 | return {"message": "Book {} has been successfully updated".format(id)}, 200 120 | 121 | 122 | if __name__ == '__main__': 123 | columns_to_drop = ['Edition Statement', 124 | 'Corporate Author', 125 | 'Corporate Contributors', 126 | 'Former owner', 127 | 'Engraver', 128 | 'Contributors', 129 | 'Issuance type', 130 | 'Shelfmarks' 131 | ] 132 | csv_file = "Books.csv" 133 | df = pd.read_csv(csv_file) 134 | 135 | # drop unnecessary columns 136 | df.drop(columns_to_drop, inplace=True, axis=1) 137 | 138 | # clean the date of publication & convert it to numeric data 139 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 140 | new_date = pd.to_numeric(new_date) 141 | new_date = new_date.fillna(0) 142 | df['Date of Publication'] = new_date 143 | 144 | # replace spaces in the name of columns 145 | df.columns = [c.replace(' ', '_') for c in df.columns] 146 | 147 | # set the index column; this will help us to find books with their ids 148 | df.set_index('Identifier', inplace=True) 149 | 150 | # run the application 151 | app.run(debug=True) 152 | -------------------------------------------------------------------------------- /Week6_Flask2/activity_3.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app, 13 | default="Books", # Default namespace 14 | title="Book Dataset", # Documentation Title 15 | description="This is just a simple example to show how publish data as a service.") # Documentation Description 16 | 17 | # The following is the schema of Book 18 | book_model = api.model('Book', { 19 | 'Flickr_URL': fields.String(example="jeee"), 20 | 'Publisher': fields.String, 21 | 'Author': fields.String, 22 | 'Title': fields.String, 23 | 'Date_of_Publication': fields.Integer, 24 | 'Identifier': fields.Integer, 25 | 'Place_of_Publication': fields.String 26 | }) 27 | 28 | parser = reqparse.RequestParser() 29 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 30 | parser.add_argument('ascending', type=inputs.boolean) 31 | 32 | 33 | @api.route('/books') 34 | class BooksList(Resource): 35 | 36 | @api.response(200, 'Successful') 37 | @api.doc(description="Get all books") 38 | @api.expect(parser) 39 | def get(self): 40 | # get books as JSON string 41 | args = parser.parse_args() 42 | 43 | # retrieve the query parameters 44 | order_by = args.get('order') 45 | ascending = args.get('ascending', True) 46 | 47 | if order_by: 48 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 49 | 50 | json_str = df.to_json(orient='index') 51 | 52 | # convert the string JSON to a real JSON 53 | ds = json.loads(json_str) 54 | ret = [] 55 | 56 | for idx in ds: 57 | book = ds[idx] 58 | book['Identifier'] = int(idx) 59 | ret.append(book) 60 | 61 | return ret 62 | 63 | @api.response(201, 'Book Created Successfully') 64 | @api.response(400, 'Validation Error') 65 | @api.doc(description="Add a new book") 66 | @api.expect(book_model, 
validate=True) 67 | def post(self): 68 | book = request.json 69 | 70 | if 'Identifier' not in book: 71 | return {"message": "Missing Identifier"}, 400 72 | 73 | id = book['Identifier'] 74 | 75 | # check if the given identifier does not exist 76 | if id in df.index: 77 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 78 | 79 | # Put the values into the dataframe 80 | for key in book: 81 | if key not in book_model.keys(): 82 | # unexpected column 83 | return {"message": "Property {} is invalid".format(key)}, 400 84 | df.loc[id, key] = book[key] 85 | 86 | # df.append(book, ignore_index=True) 87 | return {"message": "Book {} is created".format(id)}, 201 88 | 89 | 90 | @api.route('/books/') 91 | @api.param('id', 'The Book identifier') 92 | class Books(Resource): 93 | @api.response(404, 'Book was not found') 94 | @api.response(200, 'Successful') 95 | @api.doc(description="Get a book by its ID") 96 | def get(self, id): 97 | if id not in df.index: 98 | api.abort(404, "Book {} doesn't exist".format(id)) 99 | 100 | book = dict(df.loc[id]) 101 | return book 102 | 103 | @api.response(404, 'Book was not found') 104 | @api.response(200, 'Successful') 105 | @api.doc(description="Delete a book by its ID") 106 | def delete(self, id): 107 | if id not in df.index: 108 | api.abort(404, "Book {} doesn't exist".format(id)) 109 | 110 | df.drop(id, inplace=True) 111 | return {"message": "Book {} is removed.".format(id)}, 200 112 | 113 | @api.response(404, 'Book was not found') 114 | @api.response(400, 'Validation Error') 115 | @api.response(200, 'Successful') 116 | @api.expect(book_model, validate=True) 117 | @api.doc(description="Update a book by its ID") 118 | def put(self, id): 119 | 120 | if id not in df.index: 121 | api.abort(404, "Book {} doesn't exist".format(id)) 122 | 123 | # get the payload and convert it to a JSON 124 | book = request.json 125 | 126 | # Book ID cannot be changed 127 | if 'Identifier' in book and id != book['Identifier']: 128 | return {"message": "Identifier cannot be changed".format(id)}, 400 129 | 130 | # Update the values 131 | for key in book: 132 | if key not in book_model.keys(): 133 | # unexpected column 134 | return {"message": "Property {} is invalid".format(key)}, 400 135 | df.loc[id, key] = book[key] 136 | 137 | df._append(book, ignore_index=True) 138 | return {"message": "Book {} has been successfully updated".format(id)}, 200 139 | 140 | 141 | if __name__ == '__main__': 142 | columns_to_drop = ['Edition Statement', 143 | 'Corporate Author', 144 | 'Corporate Contributors', 145 | 'Former owner', 146 | 'Engraver', 147 | 'Contributors', 148 | 'Issuance type', 149 | 'Shelfmarks' 150 | ] 151 | csv_file = "Books.csv" 152 | df = pd.read_csv(csv_file) 153 | 154 | # drop unnecessary columns 155 | df.drop(columns_to_drop, inplace=True, axis=1) 156 | 157 | # clean the date of publication & convert it to numeric data 158 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 159 | new_date = pd.to_numeric(new_date) 160 | new_date = new_date.fillna(0) 161 | df['Date of Publication'] = new_date 162 | 163 | # replace spaces in the name of columns 164 | df.columns = [c.replace(' ', '_') for c in df.columns] 165 | 166 | # set the index column; this will help us to find books with their ids 167 | df.set_index('Identifier', inplace=True) 168 | 169 | # run the application 170 | app.run(debug=True, port="5000") 171 | -------------------------------------------------------------------------------- /Week7_Client/activity_1.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | # if isinstance(book[key], str): 9 | # val = str(book[key].encode('utf-8')) 10 | # else: 11 | val = str(book[key]) 12 | 13 | print("\t" + attr + ":" + val) 14 | print("}") 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | r = requests.get("http://127.0.0.1:5000/books", params={'order': 'Date_of_Publication', 'ascending':True}) 20 | print("Status Code:" + str(r.status_code)) 21 | books = r.json() 22 | for i in range(1, 5): 23 | print_book(books[i]) 24 | -------------------------------------------------------------------------------- /Week7_Client/activity_2.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | if __name__ == '__main__': 4 | book = { 5 | "Date_of_Publication": 2018, 6 | "Publisher": "UNSW", 7 | "Author": "Nobody", 8 | "Title": "Nothing", 9 | "Flickr_URL": "http://somewhere", 10 | "Identifier": 2, 11 | "Place_of_Publication": "Sydney" 12 | } 13 | 14 | r = requests.post("http://127.0.0.1:5000/books", json=book) 15 | 16 | print("Status Code:" + str(r.status_code)) 17 | resp = r.json() 18 | 19 | print(resp['message']) 20 | -------------------------------------------------------------------------------- /Week7_Client/activity_3.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | val = str(book[key]) 9 | print("\t" + attr + ":" + val) 10 | 11 | print("}") 12 | 13 | 14 | def get_book(id): 15 | r = requests.get("http://127.0.0.1:5000/books/" + str(id)) 16 | book = r.json() 17 | print("Get status Code:" + str(r.status_code)) 18 | if r.ok: 19 | print_book(book) 20 | return book 21 | else: 22 | print('Error:' + book['message']) 23 | 24 | 25 | if __name__ == '__main__': 26 | 27 | print("***** Book information before update *****") 28 | book = get_book('206') 29 | 30 | # update the book information 31 | print("***** Updating Book Information *****") 32 | book['Author'] = 'Nobody' 33 | book['Identifier'] = 206 34 | book['Date_of_Publication']= 1879 35 | print(book) 36 | r = requests.put("http://127.0.0.1:5000/books/206", json=book) 37 | print("Put status Code:" + str(r.status_code)) 38 | print(r.json()['message']) 39 | 40 | print("***** Book information after update *****") 41 | book = get_book('206') 42 | 43 | -------------------------------------------------------------------------------- /Week7_Client/activity_4.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | val = str(book[key]) 9 | print("\t" + attr + ":" + val) 10 | print("}") 11 | 12 | 13 | def get_book(id): 14 | r = requests.get("http://127.0.0.1:5000/books/" + str(id)) 15 | book = r.json() 16 | print("Get status Code:" + str(r.status_code)) 17 | if r.ok: 18 | print_book(book) 19 | return book 20 | else: 21 | print('Error:' + book['message']) 22 | 23 | 24 | def remove_book(id): 25 | r = requests.delete("http://127.0.0.1:5000/books/"+id) 26 | print("Delete status Code:" + str(r.status_code)) 27 | print(r.json()['message']) 28 | 29 | if __name__ == '__main__': 30 | 31 | print("***** Book information before update *****") 32 | book = get_book('206') 33 | 
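    # Sketch of the flow this script expects against the books service (not asserted
    # anywhere in the code): the GET above prints book 206, the DELETE below removes
    # it from the dataframe, and the final GET then returns 404 with
    # "Book 206 doesn't exist".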
34 | # update the book information 35 | print("***** Deleting Book *****") 36 | remove_book('206') 37 | 38 | print("***** Book information after Delete *****") 39 | book = get_book('206') 40 | 41 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_1.py: -------------------------------------------------------------------------------- 1 | from ariadne.constants import PLAYGROUND_HTML 2 | from flask import Flask, request, jsonify 3 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 4 | 5 | # Define types using Schema Definition Language (https://graphql.org/learn/schema/) 6 | type_defs = gql( 7 | """ 8 | type Books { 9 | title: String! 10 | description: String! 11 | author: String! 12 | } 13 | 14 | type Query { 15 | books: [Books] 16 | } 17 | """ 18 | ) 19 | 20 | query = QueryType() 21 | 22 | # Define resolvers 23 | @query.field("books") 24 | def books(*_): 25 | return [book.to_json() for book in books_db.values()] 26 | 27 | 28 | # Create executable schema 29 | schema = make_executable_schema(type_defs, [query]) 30 | 31 | # initialize flask app 32 | app = Flask(__name__) 33 | books_db = dict() 34 | 35 | 36 | class Books: 37 | def __init__(self, id=None, title=None, description=None, author=None): 38 | self.id = id 39 | self.title = title 40 | self.description = description 41 | self.author = author 42 | 43 | def to_json(self): 44 | return { 45 | "title": self.title, 46 | "description": self.description, 47 | "author": self.author, 48 | } 49 | 50 | 51 | # Create a GraphQL Playground UI for the GraphQL schema 52 | @app.route("/graphql", methods=["GET"]) 53 | def graphql_playground(): 54 | return PLAYGROUND_HTML 55 | 56 | 57 | # Create a GraphQL endpoint for executing GraphQL queries 58 | @app.route("/graphql", methods=["POST"]) 59 | def graphql_server(): 60 | data = request.get_json() 61 | success, result = graphql_sync(schema, data, context_value={"request": request}) 62 | status_code = 200 if success else 400 63 | return jsonify(result), status_code 64 | 65 | 66 | # Run the app 67 | if __name__ == "__main__": 68 | books_db[1] = Books(len(books_db) + 1,"Data Services", "A Fake Book", "No Body") 69 | books_db[2] = Books(len(books_db) + 1,"Advance Data Services", "A Fake Book", "No Body") 70 | app.run(debug=True) 71 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_2.py: -------------------------------------------------------------------------------- 1 | from ariadne.constants import PLAYGROUND_HTML 2 | from flask import Flask, request, jsonify 3 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 4 | 5 | # Define type definitions (schema) using SDL 6 | type_defs = gql( 7 | """ 8 | type Books { 9 | title: String! 10 | description: String! 11 | author: String! 
12 | } 13 | 14 | type Query { 15 | books: [Books] 16 | } 17 | 18 | type Mutation{add_book(title: String!, description: String!, author: String!): Books} 19 | """ 20 | ) 21 | 22 | query = QueryType() 23 | mutation = MutationType() 24 | 25 | 26 | # Define resolvers 27 | @query.field("books") 28 | def books(*_): 29 | return [book.to_json() for book in books_db.values()] 30 | 31 | 32 | @mutation.field("add_book") 33 | def add_book(_, info, title, description, author): 34 | book = Books(len(books_db) + 1, title, description, author) 35 | book.save() 36 | return book.to_json() 37 | 38 | 39 | # Create executable schema 40 | schema = make_executable_schema(type_defs, [query, mutation]) 41 | 42 | # initialize flask app 43 | app = Flask(__name__) 44 | books_db = dict() 45 | 46 | 47 | class Books: 48 | def __init__(self, id=None, title=None, description=None, author=None): 49 | self.id = id 50 | self.title = title 51 | self.description = description 52 | self.author = author 53 | 54 | def to_json(self): 55 | return { 56 | "title": self.title, 57 | "description": self.description, 58 | "author": self.author, 59 | } 60 | 61 | def save(self): 62 | if self.id is None: 63 | self.id = len(books_db) + 1 64 | books_db[self.id] = self 65 | 66 | 67 | # Create a GraphQL Playground UI for the GraphQL schema 68 | @app.route("/graphql", methods=["GET"]) 69 | def graphql_playground(): 70 | return PLAYGROUND_HTML 71 | 72 | 73 | # Create a GraphQL endpoint for executing GraphQL queries 74 | @app.route("/graphql", methods=["POST"]) 75 | def graphql_server(): 76 | data = request.get_json() 77 | success, result = graphql_sync(schema, data, context_value={"request": request}) 78 | status_code = 200 if success else 400 79 | return jsonify(result), status_code 80 | 81 | 82 | # Run the app 83 | if __name__ == "__main__": 84 | app.run(debug=True) 85 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import flask_migrate.cli 5 | from ariadne.constants import PLAYGROUND_HTML 6 | from flask_migrate import Migrate 7 | from flask_sqlalchemy import SQLAlchemy 8 | from flask import Flask, request, jsonify 9 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 10 | 11 | # Define type definitions (schema) using SDL 12 | type_defs = gql( 13 | """ 14 | type Books { 15 | title: String! 16 | description: String! 17 | author: String! 
18 | } 19 | 20 | type Query { 21 | books: [Books] 22 | } 23 | 24 | type Mutation{add_book(title: String!, description: String!, author: String!): Books} 25 | """ 26 | ) 27 | 28 | query = QueryType() 29 | mutation = MutationType() 30 | 31 | 32 | # Define resolvers 33 | @query.field("books") 34 | def books(*_): 35 | return [book.to_json() for book in Books.query.all()] 36 | 37 | @mutation.field("add_book") 38 | def add_book(_, info, title, description, author): 39 | book = Books(len(books_db) + 1, title, description, author) 40 | book.save() 41 | return book.to_json() 42 | 43 | 44 | # Create executable schema 45 | schema = make_executable_schema(type_defs, [query, mutation]) 46 | 47 | # initialize flask app 48 | app = Flask(__name__) 49 | app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db.sqlite3" 50 | app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False 51 | db = SQLAlchemy(app) 52 | migrate = Migrate(app, db) 53 | books_db = dict() 54 | 55 | 56 | class Books(db.Model): 57 | id = db.Column(db.Integer, primary_key=True) 58 | title = db.Column(db.String(100), nullable=False) 59 | author = db.Column(db.String(100), nullable=False) 60 | description = db.Column(db.String(255), nullable=False) 61 | 62 | def __init__(self, id=None, title=None, description=None, author=None): 63 | self.id = id 64 | self.title = title 65 | self.description = description 66 | self.author = author 67 | 68 | def to_json(self): 69 | return { 70 | "title": self.title, 71 | "description": self.description, 72 | "author": self.author, 73 | } 74 | 75 | def save(self): 76 | db.session.add(self) 77 | db.session.commit() 78 | 79 | 80 | # Create a GraphQL Playground UI for the GraphQL schema 81 | @app.route("/graphql", methods=["GET"]) 82 | def graphql_playground(): 83 | return PLAYGROUND_HTML 84 | 85 | 86 | # Create a GraphQL endpoint for executing GraphQL queries 87 | @app.route("/graphql", methods=["POST"]) 88 | def graphql_server(): 89 | data = request.get_json() 90 | success, result = graphql_sync(schema, data, context_value={"request": request}) 91 | status_code = 200 if success else 400 92 | return jsonify(result), status_code 93 | 94 | 95 | # Run the app 96 | if __name__ == "__main__": 97 | 98 | """ 99 | Run the following commands in a terminal before running the application to setup the database 100 | """ 101 | # cd to the directory 102 | # export FLASK_APP = activity_3.py 103 | # flask db init 104 | # flask db migrate 105 | # flask db upgrade 106 | 107 | app.run(debug=True) 108 | -------------------------------------------------------------------------------- /Week8_Authentication/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. 
By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. 
Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week8_Authentication/activity_1.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import wraps 3 | 4 | import pandas as pd 5 | from flask import Flask 6 | from flask import request 7 | from flask_restx import Resource, Api 8 | from flask_restx import abort 9 | from flask_restx import fields 10 | from flask_restx import inputs 11 | from flask_restx import reqparse 12 | 13 | app = Flask(__name__) 14 | api = Api(app, 15 | default="Books", # Default namespace 16 | title="Book Dataset", # Documentation Title 17 | description="This is just a simple example to show how to publish data as a service.") # Documentation Description 18 | 19 | 20 | def requires_auth(f): 21 | @wraps(f) 22 | def decorated(*args, **kwargs): 23 | auth = request.authorization 24 | if not auth: 25 | abort(401) 26 | 27 | if not (auth.username == 'admin' and 'admin' == auth.password): 28 | abort(401) 29 | 30 | return f(*args, **kwargs) 31 | 32 | return decorated 33 | 34 | 35 | # The following is the schema of Book 36 | book_model = api.model('Book', { 37 | 'Flickr_URL': fields.String, 38 | 'Publisher': fields.String, 39 | 'Author': fields.String, 40 | 'Title': fields.String, 41 | 'Date_of_Publication': fields.Integer, 42 | 'Identifier': fields.Integer, 43 | 'Place_of_Publication': fields.String 44 | }) 45 | 46 | parser = reqparse.RequestParser() 47 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 48 | parser.add_argument('ascending', type=inputs.boolean) 49 | 50 | 51 | @api.route('/books') 52 | class BooksList(Resource): 53 | @api.response(200, 'Successful') 54 | @api.doc(description="Get all books") 55 | @requires_auth 56 | def get(self): 57 | # get books as JSON string 58 | args = parser.parse_args() 59 | 60 | # retrieve the query parameters 61 | order_by = args.get('order') 62 | ascending = args.get('ascending', True) 63 | 64 | if order_by: 65 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 66 | 67 | json_str = df.to_json(orient='index') 68 | 69 | # convert the string JSON to a real JSON 70 | ds = json.loads(json_str) 71 | ret = [] 72 | 73 | for idx in ds: 74 | book = ds[idx] 75 | book['Identifier'] = int(idx) 76 | ret.append(book) 77 | 78 | return ret 79 | 80 | @api.response(201, 'Book Created Successfully') 81 | @api.response(400, 'Validation Error') 82 | @api.doc(description="Add a new book") 83 | @api.expect(book_model, validate=True) 84 | @requires_auth 85 | def post(self): 86 | book = request.json 87 | 88 | if 'Identifier' not in book: 89 | return {"message": "Missing Identifier"}, 400 90 | 91 | id = book['Identifier'] 92 | 93 | # check if the given identifier does not exist 94 | if id in df.index: 95 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 96 | 97 | # Put the values into the dataframe 98 | for key in book: 99 | if key not in book_model.keys(): 100 | # unexpected column 101 | return {"message": "Property {} is invalid".format(key)}, 400 102 | df.loc[id, key] = book[key] 103 | 104 | # df.append(book, ignore_index=True) 105 | return {"message": "Book {} is created".format(id)}, 201 106 | 107 | 108 | @api.route('/books/<int:id>') 109 | @api.param('id', 'The Book identifier') 110 | class Books(Resource): 111 | @api.response(404, 'Book was not found') 112 | @api.response(200, 'Successful') 113 | @api.doc(description="Get a book by its ID") 114 | @requires_auth 115 | def get(self, 
id): 116 | if id not in df.index: 117 | api.abort(404, "Book {} doesn't exist".format(id)) 118 | 119 | book = dict(df.loc[id]) 120 | return book 121 | 122 | @api.response(404, 'Book was not found') 123 | @api.response(200, 'Successful') 124 | @api.doc(description="Delete a book by its ID") 125 | @requires_auth 126 | def delete(self, id): 127 | if id not in df.index: 128 | api.abort(404, "Book {} doesn't exist".format(id)) 129 | 130 | df.drop(id, inplace=True) 131 | return {"message": "Book {} is removed.".format(id)}, 200 132 | 133 | @api.response(404, 'Book was not found') 134 | @api.response(400, 'Validation Error') 135 | @api.response(200, 'Successful') 136 | @api.expect(book_model, validate=True) 137 | @api.doc(description="Update a book by its ID") 138 | @requires_auth 139 | def put(self, id): 140 | 141 | if id not in df.index: 142 | api.abort(404, "Book {} doesn't exist".format(id)) 143 | 144 | # get the payload and convert it to a JSON 145 | book = request.json 146 | 147 | # Book ID cannot be changed 148 | if 'Identifier' in book and id != book['Identifier']: 149 | return {"message": "Identifier cannot be changed"}, 400 150 | 151 | # Update the values 152 | for key in book: 153 | if key not in book_model.keys(): 154 | # unexpected column 155 | return {"message": "Property {} is invalid".format(key)}, 400 156 | df.loc[id, key] = book[key] 157 | 158 | # df.append(book, ignore_index=True) 159 | return {"message": "Book {} has been successfully updated".format(id)}, 200 160 | 161 | 162 | if __name__ == '__main__': 163 | columns_to_drop = ['Edition Statement', 164 | 'Corporate Author', 165 | 'Corporate Contributors', 166 | 'Former owner', 167 | 'Engraver', 168 | 'Contributors', 169 | 'Issuance type', 170 | 'Shelfmarks' 171 | ] 172 | csv_file = "Books.csv" 173 | df = pd.read_csv(csv_file) 174 | 175 | # drop unnecessary columns 176 | df.drop(columns_to_drop, inplace=True, axis=1) 177 | 178 | # clean the date of publication & convert it to numeric data 179 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 180 | new_date = pd.to_numeric(new_date) 181 | new_date = new_date.fillna(0) 182 | df['Date of Publication'] = new_date 183 | 184 | # replace spaces in the name of columns 185 | df.columns = [c.replace(' ', '_') for c in df.columns] 186 | 187 | # set the index column; this will help us to find books with their ids 188 | df.set_index('Identifier', inplace=True) 189 | 190 | # run the application 191 | app.run(debug=True) 192 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_1_client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from requests.auth import HTTPBasicAuth 3 | 4 | 5 | def print_book(book): 6 | print("Book {") 7 | for key in book.keys(): 8 | attr = str(key) 9 | val = str(book[key]) 10 | print("\t" + attr + ":" + val) 11 | print("}") 12 | 13 | 14 | def get_book(id, username, password): 15 | r = requests.get("http://127.0.0.1:5000/books/" + str(id), auth=HTTPBasicAuth(username, password)) 16 | book = r.json() 17 | print("Get status Code:" + str(r.status_code)) 18 | if r.ok: 19 | print_book(book) 20 | return book 21 | else: 22 | print('Error:' + book['message']) 23 | 24 | if __name__ == '__main__': 25 | 26 | print("***** Book information With Valid Credentials *****") 27 | book = get_book(206, 'admin', 'admin') 28 | 29 | print("***** Book information With Invalid Credentials *****") 30 | book = get_book(206, 'xxxxxxxxx', 
'yyyyyyyy') 31 | 32 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_2.py: -------------------------------------------------------------------------------- 1 | from time import sleep, time 2 | from itsdangerous import JSONWebSignatureSerializer, BadSignature, SignatureExpired 3 | 4 | 5 | class AuthenticationToken: 6 | def __init__(self, secret_key, expires_in): 7 | self.secret_key = secret_key 8 | self.expires_in = expires_in 9 | self.serializer = JSONWebSignatureSerializer(secret_key) 10 | 11 | def generate_token(self, username): 12 | 13 | info = { 14 | 'username': username, 15 | 'creation_time': time() 16 | } 17 | 18 | token = self.serializer.dumps(info) 19 | return token.decode() 20 | 21 | def validate_token(self, token): 22 | info = self.serializer.loads(token.encode()) 23 | 24 | if time() - info['creation_time'] > self.expires_in: 25 | raise SignatureExpired("The Token has been expired; get a new token") 26 | 27 | return info['username'] 28 | 29 | 30 | if __name__ == "__main__": 31 | 32 | SECRET_KEY = "A SECRET KEY; USUALLY A VERY LONG RANDOM STRING" 33 | expires_in = 10 34 | auth = AuthenticationToken(SECRET_KEY, expires_in) 35 | token = auth.generate_token('admin') 36 | print("Generated token is:", token) 37 | 38 | info = auth.validate_token(token) 39 | print("The token decoded as:", str(info)) 40 | 41 | sleep(expires_in + 1) 42 | 43 | try: 44 | expired_info = auth.validate_token(token) 45 | except SignatureExpired as e: 46 | print(e) 47 | except BadSignature as e: 48 | print("Invalid Token") 49 | 50 | try: 51 | expired_info = auth.validate_token("sssssssssssss") 52 | except SignatureExpired as e: 53 | print(e) 54 | except BadSignature as e: 55 | print("Invalid Token") 56 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_3.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | from functools import wraps 4 | 5 | import jwt 6 | import pandas as pd 7 | from flask import Flask 8 | from flask import request 9 | from flask_restx import Resource, Api, abort 10 | from flask_restx import fields 11 | from flask_restx import inputs 12 | from flask_restx import reqparse 13 | 14 | 15 | class AuthenticationToken: 16 | def __init__(self, secret_key, expires_in): 17 | self.secret_key = secret_key 18 | self.expires_in = expires_in 19 | 20 | def generate_token(self, username): 21 | info = { 22 | 'username': username, 23 | 'exp': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.expires_in) 24 | } 25 | return jwt.encode(info, self.secret_key, algorithm='HS256') 26 | 27 | def validate_token(self, token): 28 | info = jwt.decode(token, self.secret_key, algorithms=['HS256']) 29 | return info['username'] 30 | 31 | 32 | SECRET_KEY = "A SECRET KEY; USUALLY A VERY LONG RANDOM STRING" 33 | expires_in = 600 34 | auth = AuthenticationToken(SECRET_KEY, expires_in) 35 | 36 | app = Flask(__name__) 37 | api = Api(app, authorizations={ 38 | 'API-KEY': { 39 | 'type': 'apiKey', 40 | 'in': 'header', 41 | 'name': 'AUTH-TOKEN' 42 | } 43 | }, 44 | security='API-KEY', 45 | default="Books", # Default namespace 46 | title="Book Dataset", # Documentation Title 47 | description="This is just a simple example to show how publish data as a service.") # Documentation Description 48 | 49 | 50 | def requires_auth(f): 51 | @wraps(f) 52 | def decorated(*args, **kwargs): 53 | 54 | token = request.headers.get('AUTH-TOKEN') 55 | 
if not token: 56 | abort(401, 'Authentication token is missing') 57 | 58 | try: 59 | user = auth.validate_token(token) 60 | except Exception as e: 61 | abort(401, e) 62 | 63 | return f(*args, **kwargs) 64 | 65 | return decorated 66 | 67 | 68 | # The following is the schema of Book 69 | book_model = api.model('Book', { 70 | 'Flickr_URL': fields.String, 71 | 'Publisher': fields.String, 72 | 'Author': fields.String, 73 | 'Title': fields.String, 74 | 'Date_of_Publication': fields.Integer, 75 | 'Identifier': fields.Integer, 76 | 'Place_of_Publication': fields.String 77 | }) 78 | 79 | parser = reqparse.RequestParser() 80 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 81 | parser.add_argument('ascending', type=inputs.boolean) 82 | 83 | credential_model = api.model('credential', { 84 | 'username': fields.String, 85 | 'password': fields.String 86 | }) 87 | 88 | credential_parser = reqparse.RequestParser() 89 | credential_parser.add_argument('username', type=str) 90 | credential_parser.add_argument('password', type=str) 91 | 92 | 93 | @api.route('/token') 94 | class Token(Resource): 95 | @api.response(200, 'Successful') 96 | @api.doc(description="Generates an authentication token") 97 | @api.expect(credential_parser, validate=True) 98 | def get(self): 99 | args = credential_parser.parse_args() 100 | 101 | username = args.get('username') 102 | password = args.get('password') 103 | 104 | if username == 'admin' and password == 'admin': 105 | return {"token": auth.generate_token(username)} 106 | 107 | return {"message": "authorization has been refused for those credentials."}, 401 108 | 109 | 110 | @api.route('/books') 111 | class BooksList(Resource): 112 | @api.response(200, 'Successful') 113 | @api.doc(description="Get all books") 114 | @requires_auth 115 | def get(self): 116 | # get books as JSON string 117 | args = parser.parse_args() 118 | 119 | # retrieve the query parameters 120 | order_by = args.get('order') 121 | ascending = args.get('ascending', True) 122 | 123 | if order_by: 124 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 125 | 126 | json_str = df.to_json(orient='index') 127 | 128 | # convert the string JSON to a real JSON 129 | ds = json.loads(json_str) 130 | ret = [] 131 | 132 | for idx in ds: 133 | book = ds[idx] 134 | book['Identifier'] = int(idx) 135 | ret.append(book) 136 | 137 | return ret 138 | 139 | @api.response(201, 'Book Created Successfully') 140 | @api.response(400, 'Validation Error') 141 | @api.doc(description="Add a new book") 142 | @api.expect(book_model, validate=True) 143 | @requires_auth 144 | def post(self): 145 | book = request.json 146 | 147 | if 'Identifier' not in book: 148 | return {"message": "Missing Identifier"}, 400 149 | 150 | id = book['Identifier'] 151 | 152 | # check if the given identifier does not exist 153 | if id in df.index: 154 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 155 | 156 | # Put the values into the dataframe 157 | for key in book: 158 | if key not in book_model.keys(): 159 | # unexpected column 160 | return {"message": "Property {} is invalid".format(key)}, 400 161 | df.loc[id, key] = book[key] 162 | 163 | # df.append(book, ignore_index=True) 164 | return {"message": "Book {} is created".format(id)}, 201 165 | 166 | 167 | @api.route('/books/<int:id>') 168 | @api.param('id', 'The Book identifier') 169 | class Books(Resource): 170 | @api.response(404, 'Book was not found') 171 | @api.response(200, 'Successful') 172 | @api.doc(description="Get a 
book by its ID") 173 | @requires_auth 174 | def get(self, id): 175 | if id not in df.index: 176 | api.abort(404, "Book {} doesn't exist".format(id)) 177 | 178 | book = dict(df.loc[id]) 179 | return book 180 | 181 | @api.response(404, 'Book was not found') 182 | @api.response(200, 'Successful') 183 | @api.doc(description="Delete a book by its ID") 184 | @requires_auth 185 | def delete(self, id): 186 | if id not in df.index: 187 | api.abort(404, "Book {} doesn't exist".format(id)) 188 | 189 | df.drop(id, inplace=True) 190 | return {"message": "Book {} is removed.".format(id)}, 200 191 | 192 | @api.response(404, 'Book was not found') 193 | @api.response(400, 'Validation Error') 194 | @api.response(200, 'Successful') 195 | @api.expect(book_model, validate=True) 196 | @api.doc(description="Update a book by its ID") 197 | @requires_auth 198 | def put(self, id): 199 | 200 | if id not in df.index: 201 | api.abort(404, "Book {} doesn't exist".format(id)) 202 | 203 | # get the payload and convert it to a JSON 204 | book = request.json 205 | 206 | # Book ID cannot be changed 207 | if 'Identifier' in book and id != book['Identifier']: 208 | return {"message": "Identifier cannot be changed"}, 400 209 | 210 | # Update the values 211 | for key in book: 212 | if key not in book_model.keys(): 213 | # unexpected column 214 | return {"message": "Property {} is invalid".format(key)}, 400 215 | df.loc[id, key] = book[key] 216 | 217 | # df.append(book, ignore_index=True) 218 | return {"message": "Book {} has been successfully updated".format(id)}, 200 219 | 220 | 221 | if __name__ == '__main__': 222 | columns_to_drop = ['Edition Statement', 223 | 'Corporate Author', 224 | 'Corporate Contributors', 225 | 'Former owner', 226 | 'Engraver', 227 | 'Contributors', 228 | 'Issuance type', 229 | 'Shelfmarks' 230 | ] 231 | csv_file = "Books.csv" 232 | df = pd.read_csv(csv_file) 233 | 234 | # drop unnecessary columns 235 | df.drop(columns_to_drop, inplace=True, axis=1) 236 | 237 | # clean the date of publication & convert it to numeric data 238 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 239 | new_date = pd.to_numeric(new_date) 240 | new_date = new_date.fillna(0) 241 | df['Date of Publication'] = new_date 242 | 243 | # replace spaces in the name of columns 244 | df.columns = [c.replace(' ', '_') for c in df.columns] 245 | 246 | # set the index column; this will help us to find books with their ids 247 | df.set_index('Identifier', inplace=True) 248 | 249 | # run the application 250 | app.run(debug=True) 251 | -------------------------------------------------------------------------------- /Week9_Classification/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.utils import shuffle 4 | 5 | 6 | def load_iris(iris_path, split_percentage): 7 | df = pd.read_csv(iris_path) 8 | 9 | df = shuffle(df) 10 | iris_x = df.drop('species', axis=1).values 11 | iris_y = df['species'].values 12 | 13 | # Split iris data in train and test data 14 | # A random permutation, to split the data randomly 15 | 16 | split_point = int(len(iris_x) * split_percentage) 17 | iris_X_train = iris_x[:split_point] 18 | iris_y_train = iris_y[:split_point] 19 | iris_X_test = iris_x[split_point:] 20 | iris_y_test = iris_y[split_point:] 21 | 22 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 23 | 24 | 25 | if __name__ == '__main__': 26 | csv_file = 'iris.csv' 27 | 28 | 
# Split the data into test and train parts 29 | iris_X_train, iris_y_train, iris_X_test, iris_y_test = load_iris(csv_file, split_percentage=0.7) 30 | 31 | # train a classifier 32 | knn = KNeighborsClassifier() 33 | knn.fit(iris_X_train, iris_y_train) 34 | 35 | # predict the test set 36 | predictions = knn.predict(iris_X_test) 37 | 38 | print("Actual: ") 39 | print(iris_y_test) 40 | 41 | print("Predictions: ") 42 | print(predictions) 43 | 44 | -------------------------------------------------------------------------------- /Week9_Classification/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import confusion_matrix 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from sklearn.utils import shuffle 5 | from sklearn.metrics import precision_score, accuracy_score, recall_score 6 | 7 | 8 | def load_iris(iris_path, split_percentage): 9 | df = pd.read_csv(iris_path) 10 | 11 | df = shuffle(df) 12 | iris_x = df.drop('species', axis=1).values 13 | iris_y = df['species'].values 14 | 15 | # Split iris data in train and test data 16 | # A random permutation, to split the data randomly 17 | 18 | split_point = int(len(iris_x) * split_percentage) 19 | iris_X_train = iris_x[:split_point] 20 | iris_y_train = iris_y[:split_point] 21 | iris_X_test = iris_x[split_point:] 22 | iris_y_test = iris_y[split_point:] 23 | 24 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 25 | 26 | 27 | if __name__ == '__main__': 28 | csv_file = 'iris.csv' 29 | 30 | # Split the data into test and train parts 31 | iris_X_train, iris_y_train, iris_X_test, iris_y_test = load_iris(csv_file, split_percentage=0.7) 32 | 33 | # train a classifier 34 | knn = KNeighborsClassifier() 35 | knn.fit(iris_X_train, iris_y_train) 36 | 37 | # predict the test set 38 | predictions = knn.predict(iris_X_test) 39 | 40 | print("confusion_matrix:\n", confusion_matrix(iris_y_test, predictions)) 41 | print("precision:\t", precision_score(iris_y_test, predictions, average=None)) 42 | print("recall:\t\t", recall_score(iris_y_test, predictions, average=None)) 43 | print("accuracy:\t", accuracy_score(iris_y_test, predictions)) 44 | -------------------------------------------------------------------------------- /Week9_Classification/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.model_selection import cross_val_score 5 | from sklearn.naive_bayes import GaussianNB 6 | from sklearn.neighbors import KNeighborsClassifier 7 | from sklearn.svm import SVC 8 | from sklearn.tree import DecisionTreeClassifier 9 | from sklearn.utils import shuffle 10 | 11 | 12 | def load_iris(iris_path, split_percentage): 13 | df = pd.read_csv(iris_path) 14 | 15 | df = shuffle(df) 16 | iris_x = df.drop('species', axis=1).values 17 | iris_y = df['species'].values 18 | 19 | # Split iris data in train and test data 20 | # A random permutation, to split the data randomly 21 | 22 | split_point = int(len(iris_x) * split_percentage) 23 | iris_X_train = iris_x[:split_point] 24 | iris_y_train = iris_y[:split_point] 25 | iris_X_test = iris_x[split_point:] 26 | iris_y_test = iris_y[split_point:] 27 | 28 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 29 | 30 | 31 | if __name__ == '__main__': 32 | 33 | csv_file = 'iris.csv' 34 | iris_X, iris_y, _, _ = 
load_iris(csv_file, split_percentage=0.7) 35 | 36 | classifiers = [KNeighborsClassifier(), 37 | DecisionTreeClassifier(), 38 | LinearDiscriminantAnalysis(), 39 | LogisticRegression(), 40 | GaussianNB(), 41 | SVC()] 42 | 43 | classifier_accuracy_list = [] 44 | for i, classifier in enumerate(classifiers): 45 | # split the dataset into 5 folds; then test the classifier against each fold one by one 46 | accuracies = cross_val_score(classifier, iris_X, iris_y, cv=5) 47 | classifier_accuracy_list.append((accuracies.mean(), type(classifier).__name__)) 48 | 49 | # sort the classifiers 50 | classifier_accuracy_list = sorted(classifier_accuracy_list, reverse=True) 51 | for item in classifier_accuracy_list: 52 | print(item[1], ':', item[0]) 53 | -------------------------------------------------------------------------------- /Week9_Classification/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 
6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /assignments/Countries-Continents.csv: -------------------------------------------------------------------------------- 1 | Continent,Country 2 | Africa,Algeria 3 | Africa,Angola 4 | Africa,Benin 5 | Africa,Botswana 6 | Africa,Burkina 7 | Africa,Burundi 8 | Africa,Cameroon 9 | Africa,Cape Verde 10 | Africa,Central African Republic 11 | Africa,Chad 12 | Africa,Comoros 13 | Africa,Congo 14 | Africa,"Congo, Democratic Republic of" 15 | Africa,Djibouti 16 | Africa,Egypt 17 | Africa,Equatorial Guinea 18 | Africa,Eritrea 19 | Africa,Ethiopia 20 | Africa,Gabon 21 | Africa,Gambia 22 | Africa,Ghana 23 | Africa,Guinea 24 | Africa,Guinea-Bissau 25 | Africa,Ivory Coast 26 | Africa,Kenya 27 | Africa,Lesotho 28 | Africa,Liberia 29 | Africa,Libya 30 | Africa,Madagascar 31 | Africa,Malawi 32 | Africa,Mali 33 | Africa,Mauritania 34 | Africa,Mauritius 35 | Africa,Morocco 36 | Africa,Mozambique 37 | Africa,Namibia 38 | Africa,Niger 39 | Africa,Nigeria 40 | Africa,Rwanda 41 | Africa,Sao Tome and Principe 42 | Africa,Senegal 43 | Africa,Seychelles 44 | Africa,Sierra Leone 45 | Africa,Somalia 46 | Africa,South Africa 47 | Africa,South Sudan 48 | Africa,Sudan 49 | Africa,Swaziland 50 | Africa,Tanzania 51 | Africa,Togo 52 | Africa,Tunisia 53 | Africa,Uganda 54 | Africa,Zambia 55 | Africa,Zimbabwe 56 | Asia,Afghanistan 57 | Asia,Bahrain 58 | Asia,Bangladesh 59 | Asia,Bhutan 60 | Asia,Brunei 61 | Asia,Burma (Myanmar) 62 | 
Asia,Cambodia 63 | Asia,China 64 | Asia,East Timor 65 | Asia,India 66 | Asia,Indonesia 67 | Asia,Iran 68 | Asia,Iraq 69 | Asia,Israel 70 | Asia,Japan 71 | Asia,Jordan 72 | Asia,Kazakhstan 73 | Asia,"Korea, North" 74 | Asia,"Korea, South" 75 | Asia,Kuwait 76 | Asia,Kyrgyzstan 77 | Asia,Laos 78 | Asia,Lebanon 79 | Asia,Malaysia 80 | Asia,Maldives 81 | Asia,Mongolia 82 | Asia,Nepal 83 | Asia,Oman 84 | Asia,Pakistan 85 | Asia,Philippines 86 | Asia,Qatar 87 | Asia,Russian Federation 88 | Asia,Saudi Arabia 89 | Asia,Singapore 90 | Asia,Sri Lanka 91 | Asia,Syria 92 | Asia,Tajikistan 93 | Asia,Thailand 94 | Asia,Turkey 95 | Asia,Turkmenistan 96 | Asia,United Arab Emirates 97 | Asia,Uzbekistan 98 | Asia,Vietnam 99 | Asia,Yemen 100 | Europe,Albania 101 | Europe,Andorra 102 | Europe,Armenia 103 | Europe,Austria 104 | Europe,Azerbaijan 105 | Europe,Belarus 106 | Europe,Belgium 107 | Europe,Bosnia and Herzegovina 108 | Europe,Bulgaria 109 | Europe,Croatia 110 | Europe,Cyprus 111 | Europe,CZ 112 | Europe,Denmark 113 | Europe,Estonia 114 | Europe,Finland 115 | Europe,France 116 | Europe,Georgia 117 | Europe,Germany 118 | Europe,Greece 119 | Europe,Hungary 120 | Europe,Iceland 121 | Europe,Ireland 122 | Europe,Italy 123 | Europe,Latvia 124 | Europe,Liechtenstein 125 | Europe,Lithuania 126 | Europe,Luxembourg 127 | Europe,Macedonia 128 | Europe,Malta 129 | Europe,Moldova 130 | Europe,Monaco 131 | Europe,Montenegro 132 | Europe,Netherlands 133 | Europe,Norway 134 | Europe,Poland 135 | Europe,Portugal 136 | Europe,Romania 137 | Europe,San Marino 138 | Europe,Serbia 139 | Europe,Slovakia 140 | Europe,Slovenia 141 | Europe,Spain 142 | Europe,Sweden 143 | Europe,Switzerland 144 | Europe,Ukraine 145 | Europe,United Kingdom 146 | Europe,Vatican City 147 | North America,Antigua and Barbuda 148 | North America,Bahamas 149 | North America,Barbados 150 | North America,Belize 151 | North America,Canada 152 | North America,Costa Rica 153 | North America,Cuba 154 | North America,Dominica 155 | North America,Dominican Republic 156 | North America,El Salvador 157 | North America,Grenada 158 | North America,Guatemala 159 | North America,Haiti 160 | North America,Honduras 161 | North America,Jamaica 162 | North America,Mexico 163 | North America,Nicaragua 164 | North America,Panama 165 | North America,Saint Kitts and Nevis 166 | North America,Saint Lucia 167 | North America,Saint Vincent and the Grenadines 168 | North America,Trinidad and Tobago 169 | North America,US 170 | Oceania,Australia 171 | Oceania,Fiji 172 | Oceania,Kiribati 173 | Oceania,Marshall Islands 174 | Oceania,Micronesia 175 | Oceania,Nauru 176 | Oceania,New Zealand 177 | Oceania,Palau 178 | Oceania,Papua New Guinea 179 | Oceania,Samoa 180 | Oceania,Solomon Islands 181 | Oceania,Tonga 182 | Oceania,Tuvalu 183 | Oceania,Vanuatu 184 | South America,Argentina 185 | South America,Bolivia 186 | South America,Brazil 187 | South America,Chile 188 | South America,Colombia 189 | South America,Ecuador 190 | South America,Guyana 191 | South America,Paraguay 192 | South America,Peru 193 | South America,Suriname 194 | South America,Uruguay 195 | South America,Venezuela 196 | -------------------------------------------------------------------------------- /assignments/Getting Started Academic Cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Getting Started Academic Cloud.pdf 
-------------------------------------------------------------------------------- /assignments/Olympics_dataset1.csv: -------------------------------------------------------------------------------- 1 | Team,,Summer Games,,,, 2 | ,Rubish,Number of Games the country participated in,Gold,Silver,Bronze,Total 3 | Afghanistan (AFG),BLA,14,0,0,2,2 4 | Algeria (ALG),BLA,13,5,4,8,17 5 | Argentina (ARG),BLA,24,21,25,28,74 6 | Armenia (ARM),BLA,6,2,6,6,14 7 | Australasia (ANZ) [ANZ],BLA,2,3,4,5,12 8 | Kingdom of the Void (VOID),,,,,, 9 | Australia (AUS) [AUS] [Z],BLA,26,147,163,187,497 10 | Austria (AUT),BLA,27,18,33,36,87 11 | Azerbaijan (AZE),BLA,6,7,11,25,43 12 | Bahamas (BAH),BLA,16,6,2,6,14 13 | Bahrain (BRN),BLA,9,2,1,0,3 14 | Barbados (BAR) [BAR],BLA,12,0,0,1,1 15 | Belarus (BLR),BLA,6,12,27,39,78 16 | Belgium (BEL),BLA,26,40,53,55,148 17 | Bermuda (BER),BLA,18,0,0,1,1 18 | Bohemia (BOH) [BOH] [Z],BLA,3,0,1,3,4 19 | Botswana (BOT),BLA,10,0,1,0,1 20 | Treasure Island (TRI),,,,,, 21 | Brazil (BRA),BLA,22,30,36,62,128 22 | British West Indies (BWI) [BWI],BLA,1,0,0,2,2 23 | Bulgaria (BUL) [H],BLA,20,51,87,80,218 24 | Burundi (BDI),BLA,6,1,1,0,2 25 | Cameroon (CMR),BLA,14,3,1,2,6 26 | Canada (CAN),BLA,26,64,102,136,302 27 | Chile (CHI) [I],BLA,23,2,7,4,13 28 | China (CHN) [CHN],BLA,10,224,167,155,546 29 | Colombia (COL),BLA,19,5,9,14,28 30 | Costa Rica (CRC),BLA,15,1,1,2,4 31 | Ivory Coast (CIV) [CIV],BLA,13,1,1,1,3 32 | Croatia (CRO),BLA,7,11,10,12,33 33 | Cuba (CUB) [Z],BLA,20,78,68,79,225 34 | Cyprus (CYP),BLA,10,0,1,0,1 35 | Czech Republic (CZE) [CZE],BLA,6,15,17,24,56 36 | Czechoslovakia (TCH) [TCH],BLA,16,49,49,45,143 37 | Denmark (DEN) [Z],BLA,27,45,74,75,194 38 | Djibouti (DJI) [B],BLA,8,0,0,1,1 39 | Dominican Republic (DOM),BLA,14,3,2,2,7 40 | Ecuador (ECU),BLA,14,1,1,0,2 41 | Egypt (EGY) [EGY] [Z],BLA,22,7,10,15,32 42 | Eritrea (ERI),BLA,5,0,0,1,1 43 | Estonia (EST),BLA,12,9,9,16,34 44 | Ethiopia (ETH),BLA,13,22,11,20,53 45 | Fiji (FIJ),BLA,14,1,0,0,1 46 | Finland (FIN),BLA,25,101,85,117,303 47 | France (FRA) [O] [P] [Z],BLA,28,212,241,263,716 48 | Gabon (GAB),BLA,10,0,1,0,1 49 | Georgia (GEO),BLA,6,8,8,17,33 50 | Germany (GER) [GER] [Z],BLA,16,191,194,230,615 51 | United Team of Germany (EUA) [EUA],BLA,3,28,54,36,118 52 | East Germany (GDR) [GDR],BLA,5,153,129,127,409 53 | West Germany (FRG) [FRG],BLA,5,56,67,81,204 54 | Ghana (GHA) [GHA],BLA,14,0,1,3,4 55 | Great Britain (GBR) [GBR] [Z],BLA,28,263,295,291,849 56 | Republic of Mars (MARS),,,,,, 57 | Greece (GRE) [Z],BLA,28,33,43,40,116 58 | Grenada (GRN),BLA,9,1,1,0,2 59 | Guatemala (GUA),BLA,14,0,1,0,1 60 | Guyana (GUY) [GUY],BLA,17,0,0,1,1 61 | Haiti (HAI) [J],BLA,15,0,1,1,2 62 | Hong Kong (HKG) [HKG],BLA,16,1,1,1,3 63 | Hungary (HUN),BLA,26,175,147,169,491 64 | Iceland (ISL),BLA,20,0,2,2,4 65 | India (IND) [F],BLA,24,9,7,12,28 66 | Indonesia (INA),BLA,15,7,13,12,32 67 | Iran (IRI) [K],BLA,16,19,22,28,69 68 | Iraq (IRQ),BLA,14,0,0,1,1 69 | Ireland (IRL),BLA,21,9,10,12,31 70 | Israel (ISR),BLA,16,1,1,7,9 71 | Italy (ITA) [M] [S],BLA,27,206,178,193,577 72 | Jamaica (JAM) [JAM],BLA,17,22,35,20,77 73 | Japan (JPN),BLA,22,142,135,162,439 74 | Jordan (JOR),BLA,10,1,0,0,1 75 | Kazakhstan (KAZ),BLA,6,15,20,27,62 76 | Kenya (KEN),BLA,14,31,38,33,102 77 | Kosovo (KOS),BLA,1,1,0,0,1 78 | North Korea (PRK),BLA,10,16,16,22,54 79 | South Korea (KOR),BLA,17,90,87,90,267 80 | Kuwait (KUW),BLA,12,0,0,2,2 81 | Kyrgyzstan (KGZ),BLA,6,0,1,3,4 82 | Latvia (LAT),BLA,11,3,11,5,19 83 | Lebanon (LIB),BLA,17,0,2,2,4 84 | Liechtenstein (LIE),BLA,17,0,0,0,0 85 | 
Lithuania (LTU),BLA,9,6,7,12,25 86 | Luxembourg (LUX) [O],BLA,23,1,1,0,2 87 | Kingdom of the Rock (ROCK),,,,,, 88 | Macedonia (MKD),BLA,6,0,0,1,1 89 | Malaysia (MAS) [MAS],BLA,13,0,7,4,11 90 | Mauritius (MRI),BLA,9,0,0,1,1 91 | Mexico (MEX),BLA,23,13,24,32,69 92 | Moldova (MDA),BLA,6,0,2,3,5 93 | Mongolia (MGL),BLA,13,2,10,14,26 94 | Montenegro (MNE),BLA,3,0,1,0,1 95 | Morocco (MAR),BLA,14,6,5,12,23 96 | Mozambique (MOZ),BLA,10,1,0,1,2 97 | Namibia (NAM),BLA,7,0,4,0,4 98 | Netherlands (NED) [Z],BLA,26,85,92,108,285 99 | Netherlands Antilles (AHO) [AHO] [I],BLA,13,0,1,0,1 100 | New Zealand (NZL) [NZL],BLA,23,46,27,44,117 101 | Niger (NIG),BLA,12,0,1,1,2 102 | Nigeria (NGR),BLA,16,3,10,12,25 103 | Norway (NOR) [Q],BLA,25,56,49,47,152 104 | Pakistan (PAK),BLA,17,3,3,4,10 105 | Panama (PAN),BLA,17,1,0,2,3 106 | Paraguay (PAR),BLA,12,0,1,0,1 107 | Peru (PER) [L],BLA,18,1,3,0,4 108 | Kingdom of the Stromlands (STORM),,,,,, 109 | Philippines (PHI),BLA,21,0,3,7,10 110 | Poland (POL),BLA,21,68,83,133,284 111 | Portugal (POR),BLA,24,4,8,12,24 112 | Puerto Rico (PUR),BLA,18,1,2,6,9 113 | Qatar (QAT),BLA,9,0,1,4,5 114 | Romania (ROU),BLA,21,89,95,122,306 115 | Russia (RUS) [RUS],BLA,6,149,124,153,426 116 | Russian Empire (RU1) [RU1],BLA,3,1,4,3,8 117 | Soviet Union (URS) [URS],BLA,9,395,319,296,"1,010" 118 | Saudi Arabia (KSA),BLA,11,0,1,2,3 119 | Samoa (SAM),BLA,9,0,1,0,1 120 | Senegal (SEN),BLA,14,0,1,0,1 121 | Serbia (SRB) [SRB],BLA,4,3,6,6,15 122 | Serbia and Montenegro (SCG) [SCG],BLA,1,0,2,0,2 123 | Singapore (SIN),BLA,16,1,2,2,5 124 | Slovakia (SVK) [SVK],BLA,6,9,12,7,28 125 | Slovenia (SLO),BLA,7,5,8,10,23 126 | South Africa (RSA),BLA,19,26,31,29,86 127 | Spain (ESP) [Z],BLA,23,45,64,41,150 128 | Sri Lanka (SRI) [SRI],BLA,17,0,2,0,2 129 | Sudan (SUD),BLA,12,0,1,0,1 130 | Suriname (SUR) [E],BLA,12,1,0,1,2 131 | Sweden (SWE) [Z],BLA,27,145,170,179,494 132 | Switzerland (SUI),BLA,28,50,75,67,192 133 | Syria (SYR),BLA,13,1,1,1,3 134 | Chinese Taipei (TPE) [TPE] [TPE2],BLA,14,5,7,12,24 135 | Tajikistan (TJK),BLA,6,1,1,2,4 136 | Tanzania (TAN) [TAN],BLA,13,0,2,0,2 137 | Thailand (THA),BLA,16,9,8,16,33 138 | Togo (TOG),BLA,10,0,0,1,1 139 | Tonga (TGA),BLA,9,0,1,0,1 140 | Trinidad and Tobago (TRI) [TRI],BLA,17,2,6,11,19 141 | Tunisia (TUN),BLA,14,4,2,7,13 142 | Turkey (TUR),BLA,22,39,24,28,91 143 | Uganda (UGA),BLA,15,2,3,2,7 144 | Ukraine (UKR),BLA,6,35,30,56,121 145 | United Arab Emirates (UAE),BLA,9,1,0,1,2 146 | United States (USA) [P] [Q] [R] [Z],BLA,27,"1,022",795,705,"2,522" 147 | Uruguay (URU),BLA,21,2,2,6,10 148 | Uzbekistan (UZB),BLA,6,8,6,17,31 149 | Venezuela (VEN),BLA,18,2,3,10,15 150 | Vietnam (VIE),BLA,15,1,3,0,4 151 | Virgin Islands (ISV),BLA,12,0,1,0,1 152 | Yugoslavia (YUG) [YUG],BLA,18,28,31,31,90 153 | Zambia (ZAM) [ZAM],BLA,13,0,1,1,2 154 | Republic of Gamers (GAME),BLA,,,,, 155 | Zimbabwe (ZIM) [ZIM],BLA,13,3,4,1,8 156 | Unified Team (EUN) [EUN],BLA,1,45,38,29,112 157 | Independent Olympic Athletes (IOA) [IOA],BLA,3,1,0,1,2 158 | Independent Olympic Participants (IOP) [IOP],BLA,1,0,1,2,3 159 | Olympic Athletes from Russia (OAR) [OAR],BLA,0,0,0,0,0 160 | Mixed team (ZZX) [ZZX],BLA,3,8,5,4,17 161 | Totals,BLA,28,"5,115","5,080","5,482","15,677" 162 | -------------------------------------------------------------------------------- /assignments/Olympics_dataset2.csv: -------------------------------------------------------------------------------- 1 | Team,Winter Games,,,,,Combined Total,,,, 2 | ,Number of Games the country participated in,Gold,Silver,Bronze,Total,Number of Games the 
country participated in,Gold,Silver,Bronze,Total 3 | Afghanistan (AFG),0,0,0,0,0,14,0,0,2,2 4 | Algeria (ALG),3,0,0,0,0,16,5,4,8,17 5 | Argentina (ARG),19,0,0,0,0,43,21,25,28,74 6 | Armenia (ARM),7,0,0,0,0,13,2,6,6,14 7 | Australasia (ANZ) [ANZ],0,0,0,0,0,2,3,4,5,12 8 | Kingdom of the Void (VOID),,,,,,,,,, 9 | Australia (AUS) [AUS] [Z],19,5,5,5,15,45,152,168,192,512 10 | Austria (AUT),23,64,81,87,232,50,82,114,123,319 11 | Azerbaijan (AZE),6,0,0,0,0,12,7,11,25,43 12 | Bahamas (BAH),0,0,0,0,0,16,6,2,6,14 13 | Bahrain (BRN),0,0,0,0,0,9,2,1,0,3 14 | Barbados (BAR) [BAR],0,0,0,0,0,12,0,0,1,1 15 | Belarus (BLR),7,8,5,5,18,13,20,32,44,96 16 | Belgium (BEL),21,1,2,3,6,47,41,55,58,154 17 | Bermuda (BER),8,0,0,0,0,26,0,0,1,1 18 | Bohemia (BOH) [BOH] [Z],0,0,0,0,0,3,0,1,3,4 19 | Botswana (BOT),0,0,0,0,0,10,0,1,0,1 20 | Treasure Island (TRI),,,,,,,,,, 21 | Brazil (BRA),8,0,0,0,0,30,30,36,62,128 22 | British West Indies (BWI) [BWI],0,0,0,0,0,1,0,0,2,2 23 | Bulgaria (BUL) [H],20,1,2,3,6,40,52,89,83,224 24 | Burundi (BDI),0,0,0,0,0,6,1,1,0,2 25 | Cameroon (CMR),1,0,0,0,0,15,3,1,2,6 26 | Canada (CAN),23,73,64,62,199,49,137,166,198,501 27 | Chile (CHI) [I],17,0,0,0,0,40,2,7,4,13 28 | China (CHN) [CHN],11,13,28,21,62,21,237,195,176,608 29 | Colombia (COL),2,0,0,0,0,21,5,9,14,28 30 | Costa Rica (CRC),6,0,0,0,0,21,1,1,2,4 31 | Ivory Coast (CIV) [CIV],0,0,0,0,0,13,1,1,1,3 32 | Croatia (CRO),8,4,6,1,11,15,15,16,13,44 33 | Cuba (CUB) [Z],0,0,0,0,0,20,78,68,79,225 34 | Cyprus (CYP),11,0,0,0,0,21,0,1,0,1 35 | Czech Republic (CZE) [CZE],7,9,11,11,31,13,24,28,35,87 36 | Czechoslovakia (TCH) [TCH],16,2,8,15,25,32,51,57,60,168 37 | Denmark (DEN) [Z],14,0,1,0,1,41,45,75,75,195 38 | Djibouti (DJI) [B],0,0,0,0,0,8,0,0,1,1 39 | Dominican Republic (DOM),0,0,0,0,0,14,3,2,2,7 40 | Ecuador (ECU),1,0,0,0,0,15,1,1,0,2 41 | Egypt (EGY) [EGY] [Z],1,0,0,0,0,23,7,10,15,32 42 | Eritrea (ERI),1,0,0,0,0,6,0,0,1,1 43 | Estonia (EST),10,4,2,1,7,22,13,11,17,41 44 | Ethiopia (ETH),2,0,0,0,0,15,22,11,20,53 45 | Fiji (FIJ),3,0,0,0,0,17,1,0,0,1 46 | Finland (FIN),23,43,63,61,167,48,144,148,178,470 47 | France (FRA) [O] [P] [Z],23,36,35,53,124,51,248,276,316,840 48 | Gabon (GAB),0,0,0,0,0,10,0,1,0,1 49 | Georgia (GEO),7,0,0,0,0,13,8,8,17,33 50 | Germany (GER) [GER] [Z],12,92,88,60,240,28,283,282,290,855 51 | United Team of Germany (EUA) [EUA],3,8,6,5,19,6,36,60,41,137 52 | East Germany (GDR) [GDR],6,39,36,35,110,11,192,165,162,519 53 | West Germany (FRG) [FRG],6,11,15,13,39,11,67,82,94,243 54 | Ghana (GHA) [GHA],2,0,0,0,0,16,0,1,3,4 55 | Great Britain (GBR) [GBR] [Z],23,11,4,16,31,51,274,299,307,880 56 | Republic of Mars (MARS),,,,,,,,,, 57 | Greece (GRE) [Z],19,0,0,0,0,47,33,43,40,116 58 | Grenada (GRN),0,0,0,0,0,9,1,1,0,2 59 | Guatemala (GUA),1,0,0,0,0,15,0,1,0,1 60 | Guyana (GUY) [GUY],0,0,0,0,0,17,0,0,1,1 61 | Haiti (HAI) [J],0,0,0,0,0,15,0,1,1,2 62 | Hong Kong (HKG) [HKG],5,0,0,0,0,21,1,1,1,3 63 | Hungary (HUN),23,1,2,4,7,49,176,149,173,498 64 | Iceland (ISL),18,0,0,0,0,38,0,2,2,4 65 | India (IND) [F],10,0,0,0,0,34,9,7,12,28 66 | Indonesia (INA),0,0,0,0,0,15,7,13,12,32 67 | Iran (IRI) [K],11,0,0,0,0,27,19,22,28,69 68 | Iraq (IRQ),0,0,0,0,0,14,0,0,1,1 69 | Ireland (IRL),7,0,0,0,0,28,9,10,12,31 70 | Israel (ISR),7,0,0,0,0,23,1,1,7,9 71 | Italy (ITA) [M] [S],23,40,36,48,124,50,246,214,241,701 72 | Jamaica (JAM) [JAM],8,0,0,0,0,25,22,35,20,77 73 | Japan (JPN),21,14,22,22,58,43,156,157,184,497 74 | Jordan (JOR),0,0,0,0,0,10,1,0,0,1 75 | Kazakhstan (KAZ),7,1,3,4,8,13,16,23,31,70 76 | Kenya (KEN),4,0,0,0,0,18,31,38,33,102 77 | Kosovo 
(KOS),1,0,0,0,0,2,1,0,0,1 78 | North Korea (PRK),9,0,1,1,2,19,16,17,23,56 79 | South Korea (KOR),18,31,25,14,70,35,121,112,104,337 80 | Kuwait (KUW),0,0,0,0,0,12,0,0,2,2 81 | Kyrgyzstan (KGZ),7,0,0,0,0,13,0,1,3,4 82 | Latvia (LAT),11,0,4,4,8,22,3,15,9,27 83 | Lebanon (LIB),17,0,0,0,0,34,0,2,2,4 84 | Liechtenstein (LIE),19,2,2,6,10,36,2,2,6,10 85 | Lithuania (LTU),9,0,0,0,0,18,6,7,12,25 86 | Luxembourg (LUX) [O],9,0,2,0,2,32,1,3,0,4 87 | Kingdom of the Rock (ROCK),,,,,,,,,, 88 | Macedonia (MKD),6,0,0,0,0,12,0,0,1,1 89 | Malaysia (MAS) [MAS],1,0,0,0,0,14,0,7,4,11 90 | Mauritius (MRI),0,0,0,0,0,9,0,0,1,1 91 | Mexico (MEX),9,0,0,0,0,32,13,24,32,69 92 | Moldova (MDA),7,0,0,0,0,13,0,2,3,5 93 | Mongolia (MGL),14,0,0,0,0,27,2,10,14,26 94 | Montenegro (MNE),3,0,0,0,0,6,0,1,0,1 95 | Morocco (MAR),7,0,0,0,0,21,6,5,12,23 96 | Mozambique (MOZ),0,0,0,0,0,10,1,0,1,2 97 | Namibia (NAM),0,0,0,0,0,7,0,4,0,4 98 | Netherlands (NED) [Z],21,45,44,41,130,47,130,136,149,415 99 | Netherlands Antilles (AHO) [AHO] [I],2,0,0,0,0,15,0,1,0,1 100 | New Zealand (NZL) [NZL],16,0,1,2,3,39,46,28,46,120 101 | Niger (NIG),0,0,0,0,0,12,0,1,1,2 102 | Nigeria (NGR),1,0,0,0,0,17,3,10,12,25 103 | Norway (NOR) [Q],23,132,125,111,368,48,188,174,158,520 104 | Pakistan (PAK),3,0,0,0,0,20,3,3,4,10 105 | Panama (PAN),0,0,0,0,0,17,1,0,2,3 106 | Paraguay (PAR),1,0,0,0,0,13,0,1,0,1 107 | Peru (PER) [L],2,0,0,0,0,20,1,3,0,4 108 | Kingdom of the Stromlands (STORM),,,,,,,,,, 109 | Philippines (PHI),5,0,0,0,0,26,0,3,7,10 110 | Poland (POL),23,7,7,8,22,44,75,90,141,306 111 | Portugal (POR),8,0,0,0,0,32,4,8,12,24 112 | Puerto Rico (PUR),7,0,0,0,0,25,1,2,6,9 113 | Qatar (QAT),0,0,0,0,0,9,0,1,4,5 114 | Romania (ROU),21,0,0,1,1,42,89,95,123,307 115 | Russia (RUS) [RUS],6,47,38,35,120,12,196,162,188,546 116 | Russian Empire (RU1) [RU1],0,0,0,0,0,3,1,4,3,8 117 | Soviet Union (URS) [URS],9,78,57,59,194,18,473,376,355,"1,204" 118 | Saudi Arabia (KSA),0,0,0,0,0,11,0,1,2,3 119 | Samoa (SAM),0,0,0,0,0,9,0,1,0,1 120 | Senegal (SEN),5,0,0,0,0,19,0,1,0,1 121 | Serbia (SRB) [SRB],3,0,0,0,0,7,3,6,6,15 122 | Serbia and Montenegro (SCG) [SCG],1,0,0,0,0,2,0,2,0,2 123 | Singapore (SIN),1,0,0,0,0,17,1,2,2,5 124 | Slovakia (SVK) [SVK],7,3,4,1,8,13,12,16,8,36 125 | Slovenia (SLO),8,2,5,10,17,15,7,13,20,40 126 | South Africa (RSA),7,0,0,0,0,26,26,31,29,86 127 | Spain (ESP) [Z],20,1,0,3,4,43,46,64,44,154 128 | Sri Lanka (SRI) [SRI],0,0,0,0,0,17,0,2,0,2 129 | Sudan (SUD),0,0,0,0,0,12,0,1,0,1 130 | Suriname (SUR) [E],0,0,0,0,0,12,1,0,1,2 131 | Sweden (SWE) [Z],23,57,46,55,158,50,202,216,234,652 132 | Switzerland (SUI),23,55,46,52,153,51,105,121,119,345 133 | Syria (SYR),0,0,0,0,0,13,1,1,1,3 134 | Chinese Taipei (TPE) [TPE] [TPE2],12,0,0,0,0,26,5,7,12,24 135 | Tajikistan (TJK),4,0,0,0,0,10,1,1,2,4 136 | Tanzania (TAN) [TAN],0,0,0,0,0,13,0,2,0,2 137 | Thailand (THA),4,0,0,0,0,20,9,8,16,33 138 | Togo (TOG),2,0,0,0,0,12,0,0,1,1 139 | Tonga (TGA),2,0,0,0,0,11,0,1,0,1 140 | Trinidad and Tobago (TRI) [TRI],3,0,0,0,0,20,2,6,11,19 141 | Tunisia (TUN),0,0,0,0,0,14,4,2,7,13 142 | Turkey (TUR),17,0,0,0,0,39,39,24,28,91 143 | Uganda (UGA),0,0,0,0,0,15,2,3,2,7 144 | Ukraine (UKR),7,3,1,4,8,13,38,31,60,129 145 | United Arab Emirates (UAE),0,0,0,0,0,9,1,0,1,2 146 | United States (USA) [P] [Q] [R] [Z],23,105,110,90,305,50,"1,127",905,795,"2,827" 147 | Uruguay (URU),1,0,0,0,0,22,2,2,6,10 148 | Uzbekistan (UZB),7,1,0,0,1,13,9,6,17,32 149 | Venezuela (VEN),4,0,0,0,0,22,2,3,10,15 150 | Vietnam (VIE),0,0,0,0,0,15,1,3,0,4 151 | Virgin Islands (ISV),7,0,0,0,0,19,0,1,0,1 152 | Yugoslavia (YUG) 
[YUG],16,0,3,1,4,34,28,34,32,94 153 | Zambia (ZAM) [ZAM],0,0,0,0,0,13,0,1,1,2 154 | Republic of Gamers (GAME),,,,,,,,,, 155 | Zimbabwe (ZIM) [ZIM],1,0,0,0,0,14,3,4,1,8 156 | Unified Team (EUN) [EUN],1,9,6,8,23,2,54,44,37,135 157 | Independent Olympic Athletes (IOA) [IOA],0,0,0,0,0,3,1,0,1,2 158 | Independent Olympic Participants (IOP) [IOP],0,0,0,0,0,1,0,1,2,3 159 | Olympic Athletes from Russia (OAR) [OAR],1,2,6,9,17,1,2,6,9,17 160 | Mixed team (ZZX) [ZZX],0,0,0,0,0,3,8,5,4,17 161 | Totals,23,"1,060","1,058","1,050","3,168",51,"6,175","6,138","6,532","18,845" 162 | -------------------------------------------------------------------------------- /assignments/Process Mining Kickstarter - Exercises.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Process Mining Kickstarter - Exercises.pdf -------------------------------------------------------------------------------- /assignments/Process Mining Kickstarter - Solution Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Process Mining Kickstarter - Solution Manual.pdf -------------------------------------------------------------------------------- /assignments/z1111111.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def question_1(): 5 | print("--------------- question_1 ---------------") 6 | pass 7 | 8 | 9 | def question_2(): 10 | print("--------------- question_2 ---------------") 11 | pass 12 | 13 | 14 | def question_3(): 15 | print("--------------- question_3 ---------------") 16 | pass 17 | 18 | 19 | def question_4(): 20 | print("--------------- question_4 ---------------") 21 | pass 22 | 23 | 24 | def question_5(): 25 | print("--------------- question_5 ---------------") 26 | pass 27 | 28 | 29 | def question_6(): 30 | print("--------------- question_6 ---------------") 31 | pass 32 | 33 | 34 | def question_7(): 35 | print("--------------- question_7 ---------------") 36 | pass 37 | 38 | 39 | def question_8(): 40 | print("--------------- question_8 ---------------") 41 | pass 42 | 43 | 44 | def question_9(): 45 | print("--------------- question_9 ---------------") 46 | pass 47 | 48 | 49 | def question_10(): 50 | print("--------------- question_10 ---------------") 51 | pass 52 | 53 | 54 | if __name__ == "__main__": 55 | question_1() 56 | question_2() 57 | question_3() 58 | question_4() 59 | question_5() 60 | question_6() 61 | question_7() 62 | question_8() 63 | question_9() 64 | question_10() 65 | -------------------------------------------------------------------------------- /docs/Flyer_UNSW_Al-Banna.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/docs/Flyer_UNSW_Al-Banna.pdf -------------------------------------------------------------------------------- /docs/myExperience.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/docs/myExperience.pdf --------------------------------------------------------------------------------