├── .gitignore ├── 20t1 ├── assign3 │ ├── test.csv │ ├── training.csv │ └── validation.csv ├── credits.csv ├── movies.csv └── z1111111.py ├── 21t1 └── z1111111.py ├── 23T1 ├── YOUR_ZID_STARTING_WITH_Z.py ├── city_pairs.csv ├── datasets.zip └── seats.csv ├── 24T1 ├── ass1 │ ├── ds_jobs.csv │ ├── requirements.txt │ └── z1234567.py ├── ass2 │ └── zXXXXXXX.py └── ass3 │ ├── requirements.txt │ ├── test-marking.csv │ ├── test.csv │ └── train.csv ├── Ass1_ChoroplethMap ├── Olympics_dataset.csv ├── README.md ├── app.py └── requirements.txt ├── Jenkinsfile ├── README.md ├── Week10_Regression_and_Clustering ├── activity_1.py ├── activity_2.py ├── activity_3.py ├── diet.csv └── iris.csv ├── Week11_Preprocessing ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week2_DataAccess ├── Demographic_Statistics_By_Zip_Code.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week3_Data_Cleansing ├── Books.csv ├── City.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week4_Visualization ├── Books.csv ├── activity_1.py ├── activity_2.py ├── activity_3.py ├── activity_4.py └── iris.csv ├── Week5_Flask ├── Books.csv ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week6_Flask2 ├── Books.csv ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week7_Client ├── activity_1.py ├── activity_2.py ├── activity_3.py └── activity_4.py ├── Week7_GraphQL ├── activity_1.py ├── activity_2.py └── activity_3.py ├── Week8_Authentication ├── Books.csv ├── activity_1.py ├── activity_1_client.py ├── activity_2.py └── activity_3.py ├── Week9_Classification ├── activity_1.py ├── activity_2.py ├── activity_3.py └── iris.csv ├── assignments ├── Countries-Continents.csv ├── Getting Started Academic Cloud.pdf ├── Olympics_dataset1.csv ├── Olympics_dataset2.csv ├── Process Mining Kickstarter - Exercises.pdf ├── Process Mining Kickstarter - Solution Manual.pdf └── z1111111.py └── docs ├── Flyer_UNSW_Al-Banna.pdf └── myExperience.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .idea/ 106 | data/ 107 | -------------------------------------------------------------------------------- /20t1/z1111111.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import sys 6 | import os 7 | 8 | studentid = os.path.basename(sys.modules[__name__].__file__) 9 | 10 | 11 | ################################################# 12 | # Your personal methods can be here ... 13 | ################################################# 14 | 15 | 16 | def log(question, output_df, other): 17 | print("--------------- {}----------------".format(question)) 18 | if other is not None: 19 | print(question, other) 20 | if output_df is not None: 21 | print(output_df.head(5).to_string()) 22 | 23 | 24 | def question_1(movies, credits): 25 | """ 26 | :param movies: the path for the movie.csv file 27 | :param credits: the path for the credits.csv file 28 | :return: df1 29 | Data Type: Dataframe 30 | Please read the assignment specs to know how to create the output dataframe 31 | """ 32 | 33 | ################################################# 34 | # Your code goes here ... 35 | ################################################# 36 | 37 | log("QUESTION 1", output_df=df1, other=df1.shape) 38 | return df1 39 | 40 | 41 | def question_2(df1): 42 | """ 43 | :param df1: the dataframe created in question 1 44 | :return: df2 45 | Data Type: Dataframe 46 | Please read the assignment specs to know how to create the output dataframe 47 | """ 48 | 49 | ################################################# 50 | # Your code goes here ... 51 | ################################################# 52 | 53 | log("QUESTION 2", output_df=df2, other=(len(df2.columns), sorted(df2.columns))) 54 | return df2 55 | 56 | 57 | def question_3(df2): 58 | """ 59 | :param df2: the dataframe created in question 2 60 | :return: df3 61 | Data Type: Dataframe 62 | Please read the assignment specs to know how to create the output dataframe 63 | """ 64 | 65 | ################################################# 66 | # Your code goes here ... 
67 | ################################################# 68 | 69 | log("QUESTION 3", output_df=df3, other=df3.index.name) 70 | return df3 71 | 72 | 73 | def question_4(df3): 74 | """ 75 | :param df3: the dataframe created in question 3 76 | :return: df4 77 | Data Type: Dataframe 78 | Please read the assignment specs to know how to create the output dataframe 79 | """ 80 | 81 | ################################################# 82 | # Your code goes here ... 83 | ################################################# 84 | 85 | log("QUESTION 4", output_df=df4, other=(df4['budget'].min(), df4['budget'].max(), df4['budget'].mean())) 86 | return df4 87 | 88 | 89 | def question_5(df4): 90 | """ 91 | :param df4: the dataframe created in question 4 92 | :return: df5 93 | Data Type: Dataframe 94 | Please read the assignment specs to know how to create the output dataframe 95 | """ 96 | 97 | ################################################# 98 | # Your code goes here ... 99 | ################################################# 100 | 101 | log("QUESTION 5", output_df=df5, 102 | other=(df5['success_impact'].min(), df5['success_impact'].max(), df5['success_impact'].mean())) 103 | return df5 104 | 105 | 106 | def question_6(df5): 107 | """ 108 | :param df5: the dataframe created in question 5 109 | :return: df6 110 | Data Type: Dataframe 111 | Please read the assignment specs to know how to create the output dataframe 112 | """ 113 | 114 | ################################################# 115 | # Your code goes here ... 116 | ################################################# 117 | 118 | log("QUESTION 6", output_df=df6, other=(df6['popularity'].min(), df6['popularity'].max(), df6['popularity'].mean())) 119 | return df6 120 | 121 | 122 | def question_7(df6): 123 | """ 124 | :param df6: the dataframe created in question 6 125 | :return: df7 126 | Data Type: Dataframe 127 | Please read the assignment specs to know how to create the output dataframe 128 | """ 129 | 130 | ################################################# 131 | # Your code goes here ... 132 | ################################################# 133 | 134 | log("QUESTION 7", output_df=df7, other=df7['popularity'].dtype) 135 | return df7 136 | 137 | 138 | def question_8(df7): 139 | """ 140 | :param df7: the dataframe created in question 7 141 | :return: df8 142 | Data Type: Dataframe 143 | Please read the assignment specs to know how to create the output dataframe 144 | """ 145 | 146 | ################################################# 147 | # Your code goes here ... 148 | ################################################# 149 | 150 | log("QUESTION 8", output_df=df8, other=df8["cast"].head(10).values) 151 | return df8 152 | 153 | 154 | def question_9(df8): 155 | """ 156 | :param df9: the dataframe created in question 8 157 | :return: movies 158 | Data Type: List of strings (movie titles) 159 | Please read the assignment specs to know how to create the output 160 | """ 161 | 162 | ################################################# 163 | # Your code goes here ... 164 | ################################################# 165 | 166 | log("QUESTION 9", output_df=None, other=movies) 167 | return movies 168 | 169 | 170 | def question_10(df8): 171 | """ 172 | :param df8: the dataframe created in question 8 173 | :return: df10 174 | Data Type: Dataframe 175 | Please read the assignment specs to know how to create the output dataframe 176 | """ 177 | 178 | ################################################# 179 | # Your code goes here ... 
180 | ################################################# 181 | 182 | log("QUESTION 10", output_df=df10, other=df10["release_date"].head(5).to_string().replace("\n", " ")) 183 | return df10 184 | 185 | 186 | def question_11(df10): 187 | """ 188 | :param df10: the dataframe created in question 10 189 | :return: nothing, but saves the figure on the disk 190 | """ 191 | 192 | ################################################# 193 | # Your code goes here ... 194 | ################################################# 195 | 196 | plt.savefig("{}-Q11.png".format(studentid)) 197 | 198 | 199 | def question_12(df10): 200 | """ 201 | :param df10: the dataframe created in question 10 202 | :return: nothing, but saves the figure on the disk 203 | """ 204 | 205 | ################################################# 206 | # Your code goes here ... 207 | ################################################# 208 | 209 | plt.savefig("{}-Q12.png".format(studentid)) 210 | 211 | 212 | def question_13(df10): 213 | """ 214 | :param df10: the dataframe created in question 10 215 | :return: nothing, but saves the figure on the disk 216 | """ 217 | 218 | ################################################# 219 | # Your code goes here ... 220 | ################################################# 221 | 222 | plt.savefig("{}-Q13.png".format(studentid)) 223 | 224 | 225 | if __name__ == "__main__": 226 | df1 = question_1("movies.csv", "credits.csv") 227 | df2 = question_2(df1) 228 | df3 = question_3(df2) 229 | df4 = question_4(df3) 230 | df5 = question_5(df4) 231 | df6 = question_6(df5) 232 | df7 = question_7(df6) 233 | df8 = question_8(df7) 234 | movies = question_9(df8) 235 | df10 = question_10(df8) 236 | question_11(df10) 237 | question_12(df10) 238 | question_13(df10) 239 | -------------------------------------------------------------------------------- /21t1/z1111111.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import sys 5 | import os 6 | import numpy as np 7 | import math 8 | import re 9 | 10 | studentid = os.path.basename(sys.modules[__name__].__file__) 11 | 12 | 13 | def log(question, output_df, other): 14 | print("--------------- {}----------------".format(question)) 15 | 16 | if other is not None: 17 | print(question, other) 18 | if output_df is not None: 19 | df = output_df.head(5).copy(True) 20 | for c in df.columns: 21 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 22 | 23 | df.columns = [a[:10] + "..." for a in df.columns] 24 | print(df.to_string()) 25 | 26 | 27 | def question_1(exposure, countries): 28 | """ 29 | :param exposure: the path for the exposure.csv file 30 | :param countries: the path for the Countries.csv file 31 | :return: df1 32 | Data Type: Dataframe 33 | Please read the assignment specs to know how to create the output dataframe 34 | """ 35 | 36 | ################################################# 37 | # Your code goes here ... 38 | ################################################# 39 | 40 | log("QUESTION 1", output_df=df1, other=df1.shape) 41 | return df1 42 | 43 | 44 | def question_2(df1): 45 | """ 46 | :param df1: the dataframe created in question 1 47 | :return: df2 48 | Data Type: Dataframe 49 | Please read the assignment specs to know how to create the output dataframe 50 | """ 51 | 52 | ################################################# 53 | # Your code goes here ... 
54 | ################################################# 55 | 56 | log("QUESTION 2", output_df=df2[["avg_latitude", "avg_longitude"]], other=df2.shape) 57 | return df2 58 | 59 | 60 | def question_3(df2): 61 | """ 62 | :param df2: the dataframe created in question 2 63 | :return: df3 64 | Data Type: Dataframe 65 | Please read the assignment specs to know how to create the output dataframe 66 | """ 67 | 68 | ################################################# 69 | # Your code goes here ... 70 | ################################################# 71 | 72 | log("QUESTION 3", output_df=df3[['distance_to_Wuhan']], other=df3.shape) 73 | return df3 74 | 75 | 76 | def question_4(df2, continents): 77 | """ 78 | :param df2: the dataframe created in question 2 79 | :param continents: the path for the Countries-Continents.csv file 80 | :return: df4 81 | Data Type: Dataframe 82 | Please read the assignment specs to know how to create the output dataframe 83 | """ 84 | 85 | ################################################# 86 | # Your code goes here ... 87 | ################################################# 88 | 89 | log("QUESTION 4", output_df=df4, other=df4.shape) 90 | return df4 91 | 92 | 93 | def question_5(df2): 94 | """ 95 | :param df2: the dataframe created in question 2 96 | :return: df5 97 | Data Type: dataframe 98 | Please read the assignment specs to know how to create the output dataframe 99 | """ 100 | ################################################# 101 | # Your code goes here ... 102 | ################################################# 103 | 104 | log("QUESTION 5", output_df=df5, other=df5.shape) 105 | return df5 106 | 107 | 108 | def question_6(df2): 109 | """ 110 | :param df2: the dataframe created in question 2 111 | :return: cities_lst 112 | Data Type: list 113 | Please read the assignment specs to know how to create the output dataframe 114 | """ 115 | cities_lst = [] 116 | ################################################# 117 | # Your code goes here ... 118 | ################################################# 119 | 120 | log("QUESTION 6", output_df=None, other=cities_lst) 121 | return cities_lst 122 | 123 | 124 | def question_7(df2): 125 | """ 126 | :param df2: the dataframe created in question 2 127 | :return: df7 128 | Data Type: Dataframe 129 | Please read the assignment specs to know how to create the output dataframe 130 | """ 131 | 132 | ################################################# 133 | # Your code goes here ... 134 | ################################################# 135 | 136 | log("QUESTION 7", output_df=df7, other=df7.shape) 137 | return df7 138 | 139 | 140 | def question_8(df2, continents): 141 | """ 142 | :param df2: the dataframe created in question 2 143 | :param continents: the path for the Countries-Continents.csv file 144 | :return: nothing, but saves the figure on the disk 145 | """ 146 | 147 | ################################################# 148 | # Your code goes here ... 149 | ################################################# 150 | 151 | plt.savefig("{}-Q11.png".format(studentid)) 152 | 153 | 154 | def question_9(df2): 155 | """ 156 | :param df2: the dataframe created in question 2 157 | :return: nothing, but saves the figure on the disk 158 | """ 159 | 160 | ################################################# 161 | # Your code goes here ... 
162 | ################################################# 163 | 164 | plt.savefig("{}-Q12.png".format(studentid)) 165 | 166 | 167 | def question_10(df2, continents): 168 | """ 169 | :param df2: the dataframe created in question 2 170 | :return: nothing, but saves the figure on the disk 171 | :param continents: the path for the Countries-Continents.csv file 172 | """ 173 | 174 | ################################################# 175 | # Your code goes here ... 176 | ################################################# 177 | 178 | plt.savefig("{}-Q13.png".format(studentid)) 179 | 180 | 181 | if __name__ == "__main__": 182 | df1 = question_1("exposure.csv", "Countries.csv") 183 | df2 = question_2(df1.copy(True)) 184 | df3 = question_3(df2.copy(True)) 185 | df4 = question_4(df2.copy(True), "Countries-Continents.csv") 186 | df5 = question_5(df2.copy(True)) 187 | lst = question_6(df2.copy(True)) 188 | df7 = question_7(df2.copy(True)) 189 | question_8(df2.copy(True), "Countries-Continents.csv") 190 | question_9(df2.copy(True)) 191 | question_10(df2.copy(True), "Countries-Continents.csv") 192 | -------------------------------------------------------------------------------- /23T1/YOUR_ZID_STARTING_WITH_Z.py: -------------------------------------------------------------------------------- 1 | import json 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | import sys 5 | import os 6 | import numpy as np 7 | import math 8 | import re 9 | 10 | studentid = os.path.basename(sys.modules[__name__].__file__) 11 | 12 | 13 | def log(question, output_df, other): 14 | print("--------------- {}----------------".format(question)) 15 | 16 | if other is not None: 17 | print(question, other) 18 | if output_df is not None: 19 | df = output_df.head(5).copy(True) 20 | for c in df.columns: 21 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 22 | 23 | df.columns = [a[:10] + "..." for a in df.columns] 24 | print(df.to_string()) 25 | 26 | 27 | def question_1(city_pairs): 28 | """ 29 | :return: df1 30 | Data Type: Dataframe 31 | Please read the assignment specs to know how to create the output dataframe 32 | """ 33 | 34 | ################################################# 35 | # Your code goes here ... 36 | ################################################# 37 | 38 | log("QUESTION 1", output_df=df1[["AustralianPort", "ForeignPort", "passenger_in_out", "freight_in_out", "mail_in_out"]], other=df1.shape) 39 | return df1 40 | 41 | 42 | def question_2(df1): 43 | """ 44 | :param df1: the dataframe created in question 1 45 | :return: dataframe df2 46 | Please read the assignment specs to know how to create the output dataframe 47 | """ 48 | 49 | ################################################# 50 | # Your code goes here ... 51 | ################################################# 52 | 53 | log("QUESTION 2", output_df=df2, other=df2.shape) 54 | return df2 55 | 56 | 57 | def question_3(df1): 58 | """ 59 | :param df1: the dataframe created in question 1 60 | :return: df3 61 | Data Type: Dataframe 62 | Please read the assignment specs to know how to create the output dataframe 63 | """ 64 | ################################################# 65 | # Your code goes here ... 
66 | ################################################# 67 | 68 | log("QUESTION 3", output_df=df3, other=df3.shape) 69 | return df3 70 | 71 | 72 | def question_4(df1): 73 | """ 74 | :param df1: the dataframe created in question 1 75 | :return: df4 76 | Data Type: Dataframe 77 | Please read the assignment specs to know how to create the output dataframe 78 | """ 79 | 80 | ################################################# 81 | # Your code goes here ... 82 | ################################################# 83 | 84 | log("QUESTION 4", output_df=df4, other=df4.shape) 85 | return df4 86 | 87 | 88 | def question_5(seats): 89 | """ 90 | :param seats : the path to dataset 91 | :return: df5 92 | Data Type: dataframe 93 | Please read the assignment specs to know how to create the output dataframe 94 | """ 95 | ################################################# 96 | # Your code goes here ... 97 | ################################################# 98 | 99 | log("QUESTION 5", output_df=df5, other=df5.shape) 100 | return df5 101 | 102 | 103 | def question_6(df5): 104 | """ 105 | :param df5: the dataframe created in question 5 106 | :return: df6 107 | """ 108 | 109 | ################################################# 110 | # Your code goes here ... 111 | ################################################# 112 | 113 | log("QUESTION 6", output_df=df6, other=df6.shape) 114 | return df6 115 | 116 | 117 | def question_7(seats, city_pairs): 118 | """ 119 | :param seats: the path to dataset 120 | :param city_pairs : the path to dataset 121 | :return: nothing, but saves the figure on the disk 122 | """ 123 | 124 | ################################################# 125 | # Your code goes here ... 126 | ################################################# 127 | 128 | plt.savefig("{}-Q7.png".format(studentid)) 129 | 130 | 131 | if __name__ == "__main__": 132 | df1 = question_1("city_pairs.csv") 133 | df2 = question_2(df1.copy(True)) 134 | df3 = question_3(df1.copy(True)) 135 | df4 = question_4(df1.copy(True)) 136 | df5 = question_5("seats.csv") 137 | df6 = question_6(df5.copy(True)) 138 | question_7("seats.csv", "city_pairs.csv") 139 | -------------------------------------------------------------------------------- /23T1/datasets.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/23T1/datasets.zip -------------------------------------------------------------------------------- /24T1/ass1/requirements.txt: -------------------------------------------------------------------------------- 1 | contourpy==1.2.0 2 | cycler==0.12.1 3 | fonttools==4.49.0 4 | kiwisolver==1.4.5 5 | lxml==5.1.0 6 | matplotlib==3.8.2 7 | numpy==1.26.0 8 | packaging==23.2 9 | pandas==2.2.0 10 | pillow==10.2.0 11 | pyparsing==3.1.1 12 | python-dateutil==2.8.2 13 | pytz==2024.1 14 | rapidfuzz==3.6.1 15 | six==1.16.0 16 | thefuzz==0.22.1 17 | tzdata==2024.1 18 | -------------------------------------------------------------------------------- /24T1/ass1/z1234567.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Third-party libraries 5 | # NOTE: You may **only** use the following third-party libraries: 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import pandas as pd 9 | from thefuzz import fuzz 10 | from thefuzz import process 11 | # NOTE: It isn't necessary to use all of these to complete the assignment, 12 | # but you are free to do so, should you choose. 13 | 14 | # Standard libraries 15 | # NOTE: You may use **any** of the Python 3.11 or Python 3.12 standard libraries: 16 | # https://docs.python.org/3.11/library/index.html 17 | # https://docs.python.org/3.12/library/index.html 18 | from pathlib import Path 19 | # ... import your standard libraries here ... 20 | 21 | 22 | ###################################################### 23 | # NOTE: DO NOT MODIFY THE LINE BELOW ... 24 | ###################################################### 25 | studentid = Path(__file__).stem 26 | 27 | ###################################################### 28 | # NOTE: DO NOT MODIFY THE FUNCTION BELOW ... 29 | ###################################################### 30 | def log(question, output_df, other): 31 | print(f"--------------- {question}----------------") 32 | 33 | if other is not None: 34 | print(question, other) 35 | if output_df is not None: 36 | df = output_df.head(5).copy(True) 37 | for c in df.columns: 38 | df[c] = df[c].apply(lambda a: a[:20] if isinstance(a, str) else a) 39 | 40 | df.columns = [a[:10] + "..." for a in df.columns] 41 | print(df.to_string()) 42 | 43 | 44 | ###################################################### 45 | # NOTE: YOU MAY ADD ANY HELPER FUNCTIONS BELOW ... 46 | ###################################################### 47 | 48 | 49 | 50 | ###################################################### 51 | # QUESTIONS TO COMPLETE BELOW ... 52 | ###################################################### 53 | 54 | ###################################################### 55 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 56 | ###################################################### 57 | def question_1(jobs_csv): 58 | """Read the data science jobs CSV file into a DataFrame. 59 | 60 | See the assignment spec for more details. 61 | 62 | Args: 63 | jobs_csv (str): Path to the jobs CSV file. 64 | 65 | Returns: 66 | DataFrame: The jobs DataFrame. 67 | """ 68 | 69 | ###################################################### 70 | # TODO: Your code goes here ... 71 | ###################################################### 72 | 73 | 74 | 75 | ###################################################### 76 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 77 | ###################################################### 78 | log("QUESTION 1", output_df=df, other=df.shape) 79 | return df 80 | 81 | 82 | 83 | ###################################################### 84 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 85 | ###################################################### 86 | def question_2(cost_csv, cost_url): 87 | """Read the cost of living CSV into a DataFrame. If the CSV file does not 88 | exist, scrape it from the specified URL and save it to the CSV file. 89 | 90 | See the assignment spec for more details. 91 | 92 | Args: 93 | cost_csv (str): Path to the cost of living CSV file. 94 | cost_url (str): URL of the cost of living page. 95 | 96 | Returns: 97 | DataFrame: The cost of living DataFrame. 98 | """ 99 | 100 | ###################################################### 101 | # TODO: Your code goes here ... 
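    # --- Illustrative sketch only: added by the editor, not part of the original
    # template, and not necessarily what the assignment spec requires. ---
    # A common "read the CSV if it exists, otherwise scrape it and cache it"
    # pattern, assuming the page exposes a single HTML table that pandas can
    # parse (lxml is already pinned in requirements.txt, and Path is imported
    # at the top of this file), looks roughly like:
    #
    #     if Path(cost_csv).exists():
    #         df = pd.read_csv(cost_csv)
    #     else:
    #         df = pd.read_html(cost_url)[0]
    #         df.to_csv(cost_csv, index=False)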
102 |     ######################################################
103 | 
104 | 
105 | 
106 |     ######################################################
107 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
108 |     ######################################################
109 |     log("QUESTION 2", output_df=df, other=df.shape)
110 |     return df
111 | 
112 | 
113 | ######################################################
114 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
115 | ######################################################
116 | def question_3(currency_csv, currency_url):
117 |     """Read the currency conversion rates CSV into a DataFrame. If the CSV
118 |     file does not exist, scrape it from the specified URL and save it to
119 |     the CSV file.
120 | 
121 |     See the assignment spec for more details.
122 | 
123 |     Args:
124 |         currency_csv (str): Path to the currency conversion rates CSV file.
125 |         currency_url (str): URL of the currency conversion rates page.
126 | 
127 |     Returns:
128 |         DataFrame: The currency conversion rates DataFrame.
129 |     """
130 | 
131 |     ######################################################
132 |     # TODO: Your code goes here ...
133 |     ######################################################
134 | 
135 | 
136 | 
137 |     ######################################################
138 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
139 |     ######################################################
140 |     log("QUESTION 3", output_df=df, other=df.shape)
141 |     return df
142 | 
143 | 
144 | ######################################################
145 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
146 | ######################################################
147 | def question_4(country_csv, country_url):
148 |     """Read the country codes CSV into a DataFrame. If the CSV file does not
149 |     exist, scrape the data from the specified URL and save it to the
150 |     CSV file.
151 | 
152 |     See the assignment spec for more details.
153 | 
154 |     Args:
155 |         country_csv (str): Path to the country codes CSV file.
156 |         country_url (str): URL of the country codes page.
157 | 
158 |     Returns:
159 |         DataFrame: The country codes DataFrame.
160 |     """
161 | 
162 |     ######################################################
163 |     # TODO: Your code goes here ...
164 |     ######################################################
165 | 
166 | 
167 | 
168 |     ######################################################
169 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
170 |     ######################################################
171 |     log("QUESTION 4", output_df=df, other=df.shape)
172 |     return df
173 | 
174 | 
175 | ######################################################
176 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ...
177 | ######################################################
178 | def question_5(jobs_df):
179 |     """Summarise some dimensions of the jobs DataFrame.
180 | 
181 |     See the assignment spec for more details.
182 | 
183 |     Args:
184 |         jobs_df (DataFrame): The jobs DataFrame returned in question 1.
185 | 
186 |     Returns:
187 |         DataFrame: The summary DataFrame.
188 |     """
189 | 
190 |     ######################################################
191 |     # TODO: Your code goes here ...
192 |     ######################################################
193 | 
194 | 
195 | 
196 |     ######################################################
197 |     # NOTE: DO NOT MODIFY THE CODE BELOW ...
198 | ###################################################### 199 | log("QUESTION 5", output_df=df, other=df.shape) 200 | return df 201 | 202 | 203 | ###################################################### 204 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 205 | ###################################################### 206 | def question_6(jobs_df): 207 | """Add an experience rating column to the jobs DataFrame. 208 | 209 | See the assignment spec for more details. 210 | 211 | Args: 212 | jobs_df (DataFrame): The jobs DataFrame returned in question 1. 213 | 214 | Returns: 215 | DataFrame: The jobs DataFrame with the experience rating column added. 216 | """ 217 | 218 | ###################################################### 219 | # TODO: Your code goes here ... 220 | ###################################################### 221 | 222 | 223 | 224 | ###################################################### 225 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 226 | ###################################################### 227 | log("QUESTION 6", output_df=df, other=df.shape) 228 | return df 229 | 230 | 231 | ###################################################### 232 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 233 | ###################################################### 234 | def question_7(jobs_df, country_df): 235 | """Merge the jobs and country codes DataFrames. 236 | 237 | See the assignment spec for more details. 238 | 239 | Args: 240 | jobs_df (DataFrame): The jobs DataFrame returned in question 6. 241 | country_df (DataFrame): The country codes DataFrame returned in 242 | question 4. 243 | 244 | Returns: 245 | DataFrame: The merged DataFrame. 246 | """ 247 | 248 | ###################################################### 249 | # TODO: Your code goes here ... 250 | ###################################################### 251 | 252 | 253 | 254 | ###################################################### 255 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 256 | ###################################################### 257 | log("QUESTION 7", output_df=df, other=df.shape) 258 | return df 259 | 260 | 261 | ###################################################### 262 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 263 | ###################################################### 264 | def question_8(jobs_df, currency_df): 265 | """Add an Australian dollar salary column to the jobs DataFrame. 266 | 267 | See the assignment spec for more details. 268 | 269 | Args: 270 | jobs_df (DataFrame): The jobs DataFrame returned in question 7. 271 | currency_df (DataFrame): The currency conversion rates DataFrame 272 | returned in question 3. 273 | 274 | Returns: 275 | DataFrame: The jobs DataFrame with the Australian dollar salary column 276 | added. 277 | """ 278 | 279 | ###################################################### 280 | # TODO: Your code goes here ... 281 | ###################################################### 282 | 283 | 284 | 285 | 286 | ###################################################### 287 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 288 | ###################################################### 289 | log("QUESTION 8", output_df=df, other=df.shape) 290 | return df 291 | 292 | 293 | ###################################################### 294 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 295 | ###################################################### 296 | def question_9(cost_df): 297 | """Re-scale the cost of living DataFrame to be relative to Australia. 
298 | 299 | See the assignment spec for more details. 300 | 301 | Args: 302 | cost_df (DataFrame): The cost of living DataFrame returned in question 2. 303 | 304 | Returns: 305 | DataFrame: The re-scaled cost of living DataFrame. 306 | """ 307 | 308 | ###################################################### 309 | # TODO: Your code goes here ... 310 | ###################################################### 311 | 312 | 313 | 314 | ###################################################### 315 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 316 | ###################################################### 317 | log("QUESTION 9", output_df=df, other=df.shape) 318 | return df 319 | 320 | 321 | ###################################################### 322 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 323 | ###################################################### 324 | def question_10(jobs_df, cost_df): 325 | """Merge the jobs and cost of living DataFrames. 326 | 327 | See the assignment spec for more details. 328 | 329 | Args: 330 | jobs_df (DataFrame): The jobs DataFrame returned in question 8. 331 | cost_df (DataFrame): The cost of living DataFrame returned in question 9. 332 | 333 | Returns: 334 | DataFrame: The merged DataFrame. 335 | """ 336 | 337 | ###################################################### 338 | # TODO: Your code goes here ... 339 | ###################################################### 340 | 341 | 342 | 343 | ###################################################### 344 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 345 | ###################################################### 346 | log("QUESTION 10", output_df=df, other=df.shape) 347 | return df 348 | 349 | 350 | ###################################################### 351 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 352 | ###################################################### 353 | def question_11(jobs_df): 354 | """Create a pivot table of the average salary in AUD by country and 355 | experience rating. 356 | 357 | See the assignment spec for more details. 358 | 359 | Args: 360 | jobs_df (DataFrame): The jobs DataFrame returned in question 10. 361 | 362 | Returns: 363 | DataFrame: The pivot table. 364 | """ 365 | 366 | ###################################################### 367 | # TODO: Your code goes here ... 368 | ###################################################### 369 | 370 | 371 | 372 | ###################################################### 373 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 374 | ###################################################### 375 | log("QUESTION 11", output_df=None, other=df) 376 | return df 377 | 378 | 379 | ###################################################### 380 | # NOTE: DO NOT MODIFY THE FUNCTION SIGNATURE BELOW ... 381 | ###################################################### 382 | def question_12(jobs_df): 383 | """Create a visualisation of data science jobs to help inform a decision 384 | about where to live, based (minimally) on salary and cost of living. 385 | 386 | See the assignment spec for more details. 387 | 388 | Args: 389 | jobs_df (DataFrame): The jobs DataFrame returned in question 10. 390 | """ 391 | 392 | ###################################################### 393 | # TODO: Your code goes here ... 394 | ###################################################### 395 | 396 | 397 | ###################################################### 398 | # NOTE: DO NOT MODIFY THE CODE BELOW ... 
399 |     ######################################################
400 |     plt.savefig(f"{studentid}-Q12.png")
401 | 
402 | 
403 | ######################################################
404 | # NOTE: DO NOT MODIFY THE MAIN FUNCTION BELOW ...
405 | ######################################################
406 | if __name__ == "__main__":
407 |     # data ingestion and cleaning
408 |     df1 = question_1("ds_jobs.csv")
409 |     df2 = question_2("cost_of_living.csv",
410 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/cost_of_living.html")
411 |     df3 = question_3("exchange_rates.csv",
412 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/exchange_rates.html")
413 |     df4 = question_4("country_codes.csv",
414 |                      "https://www.cse.unsw.edu.au/~cs9321/24T1/ass1/country_codes.html")
415 | 
416 |     # data exploration
417 |     df5 = question_5(df1.copy(True))
418 | 
419 |     # data manipulation
420 |     df6 = question_6(df1.copy(True))
421 |     df7 = question_7(df6.copy(True), df4.copy(True))
422 |     df8 = question_8(df7.copy(True), df3.copy(True))
423 |     df9 = question_9(df2.copy(True))
424 |     df10 = question_10(df8.copy(True), df9.copy(True))
425 |     df11 = question_11(df10.copy(True))
426 | 
427 |     # data visualisation
428 |     question_12(df10.copy(True))
429 | 
--------------------------------------------------------------------------------
/24T1/ass2/zXXXXXXX.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | COMP9321 24T1 Assignment 2
6 | Data publication as a RESTful service API
7 | 
8 | Getting Started
9 | ---------------
10 | 
11 | 1. You MUST rename this file according to your zID, e.g., z1234567.py.
12 | 
13 | 2. To ensure your submission can be marked correctly, you're strongly encouraged
14 |    to create a new virtual environment for this assignment. Please see the
15 |    instructions in the assignment 1 specification to create and activate a
16 |    virtual environment.
17 | 
18 | 3. Once you have activated your virtual environment, you need to install the
19 |    following required packages:
20 | 
21 |    pip install python-dotenv==1.0.1
22 |    pip install google-generativeai==0.4.1
23 | 
24 |    You may also use any of the packages we've used in the weekly labs.
25 |    The most likely ones you'll want to install are:
26 | 
27 |    pip install flask==3.0.2
28 |    pip install flask_restx==1.3.0
29 |    pip install requests==2.31.0
30 | 
31 | 4. Create a file called `.env` in the same directory as this file. This file
32 |    will contain the Google API key you generate in the next step.
33 | 
34 | 5. Go to the following page, click on the link to "Get an API key", and follow
35 |    the instructions to generate an API key:
36 | 
37 |    https://ai.google.dev/tutorials/python_quickstart
38 | 
39 | 6. Add the following line to your `.env` file, replacing `your-api-key` with
40 |    the API key you generated, and save the file:
41 | 
42 |    GOOGLE_API_KEY=your-api-key
43 | 
44 | 7. You can now start implementing your solution. You are free to edit this file however you like, but keep it readable
45 |    so that a marker can read and understand your code if necessary for partial marks.
46 | 
47 | Submission
48 | ----------
49 | 
50 | You need to submit this Python file and a `requirements.txt` file.
51 | 
52 | The `requirements.txt` file should list all the Python packages your code relies
53 | on, and their versions.
You can generate this file by running the following 54 | command while your virtual environment is active: 55 | 56 | pip freeze > requirements.txt 57 | 58 | You can submit the two files using the following command when connected to CSE, 59 | and assuming the files are in the current directory (remember to replace `zid` 60 | with your actual zID, i.e. the name of this file after renaming it): 61 | 62 | give cs9321 assign2 zid.py requirements.txt 63 | 64 | You can also submit through WebCMS3, using the tab at the top of the assignment 65 | page. 66 | 67 | """ 68 | 69 | # You can import more modules from the standard library here if you need them 70 | # (which you will, e.g. sqlite3). 71 | import os 72 | from pathlib import Path 73 | 74 | # You can import more third-party packages here if you need them, provided 75 | # that they've been used in the weekly labs, or specified in this assignment, 76 | # and their versions match. 77 | from dotenv import load_dotenv # Needed to load the environment variables from the .env file 78 | import google.generativeai as genai # Needed to access the Generative AI API 79 | 80 | 81 | studentid = Path(__file__).stem # Will capture your zID from the filename. 82 | db_file = f"{studentid}.db" # Use this variable when referencing the SQLite database file. 83 | txt_file = f"{studentid}.txt" # Use this variable when referencing the txt file for Q7. 84 | 85 | 86 | # Load the environment variables from the .env file 87 | load_dotenv() 88 | 89 | # Configure the API key 90 | genai.configure(api_key=os.environ["GOOGLE_API_KEY"]) 91 | 92 | # Create a Gemini Pro model 93 | gemini = genai.GenerativeModel('gemini-pro') 94 | 95 | if __name__ == "__main__": 96 | # Here's a quick example of using the Generative AI API: 97 | question = "Give me some facts about UNSW!" 
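    # generate_content() sends the prompt to the Gemini model and returns a
    # response object; its .text attribute holds the generated answer.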
98 | response = gemini.generate_content(question) 99 | print(question) 100 | print(response.text) 101 | -------------------------------------------------------------------------------- /24T1/ass3/requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==4.3.0 2 | appnope==0.1.4 3 | argon2-cffi==23.1.0 4 | argon2-cffi-bindings==21.2.0 5 | arrow==1.3.0 6 | asttokens==2.4.1 7 | async-lru==2.0.4 8 | attrs==23.2.0 9 | Babel==2.14.0 10 | beautifulsoup4==4.12.3 11 | bleach==6.1.0 12 | certifi==2024.2.2 13 | cffi==1.16.0 14 | charset-normalizer==3.3.2 15 | comm==0.2.2 16 | contourpy==1.2.0 17 | cycler==0.12.1 18 | debugpy==1.8.1 19 | decorator==5.1.1 20 | defusedxml==0.7.1 21 | dnspython==2.5.0 22 | executing==2.0.1 23 | fastjsonschema==2.19.1 24 | fonttools==4.49.0 25 | fqdn==1.5.1 26 | h11==0.14.0 27 | httpcore==1.0.4 28 | httpx==0.27.0 29 | idna==3.6 30 | imbalanced-learn==0.12.0 31 | imblearn==0.0 32 | ipykernel==6.29.3 33 | ipython==8.22.2 34 | isoduration==20.11.0 35 | jedi==0.19.1 36 | Jinja2==3.1.3 37 | joblib==1.3.2 38 | json5==0.9.24 39 | jsonpointer==2.4 40 | jsonschema==4.21.1 41 | jsonschema-specifications==2023.12.1 42 | jupyter-events==0.9.1 43 | jupyter-lsp==2.2.4 44 | jupyter_client==8.6.1 45 | jupyter_core==5.7.2 46 | jupyter_server==2.13.0 47 | jupyter_server_terminals==0.5.3 48 | jupyterlab==4.1.5 49 | jupyterlab_pygments==0.3.0 50 | jupyterlab_server==2.25.4 51 | kiwisolver==1.4.5 52 | lightgbm==4.3.0 53 | lxml==5.1.0 54 | MarkupSafe==2.1.5 55 | matplotlib==3.8.2 56 | matplotlib-inline==0.1.6 57 | mistune==3.0.2 58 | nbclient==0.10.0 59 | nbconvert==7.16.2 60 | nbformat==5.10.3 61 | nest-asyncio==1.6.0 62 | notebook==7.1.2 63 | notebook_shim==0.2.4 64 | numpy==1.26.0 65 | overrides==7.7.0 66 | packaging==23.2 67 | pandas==2.2.0 68 | pandocfilters==1.5.1 69 | parso==0.8.3 70 | pexpect==4.9.0 71 | pillow==10.2.0 72 | platformdirs==4.2.0 73 | prometheus_client==0.20.0 74 | prompt-toolkit==3.0.43 75 | psutil==5.9.8 76 | ptyprocess==0.7.0 77 | pure-eval==0.2.2 78 | pyarrow==15.0.1 79 | pycparser==2.21 80 | Pygments==2.17.2 81 | pymongo==4.6.1 82 | pyparsing==3.1.1 83 | python-dateutil==2.8.2 84 | python-json-logger==2.0.7 85 | pytz==2024.1 86 | PyYAML==6.0.1 87 | pyzmq==25.1.2 88 | rapidfuzz==3.6.1 89 | referencing==0.34.0 90 | requests==2.31.0 91 | rfc3339-validator==0.1.4 92 | rfc3986-validator==0.1.1 93 | rpds-py==0.18.0 94 | scikit-learn==1.4.1.post1 95 | scipy==1.12.0 96 | seaborn==0.13.2 97 | Send2Trash==1.8.2 98 | six==1.16.0 99 | sniffio==1.3.1 100 | soupsieve==2.5 101 | stack-data==0.6.3 102 | terminado==0.18.1 103 | thefuzz==0.22.1 104 | threadpoolctl==3.3.0 105 | tinycss2==1.2.1 106 | tornado==6.4 107 | traitlets==5.14.2 108 | types-python-dateutil==2.9.0.20240316 109 | tzdata==2024.1 110 | uri-template==1.3.0 111 | urllib3==2.2.1 112 | wcwidth==0.2.13 113 | webcolors==1.13 114 | webencodings==0.5.1 115 | websocket-client==1.7.0 116 | xgboost==2.0.3 117 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/Olympics_dataset.csv: -------------------------------------------------------------------------------- 1 | Country,Num_games_s,Gold_s,Silver_s,Bronze_s,Total_s,Num_games_w,Gold_w,Silver_w,Bronze_w,Total_w,Num_games_t,Gold_t,Silver_t,Bronze_t,Total_t 2 | Afghanistan,14.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,2.0,2.0 3 | Algeria,13.0,5.0,4.0,8.0,17.0,3.0,0.0,0.0,0.0,0.0,16.0,5.0,4.0,8.0,17.0 4 | 
Argentina,24.0,21.0,25.0,28.0,74.0,19.0,0.0,0.0,0.0,0.0,43.0,21.0,25.0,28.0,74.0 5 | Armenia,6.0,2.0,6.0,6.0,14.0,7.0,0.0,0.0,0.0,0.0,13.0,2.0,6.0,6.0,14.0 6 | Australasia,2.0,3.0,4.0,5.0,12.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,4.0,5.0,12.0 7 | Australia,26.0,147.0,163.0,187.0,497.0,19.0,5.0,5.0,5.0,15.0,45.0,152.0,168.0,192.0,512.0 8 | Austria,27.0,18.0,33.0,36.0,87.0,23.0,64.0,81.0,87.0,232.0,50.0,82.0,114.0,123.0,319.0 9 | Azerbaijan,6.0,7.0,11.0,25.0,43.0,6.0,0.0,0.0,0.0,0.0,12.0,7.0,11.0,25.0,43.0 10 | Bahamas,16.0,6.0,2.0,6.0,14.0,0.0,0.0,0.0,0.0,0.0,16.0,6.0,2.0,6.0,14.0 11 | Bahrain,9.0,2.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,1.0,0.0,3.0 12 | Barbados,12.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 13 | Belarus,6.0,12.0,27.0,39.0,78.0,7.0,8.0,5.0,5.0,18.0,13.0,20.0,32.0,44.0,96.0 14 | Belgium,26.0,40.0,53.0,55.0,148.0,21.0,1.0,2.0,3.0,6.0,47.0,41.0,55.0,58.0,154.0 15 | Bermuda,18.0,0.0,0.0,1.0,1.0,8.0,0.0,0.0,0.0,0.0,26.0,0.0,0.0,1.0,1.0 16 | Bohemia,3.0,0.0,1.0,3.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,3.0,4.0 17 | Botswana,10.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,1.0,0.0,1.0 18 | Brazil,22.0,30.0,36.0,62.0,128.0,8.0,0.0,0.0,0.0,0.0,30.0,30.0,36.0,62.0,128.0 19 | British West Indies,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,2.0 20 | Bulgaria,20.0,51.0,87.0,80.0,218.0,20.0,1.0,2.0,3.0,6.0,40.0,52.0,89.0,83.0,224.0 21 | Burundi,6.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,6.0,1.0,1.0,0.0,2.0 22 | Cameroon,14.0,3.0,1.0,2.0,6.0,1.0,0.0,0.0,0.0,0.0,15.0,3.0,1.0,2.0,6.0 23 | Canada,26.0,64.0,102.0,136.0,302.0,23.0,73.0,64.0,62.0,199.0,49.0,137.0,166.0,198.0,501.0 24 | Chile,23.0,2.0,7.0,4.0,13.0,17.0,0.0,0.0,0.0,0.0,40.0,2.0,7.0,4.0,13.0 25 | China,10.0,224.0,167.0,155.0,546.0,11.0,13.0,28.0,21.0,62.0,21.0,237.0,195.0,176.0,608.0 26 | Colombia,19.0,5.0,9.0,14.0,28.0,2.0,0.0,0.0,0.0,0.0,21.0,5.0,9.0,14.0,28.0 27 | Costa Rica,15.0,1.0,1.0,2.0,4.0,6.0,0.0,0.0,0.0,0.0,21.0,1.0,1.0,2.0,4.0 28 | Ivory Coast,13.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,13.0,1.0,1.0,1.0,3.0 29 | Croatia,7.0,11.0,10.0,12.0,33.0,8.0,4.0,6.0,1.0,11.0,15.0,15.0,16.0,13.0,44.0 30 | Cuba,20.0,78.0,68.0,79.0,225.0,0.0,0.0,0.0,0.0,0.0,20.0,78.0,68.0,79.0,225.0 31 | Cyprus,10.0,0.0,1.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,21.0,0.0,1.0,0.0,1.0 32 | Czech Republic,6.0,15.0,17.0,24.0,56.0,7.0,9.0,11.0,11.0,31.0,13.0,24.0,28.0,35.0,87.0 33 | Czechoslovakia,16.0,49.0,49.0,45.0,143.0,16.0,2.0,8.0,15.0,25.0,32.0,51.0,57.0,60.0,168.0 34 | Denmark,27.0,45.0,74.0,75.0,194.0,14.0,0.0,1.0,0.0,1.0,41.0,45.0,75.0,75.0,195.0 35 | Djibouti,8.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,1.0,1.0 36 | Dominican Republic,14.0,3.0,2.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,14.0,3.0,2.0,2.0,7.0 37 | Ecuador,14.0,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,15.0,1.0,1.0,0.0,2.0 38 | Egypt,22.0,7.0,10.0,15.0,32.0,1.0,0.0,0.0,0.0,0.0,23.0,7.0,10.0,15.0,32.0 39 | Eritrea,5.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0 40 | Estonia,12.0,9.0,9.0,16.0,34.0,10.0,4.0,2.0,1.0,7.0,22.0,13.0,11.0,17.0,41.0 41 | Ethiopia,13.0,22.0,11.0,20.0,53.0,2.0,0.0,0.0,0.0,0.0,15.0,22.0,11.0,20.0,53.0 42 | Fiji,14.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,0.0,1.0 43 | Finland,25.0,101.0,85.0,117.0,303.0,23.0,43.0,63.0,61.0,167.0,48.0,144.0,148.0,178.0,470.0 44 | France,28.0,212.0,241.0,263.0,716.0,23.0,36.0,35.0,53.0,124.0,51.0,248.0,276.0,316.0,840.0 45 | Gabon,10.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,1.0,0.0,1.0 46 | Georgia,6.0,8.0,8.0,17.0,33.0,7.0,0.0,0.0,0.0,0.0,13.0,8.0,8.0,17.0,33.0 47 | 
Germany,16.0,191.0,194.0,230.0,615.0,12.0,92.0,88.0,60.0,240.0,28.0,283.0,282.0,290.0,855.0 48 | United Team of Germany,3.0,28.0,54.0,36.0,118.0,3.0,8.0,6.0,5.0,19.0,6.0,36.0,60.0,41.0,137.0 49 | East Germany,5.0,153.0,129.0,127.0,409.0,6.0,39.0,36.0,35.0,110.0,11.0,192.0,165.0,162.0,519.0 50 | West Germany,5.0,56.0,67.0,81.0,204.0,6.0,11.0,15.0,13.0,39.0,11.0,67.0,82.0,94.0,243.0 51 | Ghana,14.0,0.0,1.0,3.0,4.0,2.0,0.0,0.0,0.0,0.0,16.0,0.0,1.0,3.0,4.0 52 | Great Britain,28.0,263.0,295.0,291.0,849.0,23.0,11.0,4.0,16.0,31.0,51.0,274.0,299.0,307.0,880.0 53 | Greece,28.0,33.0,43.0,40.0,116.0,19.0,0.0,0.0,0.0,0.0,47.0,33.0,43.0,40.0,116.0 54 | Grenada,9.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,1.0,1.0,0.0,2.0 55 | Guatemala,14.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,0.0,1.0 56 | Guyana,17.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,0.0,1.0,1.0 57 | Haiti,15.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,1.0,2.0 58 | Hong Kong,16.0,1.0,1.0,1.0,3.0,5.0,0.0,0.0,0.0,0.0,21.0,1.0,1.0,1.0,3.0 59 | Hungary,26.0,175.0,147.0,169.0,491.0,23.0,1.0,2.0,4.0,7.0,49.0,176.0,149.0,173.0,498.0 60 | Iceland,20.0,0.0,2.0,2.0,4.0,18.0,0.0,0.0,0.0,0.0,38.0,0.0,2.0,2.0,4.0 61 | India,24.0,9.0,7.0,12.0,28.0,10.0,0.0,0.0,0.0,0.0,34.0,9.0,7.0,12.0,28.0 62 | Indonesia,15.0,7.0,13.0,12.0,32.0,0.0,0.0,0.0,0.0,0.0,15.0,7.0,13.0,12.0,32.0 63 | Iran,16.0,19.0,22.0,28.0,69.0,11.0,0.0,0.0,0.0,0.0,27.0,19.0,22.0,28.0,69.0 64 | Iraq,14.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,1.0,1.0 65 | Ireland,21.0,9.0,10.0,12.0,31.0,7.0,0.0,0.0,0.0,0.0,28.0,9.0,10.0,12.0,31.0 66 | Israel,16.0,1.0,1.0,7.0,9.0,7.0,0.0,0.0,0.0,0.0,23.0,1.0,1.0,7.0,9.0 67 | Italy,27.0,206.0,178.0,193.0,577.0,23.0,40.0,36.0,48.0,124.0,50.0,246.0,214.0,241.0,701.0 68 | Jamaica,17.0,22.0,35.0,20.0,77.0,8.0,0.0,0.0,0.0,0.0,25.0,22.0,35.0,20.0,77.0 69 | Japan,22.0,142.0,135.0,162.0,439.0,21.0,14.0,22.0,22.0,58.0,43.0,156.0,157.0,184.0,497.0 70 | Jordan,10.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,10.0,1.0,0.0,0.0,1.0 71 | Kazakhstan,6.0,15.0,20.0,27.0,62.0,7.0,1.0,3.0,4.0,8.0,13.0,16.0,23.0,31.0,70.0 72 | Kenya,14.0,31.0,38.0,33.0,102.0,4.0,0.0,0.0,0.0,0.0,18.0,31.0,38.0,33.0,102.0 73 | Kosovo,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0 74 | North Korea,10.0,16.0,16.0,22.0,54.0,9.0,0.0,1.0,1.0,2.0,19.0,16.0,17.0,23.0,56.0 75 | South Korea,17.0,90.0,87.0,90.0,267.0,18.0,31.0,25.0,14.0,70.0,35.0,121.0,112.0,104.0,337.0 76 | Kuwait,12.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,2.0,2.0 77 | Kyrgyzstan,6.0,0.0,1.0,3.0,4.0,7.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,3.0,4.0 78 | Latvia,11.0,3.0,11.0,5.0,19.0,11.0,0.0,4.0,4.0,8.0,22.0,3.0,15.0,9.0,27.0 79 | Lebanon,17.0,0.0,2.0,2.0,4.0,17.0,0.0,0.0,0.0,0.0,34.0,0.0,2.0,2.0,4.0 80 | Liechtenstein,17.0,0.0,0.0,0.0,0.0,19.0,2.0,2.0,6.0,10.0,36.0,2.0,2.0,6.0,10.0 81 | Lithuania,9.0,6.0,7.0,12.0,25.0,9.0,0.0,0.0,0.0,0.0,18.0,6.0,7.0,12.0,25.0 82 | Luxembourg,23.0,1.0,1.0,0.0,2.0,9.0,0.0,2.0,0.0,2.0,32.0,1.0,3.0,0.0,4.0 83 | Macedonia,6.0,0.0,0.0,1.0,1.0,6.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 84 | Malaysia,13.0,0.0,7.0,4.0,11.0,1.0,0.0,0.0,0.0,0.0,14.0,0.0,7.0,4.0,11.0 85 | Mauritius,9.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,1.0,1.0 86 | Mexico,23.0,13.0,24.0,32.0,69.0,9.0,0.0,0.0,0.0,0.0,32.0,13.0,24.0,32.0,69.0 87 | Moldova,6.0,0.0,2.0,3.0,5.0,7.0,0.0,0.0,0.0,0.0,13.0,0.0,2.0,3.0,5.0 88 | Mongolia,13.0,2.0,10.0,14.0,26.0,14.0,0.0,0.0,0.0,0.0,27.0,2.0,10.0,14.0,26.0 89 | Montenegro,3.0,0.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,6.0,0.0,1.0,0.0,1.0 90 | 
Morocco,14.0,6.0,5.0,12.0,23.0,7.0,0.0,0.0,0.0,0.0,21.0,6.0,5.0,12.0,23.0 91 | Mozambique,10.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,10.0,1.0,0.0,1.0,2.0 92 | Namibia,7.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,4.0,0.0,4.0 93 | Netherlands,26.0,85.0,92.0,108.0,285.0,21.0,45.0,44.0,41.0,130.0,47.0,130.0,136.0,149.0,415.0 94 | Netherlands Antilles,13.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,15.0,0.0,1.0,0.0,1.0 95 | New Zealand,23.0,46.0,27.0,44.0,117.0,16.0,0.0,1.0,2.0,3.0,39.0,46.0,28.0,46.0,120.0 96 | Niger,12.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,1.0,1.0,2.0 97 | Nigeria,16.0,3.0,10.0,12.0,25.0,1.0,0.0,0.0,0.0,0.0,17.0,3.0,10.0,12.0,25.0 98 | Norway,25.0,56.0,49.0,47.0,152.0,23.0,132.0,125.0,111.0,368.0,48.0,188.0,174.0,158.0,520.0 99 | Pakistan,17.0,3.0,3.0,4.0,10.0,3.0,0.0,0.0,0.0,0.0,20.0,3.0,3.0,4.0,10.0 100 | Panama,17.0,1.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,2.0,3.0 101 | Paraguay,12.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,0.0,1.0 102 | Peru,18.0,1.0,3.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0,20.0,1.0,3.0,0.0,4.0 103 | Philippines,21.0,0.0,3.0,7.0,10.0,5.0,0.0,0.0,0.0,0.0,26.0,0.0,3.0,7.0,10.0 104 | Poland,21.0,68.0,83.0,133.0,284.0,23.0,7.0,7.0,8.0,22.0,44.0,75.0,90.0,141.0,306.0 105 | Portugal,24.0,4.0,8.0,12.0,24.0,8.0,0.0,0.0,0.0,0.0,32.0,4.0,8.0,12.0,24.0 106 | Puerto Rico,18.0,1.0,2.0,6.0,9.0,7.0,0.0,0.0,0.0,0.0,25.0,1.0,2.0,6.0,9.0 107 | Qatar,9.0,0.0,1.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,1.0,4.0,5.0 108 | Romania,21.0,89.0,95.0,122.0,306.0,21.0,0.0,0.0,1.0,1.0,42.0,89.0,95.0,123.0,307.0 109 | Russia,6.0,149.0,124.0,153.0,426.0,6.0,47.0,38.0,35.0,120.0,12.0,196.0,162.0,188.0,546.0 110 | Russian Empire,3.0,1.0,4.0,3.0,8.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,4.0,3.0,8.0 111 | Soviet Union,9.0,395.0,319.0,296.0,1010.0,9.0,78.0,57.0,59.0,194.0,18.0,473.0,376.0,355.0,1204.0 112 | Saudi Arabia,11.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,2.0,3.0 113 | Samoa,9.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,1.0,0.0,1.0 114 | Senegal,14.0,0.0,1.0,0.0,1.0,5.0,0.0,0.0,0.0,0.0,19.0,0.0,1.0,0.0,1.0 115 | Serbia,4.0,3.0,6.0,6.0,15.0,3.0,0.0,0.0,0.0,0.0,7.0,3.0,6.0,6.0,15.0 116 | Serbia and Montenegro,1.0,0.0,2.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,2.0 117 | Singapore,16.0,1.0,2.0,2.0,5.0,1.0,0.0,0.0,0.0,0.0,17.0,1.0,2.0,2.0,5.0 118 | Slovakia,6.0,9.0,12.0,7.0,28.0,7.0,3.0,4.0,1.0,8.0,13.0,12.0,16.0,8.0,36.0 119 | Slovenia,7.0,5.0,8.0,10.0,23.0,8.0,2.0,5.0,10.0,17.0,15.0,7.0,13.0,20.0,40.0 120 | South Africa,19.0,26.0,31.0,29.0,86.0,7.0,0.0,0.0,0.0,0.0,26.0,26.0,31.0,29.0,86.0 121 | Spain,23.0,45.0,64.0,41.0,150.0,20.0,1.0,0.0,3.0,4.0,43.0,46.0,64.0,44.0,154.0 122 | Sri Lanka,17.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,2.0,0.0,2.0 123 | Sudan,12.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,1.0,0.0,1.0 124 | Suriname,12.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,12.0,1.0,0.0,1.0,2.0 125 | Sweden,27.0,145.0,170.0,179.0,494.0,23.0,57.0,46.0,55.0,158.0,50.0,202.0,216.0,234.0,652.0 126 | Switzerland,28.0,50.0,75.0,67.0,192.0,23.0,55.0,46.0,52.0,153.0,51.0,105.0,121.0,119.0,345.0 127 | Syria,13.0,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,13.0,1.0,1.0,1.0,3.0 128 | Chinese Taipei,14.0,5.0,7.0,12.0,24.0,12.0,0.0,0.0,0.0,0.0,26.0,5.0,7.0,12.0,24.0 129 | Tajikistan,6.0,1.0,1.0,2.0,4.0,4.0,0.0,0.0,0.0,0.0,10.0,1.0,1.0,2.0,4.0 130 | Tanzania,13.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,2.0,0.0,2.0 131 | Thailand,16.0,9.0,8.0,16.0,33.0,4.0,0.0,0.0,0.0,0.0,20.0,9.0,8.0,16.0,33.0 132 | Togo,10.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,1.0,1.0 133 | 
Tonga,9.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,0.0,1.0 134 | Trinidad and Tobago,17.0,2.0,6.0,11.0,19.0,3.0,0.0,0.0,0.0,0.0,20.0,2.0,6.0,11.0,19.0 135 | Tunisia,14.0,4.0,2.0,7.0,13.0,0.0,0.0,0.0,0.0,0.0,14.0,4.0,2.0,7.0,13.0 136 | Turkey,22.0,39.0,24.0,28.0,91.0,17.0,0.0,0.0,0.0,0.0,39.0,39.0,24.0,28.0,91.0 137 | Uganda,15.0,2.0,3.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,15.0,2.0,3.0,2.0,7.0 138 | Ukraine,6.0,35.0,30.0,56.0,121.0,7.0,3.0,1.0,4.0,8.0,13.0,38.0,31.0,60.0,129.0 139 | United Arab Emirates,9.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,1.0,0.0,1.0,2.0 140 | United States,27.0,1022.0,795.0,705.0,2522.0,23.0,105.0,110.0,90.0,305.0,50.0,1127.0,905.0,795.0,2827.0 141 | Uruguay,21.0,2.0,2.0,6.0,10.0,1.0,0.0,0.0,0.0,0.0,22.0,2.0,2.0,6.0,10.0 142 | Uzbekistan,6.0,8.0,6.0,17.0,31.0,7.0,1.0,0.0,0.0,1.0,13.0,9.0,6.0,17.0,32.0 143 | Venezuela,18.0,2.0,3.0,10.0,15.0,4.0,0.0,0.0,0.0,0.0,22.0,2.0,3.0,10.0,15.0 144 | Vietnam,15.0,1.0,3.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,15.0,1.0,3.0,0.0,4.0 145 | Virgin Islands,12.0,0.0,1.0,0.0,1.0,7.0,0.0,0.0,0.0,0.0,19.0,0.0,1.0,0.0,1.0 146 | Yugoslavia,18.0,28.0,31.0,31.0,90.0,16.0,0.0,3.0,1.0,4.0,34.0,28.0,34.0,32.0,94.0 147 | Zambia,13.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,1.0,1.0,2.0 148 | Zimbabwe,13.0,3.0,4.0,1.0,8.0,1.0,0.0,0.0,0.0,0.0,14.0,3.0,4.0,1.0,8.0 149 | Unified Team,1.0,45.0,38.0,29.0,112.0,1.0,9.0,6.0,8.0,23.0,2.0,54.0,44.0,37.0,135.0 150 | Independent Olympic Athletes,3.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,1.0,2.0 151 | Independent Olympic Participants,1.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,3.0 152 | Olympic Athletes from Russia,0.0,0.0,0.0,0.0,0.0,1.0,2.0,6.0,9.0,17.0,1.0,2.0,6.0,9.0,17.0 153 | Mixed team,3.0,8.0,5.0,4.0,17.0,0.0,0.0,0.0,0.0,0.0,3.0,8.0,5.0,4.0,17.0 154 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/README.md: -------------------------------------------------------------------------------- 1 | # Choropleth Map of Olympics Medals 2 | 3 | In this activity, we will use the [Plotly](https://plot.ly/) library to create an 4 | interactive choropleth map of Olympics medals. We will then make use of the 5 | [Dash](https://plot.ly/products/dash/) library to create a very simple web app to 6 | display the map. 7 | 8 | ## Getting Started 9 | 10 | Install the required packages using the following command 11 | 12 | ``` 13 | pip install -r requirements.txt 14 | ``` 15 | 16 | ## Creating an interactive choropleth map 17 | 18 | Here we use the Olympics dataset that has already been tidied up. 
19 | We generally pass two arguments to the Plotly plot function 20 | 21 | * `data` is used to pass in the data to be plotted 22 | * `layout` is used to customise the layout such as setting the title and font 23 | 24 | ```python 25 | import pandas as pd 26 | import plotly.offline as py 27 | 28 | df = pd.read_csv("Olympics_dataset.csv", thousands=",") 29 | 30 | data = [dict( 31 | type="choropleth", 32 | locations=df["Country"], 33 | locationmode="country names", 34 | z=df["Total_t"], 35 | colorbar=dict(title="Number of medals") 36 | )] 37 | 38 | layout = dict( 39 | title="All-time Olympic Games medal" 40 | ) 41 | 42 | fig = dict(data=data, layout=layout) 43 | py.plot(fig, filename='choropleth_map.html') 44 | ``` 45 | 46 | ### Customising the choropleth map 47 | 48 | We can change the colour scale by passing in a custom `colorscale` as a list of [value, colour] pairs 49 | 50 | ```python 51 | data = [dict( 52 | type="choropleth", 53 | locations=df["Country"], 54 | locationmode="country names", 55 | z=df["Total_t"], 56 | colorscale=[[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"], 57 | [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]], 58 | autocolorscale=False, 59 | reversescale=True, 60 | )] 61 | ``` 62 | 63 | You can find more customisation options on the [reference page](https://plot.ly/python/reference/#choropleth) for choropleth maps. 64 | 65 | ## Creating a simple web app 66 | 67 | Using the `data` and `layout` variables from above, we can create a very basic web app with a few lines of code. 68 | Create a file named `app.py` with the following code 69 | 70 | ```python 71 | import dash 72 | import dash_core_components as dcc 73 | import dash_html_components as html 74 | 75 | app = dash.Dash(__name__) 76 | app.layout = html.Div([ 77 | dcc.Graph( 78 | id="medals_graph", 79 | figure={ 80 | "data": data, 81 | "layout": layout 82 | } 83 | ), 84 | ]) 85 | 86 | if __name__ == '__main__': 87 | app.run_server(debug=True) 88 | ``` 89 | 90 | Run it with `python app.py` and visit [http://127.0.0.1:8050/](http://127.0.0.1:8050/) in your web browser. 91 | You should be able to see your app. 92 | 93 | ### Adding a basic callback function 94 | 95 | Let's add some radio buttons to show the medals for the different Olympic Games using `RadioItems`. 96 | Create a new file or replace `app.py` as follows 97 | 98 | ```python 99 | import pandas as pd 100 | import dash 101 | import dash_core_components as dcc 102 | import dash_html_components as html 103 | 104 | df = pd.read_csv("Olympics_dataset.csv", skipinitialspace=True, thousands=",") 105 | 106 | app = dash.Dash(__name__) 107 | app.layout = html.Div([ 108 | dcc.Graph(id="medals_graph"), 109 | 110 | html.Div([ 111 | html.H4("Games"), 112 | 113 | dcc.RadioItems( 114 | id="game_type", 115 | options=[ 116 | {"label": "Combined Total", "value": "combined"}, 117 | {"label": "Summer Games", "value": "summer"}, 118 | {"label": "Winter Games", "value": "winter"}], 119 | value="combined" 120 | )] 121 | ) 122 | ]) 123 | 124 | if __name__ == '__main__': 125 | app.run_server(debug=True) 126 | ``` 127 | 128 | Next, we need to create a callback function to handle the input from the radio buttons. 129 | We set the column name based on the radio button input to extract the relevant data from the dataframe.
130 | 131 | ```python 132 | @app.callback( 133 | dash.dependencies.Output("medals_graph", "figure"), 134 | [dash.dependencies.Input("game_type", "value")]) 135 | def update_figure(game_type): 136 | if game_type == "summer": 137 | column_name = "Total_s" 138 | elif game_type == "winter": 139 | column_name = "Total_w" 140 | else: 141 | column_name = "Total_t" 142 | 143 | data = [dict( 144 | type="choropleth", 145 | locations=df["Country"], 146 | locationmode="country names", 147 | z=df[column_name], 148 | colorbar=dict(title="Number of medals") 149 | )] 150 | 151 | layout = dict( 152 | title="All-time Olympic Games medal" 153 | ) 154 | 155 | return {"data": data, "layout": layout} 156 | ``` 157 | 158 | ### Challenge 159 | 160 | Add another set of radio buttons of medal types to choose from. 161 | [Here](https://comp9321-ass1-extra.herokuapp.com/) is an example of the final web app. 162 | 163 | ## References 164 | 165 | * https://plot.ly/python/choropleth-maps/ 166 | * https://dash.plot.ly/getting-started-part-2 -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/app.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import dash 3 | import dash_core_components as dcc 4 | import dash_html_components as html 5 | import pandas as pd 6 | 7 | df = pd.read_csv("Olympics_dataset.csv", skipinitialspace=True, thousands=",") 8 | 9 | app = dash.Dash(__name__) 10 | server = app.server 11 | 12 | app.layout = html.Div([ 13 | dcc.Graph(id="medals_graph"), 14 | 15 | html.Div([ 16 | html.H4("Games"), 17 | 18 | dcc.RadioItems( 19 | id="count_type", 20 | options=[ 21 | {"label": "Combined Total", "value": "combined"}, 22 | {"label": "Summer Games", "value": "summer"}, 23 | {"label": "Winter Games", "value": "winter"}], 24 | value="combined" 25 | )], 26 | 27 | style={'width': '48%', 'display': 'inline-block'} 28 | ), 29 | 30 | html.Div([ 31 | html.H4("Medals"), 32 | 33 | dcc.RadioItems( 34 | id="medal_type", 35 | options=[ 36 | {"label": "All medals", "value": "all"}, 37 | {"label": "Gold", "value": "gold"}, 38 | {"label": "Silver", "value": "silver"}, 39 | {"label": "Bronze", "value": "bronze"} 40 | ], 41 | value="all" 42 | )], 43 | 44 | style={'width': '48%', 'float': 'right', 'display': 'inline-block'} 45 | ) 46 | ]) 47 | 48 | 49 | @app.callback( 50 | dash.dependencies.Output("medals_graph", "figure"), 51 | [dash.dependencies.Input("count_type", "value"), 52 | dash.dependencies.Input("medal_type", "value")]) 53 | def update_figure(count_type, medal_type): 54 | if medal_type == "all": 55 | column_name = "Total_" 56 | elif medal_type == "gold": 57 | column_name = "Gold_" 58 | elif medal_type == "silver": 59 | column_name = "Silver_" 60 | else: 61 | column_name = "Bronze_" 62 | 63 | if count_type == "summer": 64 | column_name += "s" 65 | elif count_type == "winter": 66 | column_name += "w" 67 | else: 68 | column_name += "t" 69 | 70 | data = [dict( 71 | type="choropleth", 72 | locations=df["Country"], 73 | locationmode="country names", 74 | z=df[column_name], 75 | colorscale=[[0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"], 76 | [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"]], 77 | autocolorscale=False, 78 | reversescale=True, 79 | marker=dict( 80 | line=dict( 81 | color="rgb(180,180,180)", 82 | width=0.5 83 | )), 84 | colorbar=dict( 85 | autotick=False, 86 | title="Number of medals"), 87 | )] 88 | 89 | layout = dict( 90 | 
title="All-time Olympic Games medal", 91 | geo=dict( 92 | showframe=False, 93 | showcoastlines=False, 94 | projection=dict( 95 | type="Mercator" 96 | ) 97 | ) 98 | ) 99 | 100 | return {"data": data, "layout": layout} 101 | 102 | 103 | if __name__ == "__main__": 104 | app.run_server(debug=True) 105 | -------------------------------------------------------------------------------- /Ass1_ChoroplethMap/requirements.txt: -------------------------------------------------------------------------------- 1 | dash 2 | dash-renderer 3 | dash-core-components 4 | dash-html-components 5 | plotly 6 | pandas -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | pipeline { 2 | agent any 3 | triggers { 4 | pollSCM('') //Empty quotes tells it to build on a push 5 | } 6 | stages { 7 | stage('Build') { 8 | steps { 9 | echo 'Building..' 10 | } 11 | } 12 | stage('Test') { 13 | steps { 14 | echo 'Testing..' 15 | } 16 | } 17 | stage('Deploy') { 18 | steps { 19 | echo 'Deploying....' 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code Repository for COMP9321, Data-Services-Engineering UNSW 2 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn import linear_model 3 | from sklearn.metrics import mean_squared_error 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_diet(diet_path, split_percentage): 8 | df = pd.read_csv(diet_path, index_col=0) 9 | 10 | df = shuffle(df) 11 | diet_x = df.drop('weight6weeks', axis=1).values 12 | diet_y = df['weight6weeks'].values 13 | 14 | # Split the dataset in train and test data 15 | # A random permutation, to split the data randomly 16 | 17 | split_point = int(len(diet_x) * split_percentage) 18 | diet_X_train = diet_x[:split_point] 19 | diet_y_train = diet_y[:split_point] 20 | diet_X_test = diet_x[split_point:] 21 | diet_y_test = diet_y[split_point:] 22 | 23 | return diet_X_train, diet_y_train, diet_X_test, diet_y_test 24 | 25 | 26 | if __name__ == "__main__": 27 | diet_X_train, diet_y_train, diet_X_test, diet_y_test = load_diet("diet.csv", split_percentage=0.7) 28 | model = linear_model.LinearRegression() 29 | # model = linear_model.BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True, 30 | # fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300, 31 | # normalize=False, tol=0.001, verbose=False) 32 | model.fit(diet_X_train, diet_y_train) 33 | 34 | y_pred = model.predict(diet_X_test) 35 | 36 | for i in range(len(diet_y_test)): 37 | print("Expected:", diet_y_test[i], "Predicted:", y_pred[i]) 38 | 39 | # The mean squared error 40 | print("Mean squared error: %.2f" 41 | % mean_squared_error(diet_y_test, y_pred)) 42 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from sklearn.cluster import KMeans 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_iris(iris_path): 8 | df = pd.read_csv(iris_path) 9 | 10 | df = shuffle(df) 11 | 
df_without_label = df.drop('species', axis=1) 12 | return df, df_without_label 13 | 14 | 15 | if __name__ == '__main__': 16 | csv_file = 'iris.csv' 17 | 18 | # Load the data, with and without the class label 19 | df, df_without_label = load_iris(csv_file) 20 | # Fit a k-means estimator 21 | estimator = KMeans(n_clusters=3) 22 | estimator.fit(df_without_label) 23 | # Clusters are given in the labels_ attribute 24 | labels = estimator.labels_ 25 | df['cluster'] = pd.Series(labels, index=df.index) 26 | 27 | print(labels) 28 | # divide the dataset into three dataframes based on the assigned cluster 29 | cluster_0_df = df.query('cluster == 0') 30 | cluster_1_df = df.query('cluster == 1') 31 | cluster_2_df = df.query('cluster == 2') 32 | 33 | fig, axes = plt.subplots(nrows=1, ncols=1) 34 | fig.set_size_inches(18.5, 10.5) 35 | fig.tight_layout() 36 | 37 | ax = cluster_0_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-0', color='blue', ax=axes) 38 | ax = cluster_1_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-1', color='red', ax=ax) 39 | ax = cluster_2_df.plot.scatter(x='petal_length', y='petal_width', label='Cluster-2', color='green', ax=ax) 40 | 41 | for i, label in enumerate(df['species']): 42 | 43 | label = label[0:4] 44 | ax.annotate(label, (list(df['petal_length'])[i], list(df['petal_width'])[i]), color='gray', fontsize=9, 45 | horizontalalignment='left', 46 | verticalalignment='bottom') 47 | 48 | plt.show() 49 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/activity_3.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from sklearn.cluster import KMeans, SpectralClustering, AgglomerativeClustering 4 | from sklearn.utils import shuffle 5 | 6 | 7 | def load_dataset(dataset_path): 8 | df = pd.read_csv(dataset_path) 9 | 10 | df = shuffle(df) 11 | df_without_label = df.drop('Diet', axis=1) 12 | return df, df_without_label 13 | 14 | 15 | if __name__ == '__main__': 16 | csv_file = 'diet.csv' 17 | 18 | # Load the data, with and without the class label 19 | df, df_without_label = load_dataset(csv_file) 20 | # Fit an agglomerative clustering estimator 21 | estimator = AgglomerativeClustering(n_clusters=3) 22 | estimator.fit(df_without_label) 23 | # Clusters are given in the labels_ attribute 24 | labels = estimator.labels_ 25 | df['cluster'] = pd.Series(labels, index=df.index) 26 | 27 | print(labels) 28 | # divide the dataset into three dataframes based on the assigned cluster 29 | cluster_0_df = df.query('cluster == 0') 30 | cluster_1_df = df.query('cluster == 1') 31 | cluster_2_df = df.query('cluster == 2') 32 | 33 | fig, axes = plt.subplots(nrows=1, ncols=1) 34 | fig.set_size_inches(18.5, 10.5) 35 | fig.tight_layout() 36 | 37 | ax = cluster_0_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-0', color='blue', ax=axes) 38 | ax = cluster_1_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-1', color='red', ax=ax) 39 | ax = cluster_2_df.plot.scatter(x='pre.weight', y='weight6weeks', label='Cluster-2', color='green', ax=ax) 40 | 41 | for i, label in enumerate(df['Diet']): 42 | 43 | label = "Diet_" + str(label) 44 | ax.annotate(label, (list(df['pre.weight'])[i], list(df['weight6weeks'])[i]), color='gray', fontsize=9, 45 | horizontalalignment='left', 46 | verticalalignment='bottom') 47 | 48 | plt.show() 49 | --------------------------------------------------------------------------------
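The two clustering activities above judge the grouping purely by eye: the scatter plots are coloured by cluster and each point is annotated with its known species or diet label. As an optional follow-up (not part of the original activities), the agreement between an estimator's `labels_` and the known labels can also be measured numerically. The sketch below assumes the same `iris.csv` with a `species` column used in `activity_2.py`, and uses scikit-learn's `adjusted_rand_score`, which ignores the arbitrary numbering of the clusters.

```python
# Optional sketch (not part of the original activities): score the clusters
# found by two estimators against the known species labels in iris.csv.
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score
from sklearn.utils import shuffle

df = shuffle(pd.read_csv("iris.csv"))      # same dataset as activity_2.py
features = df.drop("species", axis=1)      # cluster on the measurements only

for estimator in (KMeans(n_clusters=3), AgglomerativeClustering(n_clusters=3)):
    labels = estimator.fit(features).labels_
    # 1.0 means the clusters match the species exactly, ~0.0 means no better
    # than random assignment; the score is invariant to cluster renumbering.
    print(type(estimator).__name__, adjusted_rand_score(df["species"], labels))
```

On the iris measurements both estimators usually recover the species reasonably well, which matches what the annotated scatter plot suggests.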
/Week10_Regression_and_Clustering/diet.csv: -------------------------------------------------------------------------------- 1 | Person,gender,Age,Height,pre.weight,Diet,weight6weeks 2 | 25,0,41,171,60,2,60 3 | 26,0,32,174,103,2,103 4 | 1,0,22,159,58,1,54.2 5 | 2,0,46,192,60,1,54 6 | 3,0,55,170,64,1,63.3 7 | 4,0,33,171,64,1,61.1 8 | 5,0,50,170,65,1,62.2 9 | 6,0,50,201,66,1,64 10 | 7,0,37,174,67,1,65 11 | 8,0,28,176,69,1,60.5 12 | 9,0,28,165,70,1,68.1 13 | 10,0,45,165,70,1,66.9 14 | 11,0,60,173,72,1,70.5 15 | 12,0,48,156,72,1,69 16 | 13,0,41,163,72,1,68.4 17 | 14,0,37,167,82,1,81.1 18 | 27,0,44,174,58,2,60.1 19 | 28,0,37,172,58,2,56 20 | 29,0,41,165,59,2,57.3 21 | 30,0,43,171,61,2,56.7 22 | 31,0,20,169,62,2,55 23 | 32,0,51,174,63,2,62.4 24 | 33,0,31,163,63,2,60.3 25 | 34,0,54,173,63,2,59.4 26 | 35,0,50,166,65,2,62 27 | 36,0,48,163,66,2,64 28 | 37,0,16,165,68,2,63.8 29 | 38,0,37,167,68,2,63.3 30 | 39,0,30,161,76,2,72.7 31 | 40,0,29,169,77,2,77.5 32 | 52,0,51,165,60,3,53 33 | 53,0,35,169,62,3,56.4 34 | 54,0,21,159,64,3,60.6 35 | 55,0,22,169,65,3,58.2 36 | 56,0,36,160,66,3,58.2 37 | 57,0,20,169,67,3,61.6 38 | 58,0,35,163,67,3,60.2 39 | 59,0,45,155,69,3,61.8 40 | 60,0,58,141,70,3,63 41 | 61,0,37,170,70,3,62.7 42 | 62,0,31,170,72,3,71.1 43 | 63,0,35,171,72,3,64.4 44 | 64,0,56,171,73,3,68.9 45 | 65,0,48,153,75,3,68.7 46 | 66,0,41,157,76,3,71 47 | 15,1,39,168,71,1,71.6 48 | 16,1,31,158,72,1,70.9 49 | 17,1,40,173,74,1,69.5 50 | 18,1,50,160,78,1,73.9 51 | 19,1,43,162,80,1,71 52 | 20,1,25,165,80,1,77.6 53 | 21,1,52,177,83,1,79.1 54 | 22,1,42,166,85,1,81.5 55 | 23,1,39,166,87,1,81.9 56 | 24,1,40,190,88,1,84.5 57 | 41,1,51,191,71,2,66.8 58 | 42,1,38,199,75,2,72.6 59 | 43,1,54,196,75,2,69.2 60 | 44,1,33,190,76,2,72.5 61 | 45,1,45,160,78,2,72.7 62 | 46,1,37,194,78,2,76.3 63 | 47,1,44,163,79,2,73.6 64 | 48,1,40,171,79,2,72.9 65 | 49,1,37,198,79,2,71.1 66 | 50,1,39,180,80,2,81.4 67 | 51,1,31,182,80,2,75.7 68 | 67,1,36,155,71,3,68.5 69 | 68,1,47,179,73,3,72.1 70 | 69,1,29,166,76,3,72.5 71 | 70,1,37,173,78,3,77.5 72 | 71,1,31,177,78,3,75.2 73 | 72,1,26,179,78,3,69.4 74 | 73,1,40,179,79,3,74.5 75 | 74,1,35,183,83,3,80.2 76 | 75,1,49,177,84,3,79.9 77 | 76,1,28,164,85,3,79.7 78 | 77,1,40,167,87,3,77.8 79 | 78,1,51,175,88,3,81.9 80 | -------------------------------------------------------------------------------- /Week10_Regression_and_Clustering/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 
4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 
5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_1.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.metrics import precision_score, accuracy_score, recall_score 4 | from sklearn.preprocessing import OrdinalEncoder 5 | from sklearn.tree import DecisionTreeClassifier 6 | 7 | 8 | def load_exposure(exposure_path, split_percentage): 9 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 10 | 11 | df = df[[ 12 | 'GHRP', 13 | 'Aid dependence', 14 | 'Remittances', 15 | 'food import dependence ', 16 | 'primary commodity export dependence', 17 | 'tourism as percentage of GDP', 18 | 'tourism dependence', 19 | 'Foreign currency reserves', 20 | 'Foreign direct investment, net inflows percent of GDP', 21 | 'Foreign direct investment', 22 | 'Covid_19_Economic_exposure_index', 23 | 'Income classification according to WB' 24 | ]] 25 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 26 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 27 | 'tourism as percentage of GDP', 'food import dependence ', 28 | 'primary commodity export dependence', 29 | 'Covid_19_Economic_exposure_index', ] 30 | 31 | df = df[df['Income classification according to WB'].notna()] 32 | 33 | for column in columns: 34 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 35 | 36 | # Ordinal Encoders 37 | encGhrp = OrdinalEncoder() 38 | df['GHRP'] = df['GHRP'].fillna("Unknown") 39 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 40 | df = df.fillna(0) 41 | 42 | exposure_x = df.drop('Income classification according to WB', axis=1).values 43 | exposure_y = df['Income classification according to WB'].values 44 | 45 | # Split exposure data in train and test data 46 | split_point = int(len(exposure_x) * split_percentage) 47 | exposure_X_train = exposure_x[:split_point] 48 | exposure_y_train = exposure_y[:split_point] 49 | exposure_X_test = exposure_x[split_point:] 50 | exposure_y_test = exposure_y[split_point:] 51 | 52 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 53 | 54 | 55 | if __name__ == '__main__': 56 | csv_file = 'exposure.csv' 57 | 58 | # Split the data into test and train parts 59 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test = load_exposure(csv_file, split_percentage=0.7) 60 | 61 | # train a classifier 62 | dt = DecisionTreeClassifier() 63 | dt.fit(exposure_X_train, exposure_y_train) 64 | 65 | # predict the test set 66 | predictions = dt.predict(exposure_X_test) 67 | 68 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 69 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 70 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 71 | -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_2.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.impute import SimpleImputer 4 | from sklearn.metrics import precision_score, accuracy_score, recall_score 5 | from sklearn.preprocessing import OrdinalEncoder 6 | from sklearn.tree import DecisionTreeClassifier 7 | 8 | 9 | def load_exposure(exposure_path, split_percentage, 
strategy): 10 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 11 | 12 | df = df[[ 13 | 'GHRP', 14 | 'Aid dependence', 15 | 'Remittances', 16 | 'food import dependence ', 17 | 'primary commodity export dependence', 18 | 'tourism as percentage of GDP', 19 | 'tourism dependence', 20 | 'Foreign currency reserves', 21 | 'Foreign direct investment, net inflows percent of GDP', 22 | 'Foreign direct investment', 23 | 'Covid_19_Economic_exposure_index', 24 | 'Income classification according to WB' 25 | ]] 26 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 27 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 28 | 'tourism as percentage of GDP', 'food import dependence ', 29 | 'primary commodity export dependence', 30 | 'Covid_19_Economic_exposure_index', ] 31 | 32 | df = df[df['Income classification according to WB'].notna()] 33 | 34 | for column in columns: 35 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 36 | 37 | # Ordinal Encoders 38 | encGhrp = OrdinalEncoder() 39 | df['GHRP'] = df['GHRP'].fillna("Unknown") 40 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 41 | 42 | for column in columns: 43 | imputer = SimpleImputer(missing_values=numpy.nan, strategy=strategy) 44 | df[column] = imputer.fit_transform(df[[column]]) 45 | 46 | exposure_x = df.drop('Income classification according to WB', axis=1).values 47 | exposure_y = df['Income classification according to WB'].values 48 | 49 | # Split exposure data in train and test data 50 | split_point = int(len(exposure_x) * split_percentage) 51 | exposure_X_train = exposure_x[:split_point] 52 | exposure_y_train = exposure_y[:split_point] 53 | exposure_X_test = exposure_x[split_point:] 54 | exposure_y_test = exposure_y[split_point:] 55 | 56 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 57 | 58 | 59 | if __name__ == '__main__': 60 | csv_file = 'exposure.csv' 61 | 62 | # Split the data into test and train parts 63 | for strategy in ["mean", "median", "most_frequent", "constant"]: 64 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test \ 65 | = load_exposure(csv_file, split_percentage=0.7, strategy=strategy) 66 | 67 | # train a classifier 68 | dt = DecisionTreeClassifier() 69 | dt.fit(exposure_X_train, exposure_y_train) 70 | 71 | # predict the test set 72 | predictions = dt.predict(exposure_X_test) 73 | 74 | print("*************************** "+strategy+" ***********************************") 75 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 76 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 77 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 78 | 79 | -------------------------------------------------------------------------------- /Week11_Preprocessing/activity_3.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import pandas as pd 3 | from sklearn.impute import SimpleImputer 4 | from sklearn.metrics import precision_score, accuracy_score, recall_score 5 | from sklearn.preprocessing import OrdinalEncoder, PolynomialFeatures 6 | from sklearn.tree import DecisionTreeClassifier 7 | 8 | 9 | def load_exposure(exposure_path, split_percentage, strategy): 10 | df = pd.read_csv(exposure_path, delimiter=";", encoding="ISO-8859-1").head(2000) 11 | 12 | df = df[[ 13 | 'GHRP', 14 | 'Aid dependence', 
15 | 'Remittances', 16 | 'food import dependence ', 17 | 'primary commodity export dependence', 18 | 'tourism as percentage of GDP', 19 | 'tourism dependence', 20 | 'Foreign currency reserves', 21 | 'Foreign direct investment, net inflows percent of GDP', 22 | 'Foreign direct investment', 23 | 'Covid_19_Economic_exposure_index', 24 | 'Income classification according to WB' 25 | ]] 26 | columns = ["Remittances", "Aid dependence", "Foreign direct investment", 'Foreign currency reserves', 27 | 'Foreign direct investment, net inflows percent of GDP', 'tourism dependence', 28 | 'tourism as percentage of GDP', 'food import dependence ', 29 | 'primary commodity export dependence', 30 | 'Covid_19_Economic_exposure_index', ] 31 | 32 | df = df[df['Income classification according to WB'].notna()] 33 | 34 | for column in columns: 35 | df[column] = df[column].apply(lambda a: numpy.nan if a == "x" else float(str(a).replace(",", "."))) 36 | 37 | # Ordinal Encoders 38 | encGhrp = OrdinalEncoder() 39 | df['GHRP'] = df['GHRP'].fillna("Unknown") 40 | df['GHRP'] = encGhrp.fit_transform(df[['GHRP']]) 41 | 42 | for column in columns: 43 | imputer = SimpleImputer(missing_values=numpy.nan, strategy=strategy) 44 | df[column] = imputer.fit_transform(df[[column]]) 45 | 46 | exposure_x = df.drop('Income classification according to WB', axis=1).values 47 | exposure_y = df['Income classification according to WB'].values 48 | 49 | # Split exposure data in train and test data 50 | split_point = int(len(exposure_x) * split_percentage) 51 | exposure_X_train = exposure_x[:split_point] 52 | exposure_y_train = exposure_y[:split_point] 53 | exposure_X_test = exposure_x[split_point:] 54 | exposure_y_test = exposure_y[split_point:] 55 | 56 | return exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test 57 | 58 | 59 | if __name__ == '__main__': 60 | csv_file = 'exposure.csv' 61 | 62 | # Split the data into test and train parts 63 | exposure_X_train, exposure_y_train, exposure_X_test, exposure_y_test \ 64 | = load_exposure(csv_file, split_percentage=0.7, strategy="most_frequent") 65 | 66 | # train a classifier 67 | dt = DecisionTreeClassifier() 68 | dt.fit(exposure_X_train, exposure_y_train) 69 | 70 | # predict the test set 71 | predictions = dt.predict(exposure_X_test) 72 | 73 | print("*************************** without PolynomialFeatures ***********************************") 74 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 75 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 76 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) 77 | 78 | poly = PolynomialFeatures(1) 79 | exposure_X_train_scaled = poly.fit_transform(exposure_X_train) 80 | exposure_X_test_scaled = poly.fit_transform(exposure_X_test) 81 | dt = DecisionTreeClassifier() 82 | dt.fit(exposure_X_train_scaled, exposure_y_train) 83 | predictions = dt.predict(exposure_X_test_scaled) 84 | 85 | print("*************************** with PolynomialFeatures ***********************************") 86 | print("precision:\t", precision_score(exposure_y_test, predictions, average=None)) 87 | print("recall:\t\t", recall_score(exposure_y_test, predictions, average=None)) 88 | print("accuracy:\t", accuracy_score(exposure_y_test, predictions)) -------------------------------------------------------------------------------- /Week2_DataAccess/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def 
read_csv(csv_file): 5 | """ 6 | :param csv_file: the path of csv file 7 | :return: A dataframe out of the csv file 8 | """ 9 | return pd.read_csv(csv_file) 10 | 11 | 12 | def write_in_csv(dataframe, file): 13 | """ 14 | :param dataframe: The dataframe which must be written into a csv file 15 | :param file: where the csv must be stored 16 | """ 17 | dataframe.to_csv(file, sep=',', encoding='utf-8') 18 | 19 | 20 | def print_dataframe(dataframe, print_column=True, print_rows=True): 21 | # print column names 22 | if print_column: 23 | print(','.join(dataframe.columns)) 24 | 25 | # print rows one by one 26 | if print_rows: 27 | for row in dataframe.itertuples(index=False, name=None): 28 | row = ','.join(str(col) for col in row) 29 | print(row) 30 | 31 | if __name__ == '__main__': 32 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 33 | dataframe = read_csv(csv_file) 34 | 35 | print("Loading the csv file") 36 | print_dataframe(dataframe) 37 | 38 | print("Write the dataframe as a csv file") 39 | write_in_csv(dataframe, "Demographic_Statistics_New.csv") # path where the new csv file is stored 40 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_2.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import pandas as pd 3 | from pandas.io import sql 4 | 5 | 6 | def read_csv(csv_file): 7 | """ 8 | :param csv_file: the path of csv file 9 | :return: A dataframe out of the csv file 10 | """ 11 | return pd.read_csv(csv_file) 12 | 13 | 14 | def write_in_sqlite(dataframe, database_file, table_name): 15 | """ 16 | :param dataframe: The dataframe which must be written into the database 17 | :param database_file: where the database is stored 18 | :param table_name: the name of the table 19 | """ 20 | 21 | cnx = sqlite3.connect(database_file) 22 | sql.to_sql(dataframe, name=table_name, con=cnx) 23 | 24 | 25 | def read_from_sqlite(database_file, table_name): 26 | """ 27 | :param database_file: where the database is stored 28 | :param table_name: the name of the table 29 | :return: A Dataframe 30 | """ 31 | cnx = sqlite3.connect(database_file) 32 | return sql.read_sql('select * from ' + table_name, cnx) 33 | 34 | 35 | if __name__ == '__main__': 36 | table_name = "Demographic_Statistics" 37 | database_file = 'Demographic_Statistics.db' # name of sqlite db file that will be created 38 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 39 | loaded_df = read_csv(csv_file) 40 | 41 | print("Creating database") 42 | write_in_sqlite(loaded_df, database_file, table_name) 43 | 44 | print("Querying the database") 45 | queried_df = read_from_sqlite(database_file, table_name) 46 | 47 | pd.set_option('display.width', 1000) 48 | pd.options.display.max_colwidth = 3 49 | pd.set_option('display.max_columns', 7) 50 | 51 | print(queried_df.head(10)) 52 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_3.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | from pymongo import MongoClient 4 | 5 | 6 | def read_csv(csv_file): 7 | """ 8 | :param csv_file: the path of csv file 9 | :return: A dataframe out of the csv file 10 | """ 11 | return pd.read_csv(csv_file) 12 | 13 | 14 | def print_dataframe(dataframe, print_column=True, print_rows=True): 15 | # print column names 16 | if print_column: 17 | 
print(','.join(dataframe.columns)) 18 | 19 | # print rows one by one 20 | if print_rows: 21 | for row in dataframe.itertuples(index=False, name=None): 22 | row = ','.join(str(col) for col in row) 23 | print(row) 24 | 25 | def write_in_mongodb(dataframe, mongo_host, mongo_port, db_name, collection): 26 | """ 27 | :param dataframe: The dataframe which must be written into the collection 28 | :param mongo_host: Mongodb server address 29 | :param mongo_port: Mongodb server port number 30 | :param db_name: The name of the database 31 | :param collection: the name of the collection inside the database 32 | """ 33 | client = MongoClient(host=mongo_host, port=mongo_port) 34 | db = client[db_name] 35 | c = db[collection] 36 | # You can only store documents in mongodb; 37 | # so you need to convert rows inside the dataframe into a list of json objects 38 | records = json.loads(dataframe.T.to_json()).values() 39 | c.insert_many(records) 40 | 41 | 42 | 43 | def read_from_mongodb(mongo_host, mongo_port, db_name, collection): 44 | """ 45 | :param mongo_host: Mongodb server address 46 | :param mongo_port: Mongodb server port number 47 | :param db_name: The name of the database 48 | :param collection: the name of the collection inside the database 49 | :return: A dataframe which contains all documents inside the collection 50 | """ 51 | client = MongoClient(host=mongo_host, port=mongo_port) 52 | db = client[db_name] 53 | c = db[collection] 54 | return pd.DataFrame(list(c.find())) 55 | 56 | if __name__ == '__main__': 57 | 58 | db_name = 'comp9321' 59 | mongo_port = 27017 60 | mongo_host = 'localhost' 61 | 62 | csv_file = 'Demographic_Statistics_By_Zip_Code.csv' # path to the downloaded csv file 63 | df = read_csv(csv_file) 64 | collection = 'Demographic_Statistics' 65 | 66 | print("Writing into the mongodb") 67 | write_in_mongodb(df, mongo_host, mongo_port, db_name, collection) 68 | 69 | print("Querying the database") 70 | df = read_from_mongodb(mongo_host, mongo_port, db_name, collection) 71 | 72 | print_dataframe(df) 73 | -------------------------------------------------------------------------------- /Week2_DataAccess/activity_4.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pandas as pd 3 | 4 | 5 | def get_json(url): 6 | """ 7 | :param url: URL of the resource 8 | :return: JSON 9 | """ 10 | resp = requests.get(url=url) 11 | data = resp.json() 12 | return data 13 | 14 | 15 | def json_to_dataframe(json_obj): 16 | """ 17 | Please open the JSON using the given URL to be familiar with the 18 | structure of the expected JSON object 19 | 20 | Here the JSON is simply an array of objects (one per record), 21 | so it can be passed directly to pandas' from_records to 22 | build a dataframe 23 | :param json_obj: JSON object for the dataset 24 | :return: A dataframe 25 | """ 26 | # the JSON is already a list of records, so no extraction is needed 27 | json_data = json_obj 28 | 29 | return pd.DataFrame.from_records(json_data) 30 | 31 | if __name__ == '__main__': 32 | url = "https://raw.githubusercontent.com/joseluisq/json-datasets/master/json/operating-systems/macosx_releases.json" 33 | 34 | print("Fetch the json") 35 | json_obj = get_json(url) 36 | 37 | print("Convert the json object to a dataframe") 38 | df = json_to_dataframe(json_obj) 39 | print(df.to_string()) 40 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/Books.csv: -------------------------------------------------------------------------------- 1 |
Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. 
By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week3_Data_Cleansing/City.csv: -------------------------------------------------------------------------------- 1 | City,Country 2 | London,England 3 | Puerto Rico,U.S.A 4 | Coventry,England 5 | Christiania,Denmark 6 | Firenze,Italy 7 | Amsterdam,Netherlands 8 | Savona,Italy 9 | Paris,France 10 | New York,U.S.A -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | if __name__ == "__main__": 5 | columns_to_drop = ['Edition Statement', 6 | 'Corporate Author', 7 | 'Corporate Contributors', 8 | 'Former owner', 9 | 'Engraver', 10 | 'Contributors', 11 | 'Issuance type', 12 | 'Shelfmarks' 13 | ] 14 | csv_file = "Books.csv" 15 | df = pd.read_csv(csv_file) 16 | 17 | print("The percentage of NaN in the data per column:") 18 | num_of_rows = df.shape[0] 19 | for column in df: 20 | # df[column].isnull() : returns an array of True/False showing the cell is null or not 21 | percent = 100 * df[column].isnull().sum() / num_of_rows 22 | print(column, str(percent) + '%') 23 | 24 | print("****************************************") 25 | print("Dataframe before dropping the columns") 26 | print(df.to_string()) 27 | 28 | print("****************************************") 29 | print("Dataframe after dropping the columns") 30 | df.drop(columns_to_drop, inplace=True, axis=1) 31 | # Pandas' drop method is used to remove columns of a dataframe 32 | # Inplace=True indicates that the changes should be applied to the given dataframe instead of creating a new one 33 | # axis=1 : Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns'). 34 | 35 | print(df.to_string()) 36 | print("****************************************") 37 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | if __name__ == "__main__": 5 | csv_file = "Books.csv" 6 | df = pd.read_csv(csv_file) 7 | 8 | # Replace the cell value of "Place of Publication" with "London" if it contains "London", 9 | # and replace all '-' characters with space 10 | # We use the apply method which applies a lambda function to the cells of a dataframe 11 | df['Place of Publication'] = df['Place of Publication'].apply( 12 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 13 | 14 | ################################################################################################################ 15 | # Here is also another approach using numpy.where # 16 | # import numpy as np # 17 | # london = df['Place of Publication'].str.contains('London') # 18 | # df['Place of Publication'] = np.where(london, 'London', df['Place of Publication'].str.replace('-', ' ')) # 19 | ################################################################################################################ 20 | print(df['Place of Publication']) 21 | 22 | # We use Pandas' extract method which for each subject string in the Series, 23 | # extracts groups from the first match of regular expression pat. 
24 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 25 | # ^(\d{4}) : matches 4 digit numbers in the beginning of the string 26 | new_date = pd.to_numeric(new_date) 27 | df['Date of Publication'] = new_date 28 | print(df['Date of Publication']) 29 | 30 | # replace all NaN with 0 31 | new_date = new_date.fillna(0) 32 | df['Date of Publication'] = new_date 33 | print(df['Date of Publication']) 34 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | def clean(dataframe): 4 | dataframe['Place of Publication'] = dataframe['Place of Publication'].apply( 5 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 6 | 7 | new_date = dataframe['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 8 | new_date = pd.to_numeric(new_date) 9 | new_date = new_date.fillna(0) 10 | dataframe['Date of Publication'] = new_date 11 | 12 | return dataframe 13 | 14 | 15 | if __name__ == "__main__": 16 | csv_file = "Books.csv" 17 | df = pd.read_csv(csv_file) 18 | df = clean(df) 19 | 20 | # Replace the spaces with the underline character ('_') 21 | # Because panda's query method does not work well with column names which contains white spaces 22 | df.columns = [c.replace(' ', '_') for c in df.columns] 23 | 24 | # 25 | df = df.query('Date_of_Publication > 1866 and Place_of_Publication == "London"') 26 | 27 | print(df.to_string()) 28 | 29 | 30 | -------------------------------------------------------------------------------- /Week3_Data_Cleansing/activity_4.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | 5 | def clean(dataframe): 6 | dataframe['Place of Publication'] = dataframe['Place of Publication'].apply( 7 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 8 | 9 | new_date = dataframe['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 10 | new_date = pd.to_numeric(new_date) 11 | new_date = new_date.fillna(0) 12 | dataframe['Date of Publication'] = new_date 13 | 14 | return dataframe 15 | 16 | 17 | if __name__ == "__main__": 18 | csv_file = "Books.csv" 19 | books_df = pd.read_csv(csv_file) 20 | books_df = clean(books_df) 21 | # Replace the spaces with the underline character ('_') 22 | # Because panda's query method does not work well with column names which contains white spaces 23 | books_df.columns = [c.replace(' ', '_') for c in books_df.columns] 24 | 25 | city_df = pd.read_csv('City.csv') 26 | 27 | # merge the two dataframes 28 | df = pd.merge(books_df, city_df, how='left', left_on=['Place_of_Publication'], right_on=['City']) 29 | 30 | # Group by Country and keep the country as a column 31 | gb_df = df.groupby(['Country'], as_index=False) 32 | 33 | # Select a column (as far as it has values for all rows, you can select any column) 34 | df = gb_df['Identifier'].count() 35 | 36 | # print the dataframe which shows publication number by country 37 | print(df.to_string()) 38 | -------------------------------------------------------------------------------- /Week4_Visualization/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 
[1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 
13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week4_Visualization/activity_1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | 5 | def clean(df): 6 | # Let's Clean the data to get rid of exceptions 7 | df['Place of Publication'] = df['Place of Publication'].apply( 8 | lambda x: 'London' if 'London' in x else x.replace('-', ' ')) 9 | return df 10 | 11 | 12 | if __name__ == '__main__': 13 | csv_file = 'Books.csv' 14 | df = pd.read_csv(csv_file) 15 | 16 | # Cleaning is Optional; but it will increase the accuracy of the results 17 | df = clean(df) 18 | 19 | # value_counts: returns a Series containing counts of each category. 20 | unival = df['Place of Publication'].value_counts() 21 | unival.plot.pie(subplots=True) 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | df = df.groupby('species').mean() 9 | df.plot.bar() 10 | 11 | plt.show() 12 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_3.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | # divide the dataset into three dataframes based on the species 9 | setosa_df = df.query('species == "setosa"') 10 | versicolor_df = df.query('species == "versicolor"') 11 | virginica_df = df.query('species == "virginica"') 12 | 13 | # Plot a scatter chart using x='sepal_length', y='sepal_width', and separate colors for each of the three dataframes 14 | ax = setosa_df.plot.scatter(x='sepal_length', y='sepal_width', label='setosa') 15 | ax = versicolor_df.plot.scatter(x='sepal_length', y='sepal_width', label='versicolor', color='green', ax=ax) 16 | ax = virginica_df.plot.scatter(x='sepal_length', y='sepal_width', label='virginica', color='red', ax=ax) 17 | 18 | # Plot a scatter chart using x='petal_length', y='petal_width', and separate colors for each of the three dataframes 19 | ax = setosa_df.plot.scatter(x='petal_length', y='petal_width', label='setosa') 20 | ax = versicolor_df.plot.scatter(x='petal_length', y='petal_width', label='versicolor', color='green', ax=ax) 21 | ax = virginica_df.plot.scatter(x='petal_length', y='petal_width', label='virginica', color='red', ax=ax) 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /Week4_Visualization/activity_4.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | 4 | if __name__ == '__main__': 5 | csv_file = 'iris.csv' 6 | df = pd.read_csv(csv_file) 7 | 8 | # divide the dataset into three dataframes based on the species 9 | setosa_df = df.query('species == "setosa"') 10 | versicolor_df = df.query('species == "versicolor"') 11 | virginica_df = df.query('species == "virginica"') 12 | 13 | fig, axes = plt.subplots(nrows=1, ncols=2) 14 | 15 | # Plot a scatter chart using x='sepal_length', y='sepal_width', and separate colors for each of the three 
dataframes 16 | ax = setosa_df.plot.scatter(x='sepal_length', y='sepal_width', label='setosa', ax=axes[0]) 17 | ax = versicolor_df.plot.scatter(x='sepal_length', y='sepal_width', label='versicolor', color='green', ax=ax) 18 | ax = virginica_df.plot.scatter(x='sepal_length', y='sepal_width', label='virginica', color='red', ax=ax) 19 | 20 | # Plot a scatter chart using x='petal_length', y='petal_width', and separate colors for each of the three dataframes 21 | ax = setosa_df.plot.scatter(x='petal_length', y='petal_width', label='setosa', ax=axes[1]) 22 | ax = versicolor_df.plot.scatter(x='petal_length', y='petal_width', label='versicolor', color='green', ax=ax) 23 | ax = virginica_df.plot.scatter(x='petal_length', y='petal_width', label='virginica', color='red', ax=ax) 24 | 25 | 26 | plt.show() 27 | -------------------------------------------------------------------------------- /Week4_Visualization/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 
5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /Week5_Flask/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. 
By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. 
W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week5_Flask/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask_restx import Resource, Api 4 | 5 | app = Flask(__name__) 6 | api = Api(app) 7 | 8 | 9 | @api.route('/books/') 10 | class Books(Resource): 11 | def get(self, id): 12 | if id not in df.index: 13 | api.abort(404, "Book {} doesn't exist".format(id)) 14 | 15 | book = dict(df.loc[id]) 16 | return book 17 | 18 | 19 | if __name__ == '__main__': 20 | columns_to_drop = ['Edition Statement', 21 | 'Corporate Author', 22 | 'Corporate Contributors', 23 | 'Former owner', 24 | 'Engraver', 25 | 'Contributors', 26 | 'Issuance type', 27 | 'Shelfmarks' 28 | ] 29 | csv_file = "Books.csv" 30 | df = pd.read_csv(csv_file) 31 | 32 | # drop unnecessary columns 33 | df.drop(columns_to_drop, inplace=True, axis=1) 34 | 35 | # clean the date of publication & convert it to numeric data 36 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 37 | new_date = pd.to_numeric(new_date) 38 | new_date = new_date.fillna(0) 39 | df['Date of Publication'] = new_date 40 | 41 | # replace spaces in the name of columns 42 | df.columns = [c.replace(' ', '_') for c in df.columns] 43 | 44 | # set the index column; this will help us to find books with their ids 45 | df.set_index('Identifier', inplace=True) 46 | 47 | # run the application 48 | app.run(debug=True) 49 | -------------------------------------------------------------------------------- /Week5_Flask/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask_restx import Resource, Api 4 | 5 | app = Flask(__name__) 6 | api = Api(app) 7 | 8 | 9 | @api.route('/books/') 10 | class Books(Resource): 11 | def get(self, id): 12 | if id not in df.index: 13 | api.abort(404, "Book {} doesn't exist".format(id)) 14 | 15 | book = dict(df.loc[id]) 16 | return book 17 | 18 | def delete(self, id): 19 | if id not in df.index: 20 | api.abort(404, "Book {} doesn't exist".format(id)) 21 | 22 | df.drop(id, inplace=True) 23 | return {"message": "Book {} is removed.".format(id)}, 200 24 | 25 | 26 | if __name__ == '__main__': 27 | columns_to_drop = ['Edition Statement', 28 | 'Corporate Author', 29 | 'Corporate Contributors', 30 | 'Former owner', 31 | 'Engraver', 32 | 'Contributors', 33 | 'Issuance type', 34 | 'Shelfmarks' 35 | ] 36 | csv_file = "Books.csv" 37 | df = pd.read_csv(csv_file) 38 | 39 | # drop unnecessary columns 40 | df.drop(columns_to_drop, inplace=True, axis=1) 41 | 42 | # clean the date of publication & convert it to numeric data 43 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 44 | new_date = pd.to_numeric(new_date) 45 | new_date = new_date.fillna(0) 46 | df['Date of Publication'] = new_date 47 | 48 | # replace spaces in the name of columns 49 | df.columns = [c.replace(' ', '_') for c in df.columns] 50 | 51 | # set the index column; this will help us to find books with their ids 52 | df.set_index('Identifier', inplace=True) 53 | 54 | # run the application 55 | app.run(debug=True) 56 | -------------------------------------------------------------------------------- /Week5_Flask/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from flask import Flask 3 | from flask import request 4 | from flask_restx import 
Resource, Api 5 | from flask_restx import fields 6 | 7 | app = Flask(__name__) 8 | api = Api(app) 9 | 10 | # The following is the schema of Book 11 | book_model = api.model('Book', { 12 | 'Flickr_URL': fields.String, 13 | 'Publisher': fields.String, 14 | 'Author': fields.String, 15 | 'Title': fields.String, 16 | 'Date_of_Publication': fields.Integer, 17 | 'Identifier': fields.Integer, 18 | 'Place_of_Publication': fields.String 19 | }) 20 | 21 | 22 | @api.route('/books/') 23 | class Books(Resource): 24 | def get(self, id): 25 | if id not in df.index: 26 | api.abort(404, "Book {} doesn't exist".format(id)) 27 | 28 | book = dict(df.loc[id]) 29 | return book 30 | 31 | def delete(self, id): 32 | if id not in df.index: 33 | api.abort(404, "Book {} doesn't exist".format(id)) 34 | 35 | df.drop(id, inplace=True) 36 | return {"message": "Book {} is removed.".format(id)}, 200 37 | 38 | @api.expect(book_model) 39 | def put(self, id): 40 | 41 | if id not in df.index: 42 | api.abort(404, "Book {} doesn't exist".format(id)) 43 | 44 | # get the payload and convert it to a JSON 45 | book = request.json 46 | 47 | # Book ID cannot be changed 48 | if 'Identifier' in book and id != book['Identifier']: 49 | return {"message": "Identifier cannot be changed".format(id)}, 400 50 | 51 | # Update the values 52 | for key in book: 53 | if key not in book_model.keys(): 54 | # unexpected column 55 | return {"message": "Property {} is invalid".format(key)}, 400 56 | df.loc[id, key] = book[key] 57 | 58 | # df.append(book, ignore_index=True) 59 | return {"message": "Book {} has been successfully updated".format(id)}, 200 60 | 61 | 62 | if __name__ == '__main__': 63 | columns_to_drop = ['Edition Statement', 64 | 'Corporate Author', 65 | 'Corporate Contributors', 66 | 'Former owner', 67 | 'Engraver', 68 | 'Contributors', 69 | 'Issuance type', 70 | 'Shelfmarks' 71 | ] 72 | csv_file = "Books.csv" 73 | df = pd.read_csv(csv_file) 74 | 75 | # drop unnecessary columns 76 | df.drop(columns_to_drop, inplace=True, axis=1) 77 | 78 | # clean the date of publication & convert it to numeric data 79 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 80 | new_date = pd.to_numeric(new_date) 81 | new_date = new_date.fillna(0) 82 | df['Date of Publication'] = new_date 83 | 84 | # replace spaces in the name of columns 85 | df.columns = [c.replace(' ', '_') for c in df.columns] 86 | 87 | # set the index column; this will help us to find books with their ids 88 | df.set_index('Identifier', inplace=True) 89 | 90 | # run the application 91 | app.run(debug=True) 92 | -------------------------------------------------------------------------------- /Week6_Flask2/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 
4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... 
a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
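With the Week5 endpoints above in place (GET, DELETE and PUT on a single book), the service can be exercised from a separate process. A minimal client sketch, assuming the Flask app is running locally on its default port and that the Books resource is routed with the book id as a URL parameter; it mirrors what the Week7_Client scripts later in the repository do against http://127.0.0.1:5000:

import requests

BASE = 'http://127.0.0.1:5000'

# Fetch one record.
r = requests.get(f'{BASE}/books/206')
print(r.status_code, r.json())

# Update a couple of fields; the server rejects changes to 'Identifier'
# and any key that is not part of the book_model schema.
r = requests.put(f'{BASE}/books/206',
                 json={'Author': 'Nobody', 'Date_of_Publication': 1879})
print(r.status_code, r.json()['message'])

# Remove the record; a follow-up GET is expected to return 404.
r = requests.delete(f'{BASE}/books/206')
print(r.status_code, r.json()['message'])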
-------------------------------------------------------------------------------- /Week6_Flask2/activity_1.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app) 13 | 14 | # The following is the schema of Book 15 | book_model = api.model('Book', { 16 | 'Flickr_URL': fields.String, 17 | 'Publisher': fields.String, 18 | 'Author': fields.String, 19 | 'Title': fields.String, 20 | 'Date_of_Publication': fields.Integer, 21 | 'Identifier': fields.Integer, 22 | 'Place_of_Publication': fields.String 23 | }) 24 | 25 | parser = reqparse.RequestParser() 26 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 27 | parser.add_argument('ascending', type=inputs.boolean) 28 | 29 | 30 | @api.route('/books') 31 | class BooksList(Resource): 32 | 33 | @api.expect(parser) 34 | def get(self): 35 | # get books as JSON string 36 | args = parser.parse_args() 37 | 38 | # retrieve the query parameters 39 | order_by = args.get('order') 40 | ascending = args.get('ascending', True) 41 | 42 | if order_by: 43 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 44 | 45 | json_str = df.to_json(orient='index') 46 | 47 | # convert the string JSON to a real JSON 48 | ds = json.loads(json_str) 49 | ret = [] 50 | 51 | for idx in ds: 52 | book = ds[idx] 53 | book['Identifier'] = int(idx) 54 | ret.append(book) 55 | 56 | return ret 57 | 58 | 59 | @api.route('/books/') 60 | class Books(Resource): 61 | 62 | def get(self, id): 63 | if id not in df.index: 64 | api.abort(404, "Book {} doesn't exist".format(id)) 65 | 66 | book = dict(df.loc[id]) 67 | return book 68 | 69 | def delete(self, id): 70 | if id not in df.index: 71 | api.abort(404, "Book {} doesn't exist".format(id)) 72 | 73 | df.drop(id, inplace=True) 74 | return {"message": "Book {} is removed.".format(id)}, 200 75 | 76 | @api.expect(book_model) 77 | def put(self, id): 78 | 79 | if id not in df.index: 80 | api.abort(404, "Book {} doesn't exist".format(id)) 81 | 82 | # get the payload and convert it to a JSON 83 | book = request.json 84 | 85 | # Book ID cannot be changed 86 | if 'Identifier' in book and id != book['Identifier']: 87 | return {"message": "Identifier cannot be changed".format(id)}, 400 88 | 89 | # Update the values 90 | for key in book: 91 | if key not in book_model.keys(): 92 | # unexpected column 93 | return {"message": "Property {} is invalid".format(key)}, 400 94 | df.loc[id, key] = book[key] 95 | 96 | df.append(book, ignore_index=True) 97 | return {"message": "Book {} has been successfully updated".format(id)}, 200 98 | 99 | 100 | if __name__ == '__main__': 101 | columns_to_drop = ['Edition Statement', 102 | 'Corporate Author', 103 | 'Corporate Contributors', 104 | 'Former owner', 105 | 'Engraver', 106 | 'Contributors', 107 | 'Issuance type', 108 | 'Shelfmarks' 109 | ] 110 | csv_file = "Books.csv" 111 | df = pd.read_csv(csv_file) 112 | 113 | # drop unnecessary columns 114 | df.drop(columns_to_drop, inplace=True, axis=1) 115 | 116 | # clean the date of publication & convert it to numeric data 117 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 118 | new_date = pd.to_numeric(new_date) 119 | new_date = new_date.fillna(0) 120 | df['Date of Publication'] = new_date 
121 | 122 | # replace spaces in the name of columns 123 | df.columns = [c.replace(' ', '_') for c in df.columns] 124 | 125 | # set the index column; this will help us to find books with their ids 126 | df.set_index('Identifier', inplace=True) 127 | 128 | # run the application 129 | app.run(debug=True) 130 | -------------------------------------------------------------------------------- /Week6_Flask2/activity_2.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app) 13 | 14 | # The following is the schema of Book 15 | book_model = api.model('Book', { 16 | 'Flickr_URL': fields.String, 17 | 'Publisher': fields.String, 18 | 'Author': fields.String, 19 | 'Title': fields.String, 20 | 'Date_of_Publication': fields.Integer, 21 | 'Identifier': fields.Integer, 22 | 'Place_of_Publication': fields.String 23 | }) 24 | 25 | parser = reqparse.RequestParser() 26 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 27 | parser.add_argument('ascending', type=inputs.boolean) 28 | 29 | 30 | @api.route('/books') 31 | class BooksList(Resource): 32 | 33 | @api.expect(parser) 34 | def get(self): 35 | # get books as JSON string 36 | args = parser.parse_args() 37 | 38 | # retrieve the query parameters 39 | order_by = args.get('order') 40 | ascending = args.get('ascending', True) 41 | 42 | if order_by: 43 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 44 | 45 | json_str = df.to_json(orient='index') 46 | 47 | # convert the string JSON to a real JSON 48 | ds = json.loads(json_str) 49 | ret = [] 50 | 51 | for idx in ds: 52 | book = ds[idx] 53 | book['Identifier'] = int(idx) 54 | ret.append(book) 55 | 56 | return ret 57 | 58 | @api.expect(book_model, validate=True) 59 | def post(self): 60 | book = request.json 61 | 62 | if 'Identifier' not in book: 63 | return {"message": "Missing Identifier"}, 400 64 | 65 | id = book['Identifier'] 66 | 67 | # check if the given identifier does not exist 68 | if id in df.index: 69 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 70 | 71 | # Put the values into the dataframe 72 | for key in book: 73 | if key not in book_model.keys(): 74 | # unexpected column 75 | return {"message": "Property {} is invalid".format(key)}, 400 76 | df.loc[id, key] = book[key] 77 | 78 | # df.append(book, ignore_index=True) 79 | return {"message": "Book {} is created".format(id)}, 201 80 | 81 | 82 | @api.route('/books/') 83 | class Books(Resource): 84 | def get(self, id): 85 | if id not in df.index: 86 | api.abort(404, "Book {} doesn't exist".format(id)) 87 | 88 | book = dict(df.loc[id]) 89 | return book 90 | 91 | def delete(self, id): 92 | if id not in df.index: 93 | api.abort(404, "Book {} doesn't exist".format(id)) 94 | 95 | df.drop(id, inplace=True) 96 | return {"message": "Book {} is removed.".format(id)}, 200 97 | 98 | @api.expect(book_model) 99 | def put(self, id): 100 | 101 | if id not in df.index: 102 | api.abort(404, "Book {} doesn't exist".format(id)) 103 | 104 | # get the payload and convert it to a JSON 105 | book = request.json 106 | 107 | # Book ID cannot be changed 108 | if 'Identifier' in book and id != book['Identifier']: 109 | return {"message": "Identifier cannot be 
changed".format(id)}, 400 110 | 111 | # Update the values 112 | for key in book: 113 | if key not in book_model.keys(): 114 | # unexpected column 115 | return {"message": "Property {} is invalid".format(key)}, 400 116 | df.loc[id, key] = book[key] 117 | 118 | df.append(book, ignore_index=True) 119 | return {"message": "Book {} has been successfully updated".format(id)}, 200 120 | 121 | 122 | if __name__ == '__main__': 123 | columns_to_drop = ['Edition Statement', 124 | 'Corporate Author', 125 | 'Corporate Contributors', 126 | 'Former owner', 127 | 'Engraver', 128 | 'Contributors', 129 | 'Issuance type', 130 | 'Shelfmarks' 131 | ] 132 | csv_file = "Books.csv" 133 | df = pd.read_csv(csv_file) 134 | 135 | # drop unnecessary columns 136 | df.drop(columns_to_drop, inplace=True, axis=1) 137 | 138 | # clean the date of publication & convert it to numeric data 139 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 140 | new_date = pd.to_numeric(new_date) 141 | new_date = new_date.fillna(0) 142 | df['Date of Publication'] = new_date 143 | 144 | # replace spaces in the name of columns 145 | df.columns = [c.replace(' ', '_') for c in df.columns] 146 | 147 | # set the index column; this will help us to find books with their ids 148 | df.set_index('Identifier', inplace=True) 149 | 150 | # run the application 151 | app.run(debug=True) 152 | -------------------------------------------------------------------------------- /Week6_Flask2/activity_3.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pandas as pd 4 | from flask import Flask 5 | from flask import request 6 | from flask_restx import Resource, Api 7 | from flask_restx import fields 8 | from flask_restx import inputs 9 | from flask_restx import reqparse 10 | 11 | app = Flask(__name__) 12 | api = Api(app, 13 | default="Books", # Default namespace 14 | title="Book Dataset", # Documentation Title 15 | description="This is just a simple example to show how publish data as a service.") # Documentation Description 16 | 17 | # The following is the schema of Book 18 | book_model = api.model('Book', { 19 | 'Flickr_URL': fields.String(example="jeee"), 20 | 'Publisher': fields.String, 21 | 'Author': fields.String, 22 | 'Title': fields.String, 23 | 'Date_of_Publication': fields.Integer, 24 | 'Identifier': fields.Integer, 25 | 'Place_of_Publication': fields.String 26 | }) 27 | 28 | parser = reqparse.RequestParser() 29 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 30 | parser.add_argument('ascending', type=inputs.boolean) 31 | 32 | 33 | @api.route('/books') 34 | class BooksList(Resource): 35 | 36 | @api.response(200, 'Successful') 37 | @api.doc(description="Get all books") 38 | @api.expect(parser) 39 | def get(self): 40 | # get books as JSON string 41 | args = parser.parse_args() 42 | 43 | # retrieve the query parameters 44 | order_by = args.get('order') 45 | ascending = args.get('ascending', True) 46 | 47 | if order_by: 48 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 49 | 50 | json_str = df.to_json(orient='index') 51 | 52 | # convert the string JSON to a real JSON 53 | ds = json.loads(json_str) 54 | ret = [] 55 | 56 | for idx in ds: 57 | book = ds[idx] 58 | book['Identifier'] = int(idx) 59 | ret.append(book) 60 | 61 | return ret 62 | 63 | @api.response(201, 'Book Created Successfully') 64 | @api.response(400, 'Validation Error') 65 | @api.doc(description="Add a new book") 66 | @api.expect(book_model, 
validate=True) 67 | def post(self): 68 | book = request.json 69 | 70 | if 'Identifier' not in book: 71 | return {"message": "Missing Identifier"}, 400 72 | 73 | id = book['Identifier'] 74 | 75 | # check if the given identifier does not exist 76 | if id in df.index: 77 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 78 | 79 | # Put the values into the dataframe 80 | for key in book: 81 | if key not in book_model.keys(): 82 | # unexpected column 83 | return {"message": "Property {} is invalid".format(key)}, 400 84 | df.loc[id, key] = book[key] 85 | 86 | # df.append(book, ignore_index=True) 87 | return {"message": "Book {} is created".format(id)}, 201 88 | 89 | 90 | @api.route('/books/') 91 | @api.param('id', 'The Book identifier') 92 | class Books(Resource): 93 | @api.response(404, 'Book was not found') 94 | @api.response(200, 'Successful') 95 | @api.doc(description="Get a book by its ID") 96 | def get(self, id): 97 | if id not in df.index: 98 | api.abort(404, "Book {} doesn't exist".format(id)) 99 | 100 | book = dict(df.loc[id]) 101 | return book 102 | 103 | @api.response(404, 'Book was not found') 104 | @api.response(200, 'Successful') 105 | @api.doc(description="Delete a book by its ID") 106 | def delete(self, id): 107 | if id not in df.index: 108 | api.abort(404, "Book {} doesn't exist".format(id)) 109 | 110 | df.drop(id, inplace=True) 111 | return {"message": "Book {} is removed.".format(id)}, 200 112 | 113 | @api.response(404, 'Book was not found') 114 | @api.response(400, 'Validation Error') 115 | @api.response(200, 'Successful') 116 | @api.expect(book_model, validate=True) 117 | @api.doc(description="Update a book by its ID") 118 | def put(self, id): 119 | 120 | if id not in df.index: 121 | api.abort(404, "Book {} doesn't exist".format(id)) 122 | 123 | # get the payload and convert it to a JSON 124 | book = request.json 125 | 126 | # Book ID cannot be changed 127 | if 'Identifier' in book and id != book['Identifier']: 128 | return {"message": "Identifier cannot be changed".format(id)}, 400 129 | 130 | # Update the values 131 | for key in book: 132 | if key not in book_model.keys(): 133 | # unexpected column 134 | return {"message": "Property {} is invalid".format(key)}, 400 135 | df.loc[id, key] = book[key] 136 | 137 | df._append(book, ignore_index=True) 138 | return {"message": "Book {} has been successfully updated".format(id)}, 200 139 | 140 | 141 | if __name__ == '__main__': 142 | columns_to_drop = ['Edition Statement', 143 | 'Corporate Author', 144 | 'Corporate Contributors', 145 | 'Former owner', 146 | 'Engraver', 147 | 'Contributors', 148 | 'Issuance type', 149 | 'Shelfmarks' 150 | ] 151 | csv_file = "Books.csv" 152 | df = pd.read_csv(csv_file) 153 | 154 | # drop unnecessary columns 155 | df.drop(columns_to_drop, inplace=True, axis=1) 156 | 157 | # clean the date of publication & convert it to numeric data 158 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 159 | new_date = pd.to_numeric(new_date) 160 | new_date = new_date.fillna(0) 161 | df['Date of Publication'] = new_date 162 | 163 | # replace spaces in the name of columns 164 | df.columns = [c.replace(' ', '_') for c in df.columns] 165 | 166 | # set the index column; this will help us to find books with their ids 167 | df.set_index('Identifier', inplace=True) 168 | 169 | # run the application 170 | app.run(debug=True, port="5000") 171 | -------------------------------------------------------------------------------- /Week7_Client/activity_1.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | # if isinstance(book[key], str): 9 | # val = str(book[key].encode('utf-8')) 10 | # else: 11 | val = str(book[key]) 12 | 13 | print("\t" + attr + ":" + val) 14 | print("}") 15 | 16 | 17 | if __name__ == '__main__': 18 | 19 | r = requests.get("http://127.0.0.1:5000/books", params={'order': 'Date_of_Publication', 'ascending':True}) 20 | print("Status Code:" + str(r.status_code)) 21 | books = r.json() 22 | for i in range(1, 5): 23 | print_book(books[i]) 24 | -------------------------------------------------------------------------------- /Week7_Client/activity_2.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | if __name__ == '__main__': 4 | book = { 5 | "Date_of_Publication": 2018, 6 | "Publisher": "UNSW", 7 | "Author": "Nobody", 8 | "Title": "Nothing", 9 | "Flickr_URL": "http://somewhere", 10 | "Identifier": 2, 11 | "Place_of_Publication": "Sydney" 12 | } 13 | 14 | r = requests.post("http://127.0.0.1:5000/books", json=book) 15 | 16 | print("Status Code:" + str(r.status_code)) 17 | resp = r.json() 18 | 19 | print(resp['message']) 20 | -------------------------------------------------------------------------------- /Week7_Client/activity_3.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | val = str(book[key]) 9 | print("\t" + attr + ":" + val) 10 | 11 | print("}") 12 | 13 | 14 | def get_book(id): 15 | r = requests.get("http://127.0.0.1:5000/books/" + str(id)) 16 | book = r.json() 17 | print("Get status Code:" + str(r.status_code)) 18 | if r.ok: 19 | print_book(book) 20 | return book 21 | else: 22 | print('Error:' + book['message']) 23 | 24 | 25 | if __name__ == '__main__': 26 | 27 | print("***** Book information before update *****") 28 | book = get_book('206') 29 | 30 | # update the book information 31 | print("***** Updating Book Information *****") 32 | book['Author'] = 'Nobody' 33 | book['Identifier'] = 206 34 | book['Date_of_Publication']= 1879 35 | print(book) 36 | r = requests.put("http://127.0.0.1:5000/books/206", json=book) 37 | print("Put status Code:" + str(r.status_code)) 38 | print(r.json()['message']) 39 | 40 | print("***** Book information after update *****") 41 | book = get_book('206') 42 | 43 | -------------------------------------------------------------------------------- /Week7_Client/activity_4.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def print_book(book): 5 | print("Book {") 6 | for key in book.keys(): 7 | attr = str(key) 8 | val = str(book[key]) 9 | print("\t" + attr + ":" + val) 10 | print("}") 11 | 12 | 13 | def get_book(id): 14 | r = requests.get("http://127.0.0.1:5000/books/" + str(id)) 15 | book = r.json() 16 | print("Get status Code:" + str(r.status_code)) 17 | if r.ok: 18 | print_book(book) 19 | return book 20 | else: 21 | print('Error:' + book['message']) 22 | 23 | 24 | def remove_book(id): 25 | r = requests.delete("http://127.0.0.1:5000/books/"+id) 26 | print("Delete status Code:" + str(r.status_code)) 27 | print(r.json()['message']) 28 | 29 | if __name__ == '__main__': 30 | 31 | print("***** Book information before update *****") 32 | book = get_book('206') 33 | 
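    # Sketch of the flow this script expects against the books service (not asserted
    # anywhere in the code): the GET above prints book 206, the DELETE below removes
    # it from the dataframe, and the final GET then returns 404 with
    # "Book 206 doesn't exist".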
34 | # update the book information 35 | print("***** Deleting Book *****") 36 | remove_book('206') 37 | 38 | print("***** Book information after Delete *****") 39 | book = get_book('206') 40 | 41 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_1.py: -------------------------------------------------------------------------------- 1 | from ariadne.constants import PLAYGROUND_HTML 2 | from flask import Flask, request, jsonify 3 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 4 | 5 | # Define types using Schema Definition Language (https://graphql.org/learn/schema/) 6 | type_defs = gql( 7 | """ 8 | type Books { 9 | title: String! 10 | description: String! 11 | author: String! 12 | } 13 | 14 | type Query { 15 | books: [Books] 16 | } 17 | """ 18 | ) 19 | 20 | query = QueryType() 21 | 22 | # Define resolvers 23 | @query.field("books") 24 | def books(*_): 25 | return [book.to_json() for book in books_db.values()] 26 | 27 | 28 | # Create executable schema 29 | schema = make_executable_schema(type_defs, [query]) 30 | 31 | # initialize flask app 32 | app = Flask(__name__) 33 | books_db = dict() 34 | 35 | 36 | class Books: 37 | def __init__(self, id=None, title=None, description=None, author=None): 38 | self.id = id 39 | self.title = title 40 | self.description = description 41 | self.author = author 42 | 43 | def to_json(self): 44 | return { 45 | "title": self.title, 46 | "description": self.description, 47 | "author": self.author, 48 | } 49 | 50 | 51 | # Create a GraphQL Playground UI for the GraphQL schema 52 | @app.route("/graphql", methods=["GET"]) 53 | def graphql_playground(): 54 | return PLAYGROUND_HTML 55 | 56 | 57 | # Create a GraphQL endpoint for executing GraphQL queries 58 | @app.route("/graphql", methods=["POST"]) 59 | def graphql_server(): 60 | data = request.get_json() 61 | success, result = graphql_sync(schema, data, context_value={"request": request}) 62 | status_code = 200 if success else 400 63 | return jsonify(result), status_code 64 | 65 | 66 | # Run the app 67 | if __name__ == "__main__": 68 | books_db[1] = Books(len(books_db) + 1,"Data Services", "A Fake Book", "No Body") 69 | books_db[2] = Books(len(books_db) + 1,"Advance Data Services", "A Fake Book", "No Body") 70 | app.run(debug=True) 71 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_2.py: -------------------------------------------------------------------------------- 1 | from ariadne.constants import PLAYGROUND_HTML 2 | from flask import Flask, request, jsonify 3 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 4 | 5 | # Define type definitions (schema) using SDL 6 | type_defs = gql( 7 | """ 8 | type Books { 9 | title: String! 10 | description: String! 11 | author: String! 
12 | } 13 | 14 | type Query { 15 | books: [Books] 16 | } 17 | 18 | type Mutation{add_book(title: String!, description: String!, author: String!): Books} 19 | """ 20 | ) 21 | 22 | query = QueryType() 23 | mutation = MutationType() 24 | 25 | 26 | # Define resolvers 27 | @query.field("books") 28 | def books(*_): 29 | return [book.to_json() for book in books_db.values()] 30 | 31 | 32 | @mutation.field("add_book") 33 | def add_book(_, info, title, description, author): 34 | book = Books(len(books_db) + 1, title, description, author) 35 | book.save() 36 | return book.to_json() 37 | 38 | 39 | # Create executable schema 40 | schema = make_executable_schema(type_defs, [query, mutation]) 41 | 42 | # initialize flask app 43 | app = Flask(__name__) 44 | books_db = dict() 45 | 46 | 47 | class Books: 48 | def __init__(self, id=None, title=None, description=None, author=None): 49 | self.id = id 50 | self.title = title 51 | self.description = description 52 | self.author = author 53 | 54 | def to_json(self): 55 | return { 56 | "title": self.title, 57 | "description": self.description, 58 | "author": self.author, 59 | } 60 | 61 | def save(self): 62 | if self.id is None: 63 | self.id = len(books_db) + 1 64 | books_db[self.id] = self 65 | 66 | 67 | # Create a GraphQL Playground UI for the GraphQL schema 68 | @app.route("/graphql", methods=["GET"]) 69 | def graphql_playground(): 70 | return PLAYGROUND_HTML 71 | 72 | 73 | # Create a GraphQL endpoint for executing GraphQL queries 74 | @app.route("/graphql", methods=["POST"]) 75 | def graphql_server(): 76 | data = request.get_json() 77 | success, result = graphql_sync(schema, data, context_value={"request": request}) 78 | status_code = 200 if success else 400 79 | return jsonify(result), status_code 80 | 81 | 82 | # Run the app 83 | if __name__ == "__main__": 84 | app.run(debug=True) 85 | -------------------------------------------------------------------------------- /Week7_GraphQL/activity_3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import flask_migrate.cli 5 | from ariadne.constants import PLAYGROUND_HTML 6 | from flask_migrate import Migrate 7 | from flask_sqlalchemy import SQLAlchemy 8 | from flask import Flask, request, jsonify 9 | from ariadne import gql, QueryType, MutationType, make_executable_schema, graphql_sync 10 | 11 | # Define type definitions (schema) using SDL 12 | type_defs = gql( 13 | """ 14 | type Books { 15 | title: String! 16 | description: String! 17 | author: String! 
18 | } 19 | 20 | type Query { 21 | books: [Books] 22 | } 23 | 24 | type Mutation{add_book(title: String!, description: String!, author: String!): Books} 25 | """ 26 | ) 27 | 28 | query = QueryType() 29 | mutation = MutationType() 30 | 31 | 32 | # Define resolvers 33 | @query.field("books") 34 | def books(*_): 35 | return [book.to_json() for book in Books.query.all()] 36 | 37 | @mutation.field("add_book") 38 | def add_book(_, info, title, description, author): 39 | book = Books(len(books_db) + 1, title, description, author) 40 | book.save() 41 | return book.to_json() 42 | 43 | 44 | # Create executable schema 45 | schema = make_executable_schema(type_defs, [query, mutation]) 46 | 47 | # initialize flask app 48 | app = Flask(__name__) 49 | app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///db.sqlite3" 50 | app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False 51 | db = SQLAlchemy(app) 52 | migrate = Migrate(app, db) 53 | books_db = dict() 54 | 55 | 56 | class Books(db.Model): 57 | id = db.Column(db.Integer, primary_key=True) 58 | title = db.Column(db.String(100), nullable=False) 59 | author = db.Column(db.String(100), nullable=False) 60 | description = db.Column(db.String(255), nullable=False) 61 | 62 | def __init__(self, id=None, title=None, description=None, author=None): 63 | self.id = id 64 | self.title = title 65 | self.description = description 66 | self.author = author 67 | 68 | def to_json(self): 69 | return { 70 | "title": self.title, 71 | "description": self.description, 72 | "author": self.author, 73 | } 74 | 75 | def save(self): 76 | db.session.add(self) 77 | db.session.commit() 78 | 79 | 80 | # Create a GraphQL Playground UI for the GraphQL schema 81 | @app.route("/graphql", methods=["GET"]) 82 | def graphql_playground(): 83 | return PLAYGROUND_HTML 84 | 85 | 86 | # Create a GraphQL endpoint for executing GraphQL queries 87 | @app.route("/graphql", methods=["POST"]) 88 | def graphql_server(): 89 | data = request.get_json() 90 | success, result = graphql_sync(schema, data, context_value={"request": request}) 91 | status_code = 200 if success else 400 92 | return jsonify(result), status_code 93 | 94 | 95 | # Run the app 96 | if __name__ == "__main__": 97 | 98 | """ 99 | Run the following commands in a terminal before running the application to setup the database 100 | """ 101 | # cd to the directory 102 | # export FLASK_APP = activity_3.py 103 | # flask db init 104 | # flask db migrate 105 | # flask db upgrade 106 | 107 | app.run(debug=True) 108 | -------------------------------------------------------------------------------- /Week8_Authentication/Books.csv: -------------------------------------------------------------------------------- 1 | Identifier,Edition Statement,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Corporate Author,Corporate Contributors,Former owner,Engraver,Issuance type,Flickr URL,Shelfmarks 2 | 000000206,,London,1879 [1878],S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000206,British Library HMNTS 12641.b.30. 3 | 000000216,,London; Virtue & Yorston,1868,Virtue & Co.,"All for Greed. [A novel. The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000216,British Library HMNTS 12626.cc.2. 4 | 000000218,,London,1869,"Bradbury, Evans & Co.","Love the Avenger. 
By the author of “All for Greed.” [The dedication signed: A. A. A., i.e. Marie Pauline Rose, Baroness Blaze de Bury.]","A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000218,British Library HMNTS 12625.dd.1. 5 | 000000472,,London,1851,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the close of the twelfth century. By the author of “Proposals for Christian Union” (E. S. A. [i.e. Ernest Appleyard])","A., E. S.","Appleyard, Ernest Silvanus.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000472,British Library HMNTS 10369.bbb.15. 6 | 000000480,"A new edition, revised, etc.",London,1857,Wertheim & Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000480,British Library HMNTS 9007.d.28. 7 | 000000481,"Fourth edition, revised, etc.",London,1875,William Macintosh,"[The World in which I live, and my place in it. By E. S. A. [i.e. Letitia Willgoss Stone.] Edited by ... J. H. Broome.]","A., E. S.","BROOME, John Henry.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000481,British Library HMNTS 9006.ee.10. 8 | 000000519,,London,1872,The Author,Lagonells. By the author of Darmayne (F. E. A. [i.e. Florence Emily Ashley]),"A., F. E.","ASHLEY, Florence Emily.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000519,British Library HMNTS 12637.e.3. 9 | 000000667,,"Puerto Rico",,,"The Coming of Spring, and other poems. By J. A. [i.e. J. Andrews.]","A., J.|A., J.","ANDREWS, J. - Writer of Verse",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000667,British Library HMNTS 011652.g.73. 10 | 000000874,,London],1676,,"A Warning to the inhabitants of England, and London in particular ... By M. A. [i.e. Mary Adams.]",Remaʿ.,"ADAMS, Mary.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000000874,British Library HMNTS 11645.bb.42. 11 | 000001143,,London,1676,,A Satyr against Vertue. (A poem: supposed to be spoken by a Town-Hector. [By John Oldham. The preface signed: T. A.]),"A., T.","OLDHAM, John.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001143,British Library HMNTS 11602.ee.10.(2.) 12 | 000001280,,Coventry,1802,Printed by J. Turner,"An Account of the many and great Loans, Benefactions and Charities, belonging to the City of Coventry ... A new edition. [The dedication signed: AB, CD, EF, GH, &c. By Edward Jackson and Samuel Carte.]",,"CARTE, Samuel.|JACKSON, Edward - Rector of Southam, and CARTE (Samuel)",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001280,British Library HMNTS 1430.g.17. 13 | 000001808,,Christiania,1859,,"Erindringer som Bidrag til Norges Historie fra 1800-1815. Anden Udgave ... Udgivet med nogle Rettelser og Tillæg af Christian C. A. Lange. Med Forfatterens Portraet, og hans Biographi af Amtmand J. C. Aall","AALL, Jacob.","AALL, J. C.|LANGE, Christian Christoph Andreas.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001808,British Library HMNTS 9425.cc.37. 14 | 000001905,,Firenze,1888,,Gli Studi storici in terra d'Otranto ... Frammenti estratti in gran parte dall' Archivio Storico Italiano ... a cura e spese di L(uigi) G(iuseppe) D(e) S(imone),"AAR, Ermanno - pseud. [i.e. 
Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de Simone.]","S., L. G. D.|SIMONE, Luigi Giuseppe Oronzo Mariano Raffaele Francesco Fortunato Felice de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001905,British Library HMNTS 10136.g.22. 15 | 000001929,,Amsterdam,"1676, 38-54",,De Aardbol. Magazijn van hedendaagsche land- en volkenkunde ... Met platen en kaarten. [Deel 4-9 by P. H. W.],,"WITKAMP, Pieter Harme.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000001929,British Library HMNTS 10002.g.16-19. 16 | 000002836,,Savona,1888,,Cronache Savonesi dal 1500 al 1570 ... Accresciute di documenti inediti pubblicate e annotate dal dott. G. Assereto,"ABATE, Giovanni Agostino.","ASSERETO, Giovanni.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002836,British Library HMNTS 10136.h.24. 17 | 000002854,,London,1888,E. Moxon & Co.,"See-Saw; a novel ... Edited [or rather, written] by W. W. Reade","ABATI, Francesco.","READE, William Winwood.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002854,British Library HMNTS 12623.bbb.12. 18 | 000002956,,Paris,1860-63,,"Géodésie d'une partie de la Haute Éthiopie, revue et rédigée par R. Radau. fasc. 1-3","ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002956,British Library HMNTS 10096.i.19. 19 | 000002957,,Paris,1873,,[With eleven maps.],"ABBADIE, Antoine Thompson d'.","RADAU, Rodolphe.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000002957,British Library HMNTS 10095.i.13. 20 | 000003017,"Nueva edicion, anotada ... y continuada ... por J. J. de Acosta y Calbo.",Puerto-Rico,1866,,"[Historia geográfica, civil y politica de la Isla de S. Juan Bautista de Puerto Rico, Dala a luz A. Valladares de Sotomayor.]","ABBAD Y LASIERRA, Agustín Íñigo - Bishop of Barbastro","ACOSTA Y CALBO, José Julian de.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003017,British Library HMNTS 10480.h.18. 21 | 000003131,,New York,1899,W. Abbatt,"The Crisis of the Revolution, being the story of Arnold and André now for the first time collected from all sources, and illustrated with views of all places identified with it ... Illustrations from original photographs by E. S. Bennett, etc","ABBATT, William.","ANDRÉ, John - Major|ARNOLD, Benedict.",,,,,monographic,http://www.flickr.com/photos/britishlibrary/tags/sysnum000003131,British Library HMNTS 9603.g.14. 
-------------------------------------------------------------------------------- /Week8_Authentication/activity_1.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import wraps 3 | 4 | import pandas as pd 5 | from flask import Flask 6 | from flask import request 7 | from flask_restx import Resource, Api 8 | from flask_restx import abort 9 | from flask_restx import fields 10 | from flask_restx import inputs 11 | from flask_restx import reqparse 12 | 13 | app = Flask(__name__) 14 | api = Api(app, 15 | default="Books", # Default namespace 16 | title="Book Dataset", # Documentation Title 17 | description="This is just a simple example to show how to publish data as a service.") # Documentation Description 18 | 19 | 20 | def requires_auth(f): 21 | @wraps(f) 22 | def decorated(*args, **kwargs): 23 | auth = request.authorization 24 | if not auth: 25 | abort(401) 26 | 27 | if not (auth.username == 'admin' and 'admin' == auth.password): 28 | abort(401) 29 | 30 | return f(*args, **kwargs) 31 | 32 | return decorated 33 | 34 | 35 | # The following is the schema of Book 36 | book_model = api.model('Book', { 37 | 'Flickr_URL': fields.String, 38 | 'Publisher': fields.String, 39 | 'Author': fields.String, 40 | 'Title': fields.String, 41 | 'Date_of_Publication': fields.Integer, 42 | 'Identifier': fields.Integer, 43 | 'Place_of_Publication': fields.String 44 | }) 45 | 46 | parser = reqparse.RequestParser() 47 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 48 | parser.add_argument('ascending', type=inputs.boolean) 49 | 50 | 51 | @api.route('/books') 52 | class BooksList(Resource): 53 | @api.response(200, 'Successful') 54 | @api.doc(description="Get all books") 55 | @requires_auth 56 | def get(self): 57 | # get books as JSON string 58 | args = parser.parse_args() 59 | 60 | # retrieve the query parameters 61 | order_by = args.get('order') 62 | ascending = args.get('ascending', True) 63 | 64 | if order_by: 65 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 66 | 67 | json_str = df.to_json(orient='index') 68 | 69 | # convert the string JSON to a real JSON 70 | ds = json.loads(json_str) 71 | ret = [] 72 | 73 | for idx in ds: 74 | book = ds[idx] 75 | book['Identifier'] = int(idx) 76 | ret.append(book) 77 | 78 | return ret 79 | 80 | @api.response(201, 'Book Created Successfully') 81 | @api.response(400, 'Validation Error') 82 | @api.doc(description="Add a new book") 83 | @api.expect(book_model, validate=True) 84 | @requires_auth 85 | def post(self): 86 | book = request.json 87 | 88 | if 'Identifier' not in book: 89 | return {"message": "Missing Identifier"}, 400 90 | 91 | id = book['Identifier'] 92 | 93 | # check if the given identifier does not exist 94 | if id in df.index: 95 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 96 | 97 | # Put the values into the dataframe 98 | for key in book: 99 | if key not in book_model.keys(): 100 | # unexpected column 101 | return {"message": "Property {} is invalid".format(key)}, 400 102 | df.loc[id, key] = book[key] 103 | 104 | # df.append(book, ignore_index=True) 105 | return {"message": "Book {} is created".format(id)}, 201 106 | 107 | 108 | @api.route('/books/<int:id>') 109 | @api.param('id', 'The Book identifier') 110 | class Books(Resource): 111 | @api.response(404, 'Book was not found') 112 | @api.response(200, 'Successful') 113 | @api.doc(description="Get a book by its ID") 114 | @requires_auth 115 | def get(self, 
id): 116 | if id not in df.index: 117 | api.abort(404, "Book {} doesn't exist".format(id)) 118 | 119 | book = dict(df.loc[id]) 120 | return book 121 | 122 | @api.response(404, 'Book was not found') 123 | @api.response(200, 'Successful') 124 | @api.doc(description="Delete a book by its ID") 125 | @requires_auth 126 | def delete(self, id): 127 | if id not in df.index: 128 | api.abort(404, "Book {} doesn't exist".format(id)) 129 | 130 | df.drop(id, inplace=True) 131 | return {"message": "Book {} is removed.".format(id)}, 200 132 | 133 | @api.response(404, 'Book was not found') 134 | @api.response(400, 'Validation Error') 135 | @api.response(200, 'Successful') 136 | @api.expect(book_model, validate=True) 137 | @api.doc(description="Update a book by its ID") 138 | @requires_auth 139 | def put(self, id): 140 | 141 | if id not in df.index: 142 | api.abort(404, "Book {} doesn't exist".format(id)) 143 | 144 | # get the payload and convert it to a JSON 145 | book = request.json 146 | 147 | # Book ID cannot be changed 148 | if 'Identifier' in book and id != book['Identifier']: 149 | return {"message": "Identifier cannot be changed"}, 400 150 | 151 | # Update the values 152 | for key in book: 153 | if key not in book_model.keys(): 154 | # unexpected column 155 | return {"message": "Property {} is invalid".format(key)}, 400 156 | df.loc[id, key] = book[key] 157 | 158 | # df.append(book, ignore_index=True) 159 | return {"message": "Book {} has been successfully updated".format(id)}, 200 160 | 161 | 162 | if __name__ == '__main__': 163 | columns_to_drop = ['Edition Statement', 164 | 'Corporate Author', 165 | 'Corporate Contributors', 166 | 'Former owner', 167 | 'Engraver', 168 | 'Contributors', 169 | 'Issuance type', 170 | 'Shelfmarks' 171 | ] 172 | csv_file = "Books.csv" 173 | df = pd.read_csv(csv_file) 174 | 175 | # drop unnecessary columns 176 | df.drop(columns_to_drop, inplace=True, axis=1) 177 | 178 | # clean the date of publication & convert it to numeric data 179 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 180 | new_date = pd.to_numeric(new_date) 181 | new_date = new_date.fillna(0) 182 | df['Date of Publication'] = new_date 183 | 184 | # replace spaces in the name of columns 185 | df.columns = [c.replace(' ', '_') for c in df.columns] 186 | 187 | # set the index column; this will help us to find books with their ids 188 | df.set_index('Identifier', inplace=True) 189 | 190 | # run the application 191 | app.run(debug=True) 192 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_1_client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from requests.auth import HTTPBasicAuth 3 | 4 | 5 | def print_book(book): 6 | print("Book {") 7 | for key in book.keys(): 8 | attr = str(key) 9 | val = str(book[key]) 10 | print("\t" + attr + ":" + val) 11 | print("}") 12 | 13 | 14 | def get_book(id, username, password): 15 | r = requests.get("http://127.0.0.1:5000/books/" + str(id), auth=HTTPBasicAuth(username, password)) 16 | book = r.json() 17 | print("Get status Code:" + str(r.status_code)) 18 | if r.ok: 19 | print_book(book) 20 | return book 21 | else: 22 | print('Error:' + book['message']) 23 | 24 | if __name__ == '__main__': 25 | 26 | print("***** Book information With Valid Credentials *****") 27 | book = get_book(206, 'admin', 'admin') 28 | 29 | print("***** Book information With Invalid Credentials *****") 30 | book = get_book(206, 'xxxxxxxxx', 
'yyyyyyyy') 31 | 32 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_2.py: -------------------------------------------------------------------------------- 1 | from time import sleep, time 2 | from itsdangerous import JSONWebSignatureSerializer, BadSignature, SignatureExpired 3 | 4 | 5 | class AuthenticationToken: 6 | def __init__(self, secret_key, expires_in): 7 | self.secret_key = secret_key 8 | self.expires_in = expires_in 9 | self.serializer = JSONWebSignatureSerializer(secret_key) 10 | 11 | def generate_token(self, username): 12 | 13 | info = { 14 | 'username': username, 15 | 'creation_time': time() 16 | } 17 | 18 | token = self.serializer.dumps(info) 19 | return token.decode() 20 | 21 | def validate_token(self, token): 22 | info = self.serializer.loads(token.encode()) 23 | 24 | if time() - info['creation_time'] > self.expires_in: 25 | raise SignatureExpired("The Token has been expired; get a new token") 26 | 27 | return info['username'] 28 | 29 | 30 | if __name__ == "__main__": 31 | 32 | SECRET_KEY = "A SECRET KEY; USUALLY A VERY LONG RANDOM STRING" 33 | expires_in = 10 34 | auth = AuthenticationToken(SECRET_KEY, expires_in) 35 | token = auth.generate_token('admin') 36 | print("Generated token is:", token) 37 | 38 | info = auth.validate_token(token) 39 | print("The token decoded as:", str(info)) 40 | 41 | sleep(expires_in + 1) 42 | 43 | try: 44 | expired_info = auth.validate_token(token) 45 | except SignatureExpired as e: 46 | print(e) 47 | except BadSignature as e: 48 | print("Invalid Token") 49 | 50 | try: 51 | expired_info = auth.validate_token("sssssssssssss") 52 | except SignatureExpired as e: 53 | print(e) 54 | except BadSignature as e: 55 | print("Invalid Token") 56 | -------------------------------------------------------------------------------- /Week8_Authentication/activity_3.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | from functools import wraps 4 | 5 | import jwt 6 | import pandas as pd 7 | from flask import Flask 8 | from flask import request 9 | from flask_restx import Resource, Api, abort 10 | from flask_restx import fields 11 | from flask_restx import inputs 12 | from flask_restx import reqparse 13 | 14 | 15 | class AuthenticationToken: 16 | def __init__(self, secret_key, expires_in): 17 | self.secret_key = secret_key 18 | self.expires_in = expires_in 19 | 20 | def generate_token(self, username): 21 | info = { 22 | 'username': username, 23 | 'exp': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.expires_in) 24 | } 25 | return jwt.encode(info, self.secret_key, algorithm='HS256') 26 | 27 | def validate_token(self, token): 28 | info = jwt.decode(token, self.secret_key, algorithms=['HS256']) 29 | return info['username'] 30 | 31 | 32 | SECRET_KEY = "A SECRET KEY; USUALLY A VERY LONG RANDOM STRING" 33 | expires_in = 600 34 | auth = AuthenticationToken(SECRET_KEY, expires_in) 35 | 36 | app = Flask(__name__) 37 | api = Api(app, authorizations={ 38 | 'API-KEY': { 39 | 'type': 'apiKey', 40 | 'in': 'header', 41 | 'name': 'AUTH-TOKEN' 42 | } 43 | }, 44 | security='API-KEY', 45 | default="Books", # Default namespace 46 | title="Book Dataset", # Documentation Title 47 | description="This is just a simple example to show how publish data as a service.") # Documentation Description 48 | 49 | 50 | def requires_auth(f): 51 | @wraps(f) 52 | def decorated(*args, **kwargs): 53 | 54 | token = request.headers.get('AUTH-TOKEN') 55 | 
if not token: 56 | abort(401, 'Authentication token is missing') 57 | 58 | try: 59 | user = auth.validate_token(token) 60 | except Exception as e: 61 | abort(401, e) 62 | 63 | return f(*args, **kwargs) 64 | 65 | return decorated 66 | 67 | 68 | # The following is the schema of Book 69 | book_model = api.model('Book', { 70 | 'Flickr_URL': fields.String, 71 | 'Publisher': fields.String, 72 | 'Author': fields.String, 73 | 'Title': fields.String, 74 | 'Date_of_Publication': fields.Integer, 75 | 'Identifier': fields.Integer, 76 | 'Place_of_Publication': fields.String 77 | }) 78 | 79 | parser = reqparse.RequestParser() 80 | parser.add_argument('order', choices=list(column for column in book_model.keys())) 81 | parser.add_argument('ascending', type=inputs.boolean) 82 | 83 | credential_model = api.model('credential', { 84 | 'username': fields.String, 85 | 'password': fields.String 86 | }) 87 | 88 | credential_parser = reqparse.RequestParser() 89 | credential_parser.add_argument('username', type=str) 90 | credential_parser.add_argument('password', type=str) 91 | 92 | 93 | @api.route('/token') 94 | class Token(Resource): 95 | @api.response(200, 'Successful') 96 | @api.doc(description="Generates an authentication token") 97 | @api.expect(credential_parser, validate=True) 98 | def get(self): 99 | args = credential_parser.parse_args() 100 | 101 | username = args.get('username') 102 | password = args.get('password') 103 | 104 | if username == 'admin' and password == 'admin': 105 | return {"token": auth.generate_token(username)} 106 | 107 | return {"message": "authorization has been refused for those credentials."}, 401 108 | 109 | 110 | @api.route('/books') 111 | class BooksList(Resource): 112 | @api.response(200, 'Successful') 113 | @api.doc(description="Get all books") 114 | @requires_auth 115 | def get(self): 116 | # get books as JSON string 117 | args = parser.parse_args() 118 | 119 | # retrieve the query parameters 120 | order_by = args.get('order') 121 | ascending = args.get('ascending', True) 122 | 123 | if order_by: 124 | df.sort_values(by=order_by, inplace=True, ascending=ascending) 125 | 126 | json_str = df.to_json(orient='index') 127 | 128 | # convert the string JSON to a real JSON 129 | ds = json.loads(json_str) 130 | ret = [] 131 | 132 | for idx in ds: 133 | book = ds[idx] 134 | book['Identifier'] = int(idx) 135 | ret.append(book) 136 | 137 | return ret 138 | 139 | @api.response(201, 'Book Created Successfully') 140 | @api.response(400, 'Validation Error') 141 | @api.doc(description="Add a new book") 142 | @api.expect(book_model, validate=True) 143 | @requires_auth 144 | def post(self): 145 | book = request.json 146 | 147 | if 'Identifier' not in book: 148 | return {"message": "Missing Identifier"}, 400 149 | 150 | id = book['Identifier'] 151 | 152 | # check if the given identifier does not exist 153 | if id in df.index: 154 | return {"message": "A book with Identifier={} is already in the dataset".format(id)}, 400 155 | 156 | # Put the values into the dataframe 157 | for key in book: 158 | if key not in book_model.keys(): 159 | # unexpected column 160 | return {"message": "Property {} is invalid".format(key)}, 400 161 | df.loc[id, key] = book[key] 162 | 163 | # df.append(book, ignore_index=True) 164 | return {"message": "Book {} is created".format(id)}, 201 165 | 166 | 167 | @api.route('/books/<int:id>') 168 | @api.param('id', 'The Book identifier') 169 | class Books(Resource): 170 | @api.response(404, 'Book was not found') 171 | @api.response(200, 'Successful') 172 | @api.doc(description="Get a 
book by its ID") 173 | @requires_auth 174 | def get(self, id): 175 | if id not in df.index: 176 | api.abort(404, "Book {} doesn't exist".format(id)) 177 | 178 | book = dict(df.loc[id]) 179 | return book 180 | 181 | @api.response(404, 'Book was not found') 182 | @api.response(200, 'Successful') 183 | @api.doc(description="Delete a book by its ID") 184 | @requires_auth 185 | def delete(self, id): 186 | if id not in df.index: 187 | api.abort(404, "Book {} doesn't exist".format(id)) 188 | 189 | df.drop(id, inplace=True) 190 | return {"message": "Book {} is removed.".format(id)}, 200 191 | 192 | @api.response(404, 'Book was not found') 193 | @api.response(400, 'Validation Error') 194 | @api.response(200, 'Successful') 195 | @api.expect(book_model, validate=True) 196 | @api.doc(description="Update a book by its ID") 197 | @requires_auth 198 | def put(self, id): 199 | 200 | if id not in df.index: 201 | api.abort(404, "Book {} doesn't exist".format(id)) 202 | 203 | # get the payload and convert it to a JSON 204 | book = request.json 205 | 206 | # Book ID cannot be changed 207 | if 'Identifier' in book and id != book['Identifier']: 208 | return {"message": "Identifier cannot be changed"}, 400 209 | 210 | # Update the values 211 | for key in book: 212 | if key not in book_model.keys(): 213 | # unexpected column 214 | return {"message": "Property {} is invalid".format(key)}, 400 215 | df.loc[id, key] = book[key] 216 | 217 | # df.append(book, ignore_index=True) 218 | return {"message": "Book {} has been successfully updated".format(id)}, 200 219 | 220 | 221 | if __name__ == '__main__': 222 | columns_to_drop = ['Edition Statement', 223 | 'Corporate Author', 224 | 'Corporate Contributors', 225 | 'Former owner', 226 | 'Engraver', 227 | 'Contributors', 228 | 'Issuance type', 229 | 'Shelfmarks' 230 | ] 231 | csv_file = "Books.csv" 232 | df = pd.read_csv(csv_file) 233 | 234 | # drop unnecessary columns 235 | df.drop(columns_to_drop, inplace=True, axis=1) 236 | 237 | # clean the date of publication & convert it to numeric data 238 | new_date = df['Date of Publication'].str.extract(r'^(\d{4})', expand=False) 239 | new_date = pd.to_numeric(new_date) 240 | new_date = new_date.fillna(0) 241 | df['Date of Publication'] = new_date 242 | 243 | # replace spaces in the name of columns 244 | df.columns = [c.replace(' ', '_') for c in df.columns] 245 | 246 | # set the index column; this will help us to find books with their ids 247 | df.set_index('Identifier', inplace=True) 248 | 249 | # run the application 250 | app.run(debug=True) 251 | -------------------------------------------------------------------------------- /Week9_Classification/activity_1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.neighbors import KNeighborsClassifier 3 | from sklearn.utils import shuffle 4 | 5 | 6 | def load_iris(iris_path, split_percentage): 7 | df = pd.read_csv(iris_path) 8 | 9 | df = shuffle(df) 10 | iris_x = df.drop('species', axis=1).values 11 | iris_y = df['species'].values 12 | 13 | # Split iris data in train and test data 14 | # A random permutation, to split the data randomly 15 | 16 | split_point = int(len(iris_x) * split_percentage) 17 | iris_X_train = iris_x[:split_point] 18 | iris_y_train = iris_y[:split_point] 19 | iris_X_test = iris_x[split_point:] 20 | iris_y_test = iris_y[split_point:] 21 | 22 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 23 | 24 | 25 | if __name__ == '__main__': 26 | csv_file = 'iris.csv' 27 | 28 | 
# Split the data into test and train parts 29 | iris_X_train, iris_y_train, iris_X_test, iris_y_test = load_iris(csv_file, split_percentage=0.7) 30 | 31 | # train a classifier 32 | knn = KNeighborsClassifier() 33 | knn.fit(iris_X_train, iris_y_train) 34 | 35 | # predict the test set 36 | predictions = knn.predict(iris_X_test) 37 | 38 | print("Actual: ") 39 | print(iris_y_test) 40 | 41 | print("Predictions: ") 42 | print(predictions) 43 | 44 | -------------------------------------------------------------------------------- /Week9_Classification/activity_2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import confusion_matrix 3 | from sklearn.neighbors import KNeighborsClassifier 4 | from sklearn.utils import shuffle 5 | from sklearn.metrics import precision_score, accuracy_score, recall_score 6 | 7 | 8 | def load_iris(iris_path, split_percentage): 9 | df = pd.read_csv(iris_path) 10 | 11 | df = shuffle(df) 12 | iris_x = df.drop('species', axis=1).values 13 | iris_y = df['species'].values 14 | 15 | # Split iris data in train and test data 16 | # A random permutation, to split the data randomly 17 | 18 | split_point = int(len(iris_x) * split_percentage) 19 | iris_X_train = iris_x[:split_point] 20 | iris_y_train = iris_y[:split_point] 21 | iris_X_test = iris_x[split_point:] 22 | iris_y_test = iris_y[split_point:] 23 | 24 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 25 | 26 | 27 | if __name__ == '__main__': 28 | csv_file = 'iris.csv' 29 | 30 | # Split the data into test and train parts 31 | iris_X_train, iris_y_train, iris_X_test, iris_y_test = load_iris(csv_file, split_percentage=0.7) 32 | 33 | # train a classifier 34 | knn = KNeighborsClassifier() 35 | knn.fit(iris_X_train, iris_y_train) 36 | 37 | # predict the test set 38 | predictions = knn.predict(iris_X_test) 39 | 40 | print("confusion_matrix:\n", confusion_matrix(iris_y_test, predictions)) 41 | print("precision:\t", precision_score(iris_y_test, predictions, average=None)) 42 | print("recall:\t\t", recall_score(iris_y_test, predictions, average=None)) 43 | print("accuracy:\t", accuracy_score(iris_y_test, predictions)) 44 | -------------------------------------------------------------------------------- /Week9_Classification/activity_3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.model_selection import cross_val_score 5 | from sklearn.naive_bayes import GaussianNB 6 | from sklearn.neighbors import KNeighborsClassifier 7 | from sklearn.svm import SVC 8 | from sklearn.tree import DecisionTreeClassifier 9 | from sklearn.utils import shuffle 10 | 11 | 12 | def load_iris(iris_path, split_percentage): 13 | df = pd.read_csv(iris_path) 14 | 15 | df = shuffle(df) 16 | iris_x = df.drop('species', axis=1).values 17 | iris_y = df['species'].values 18 | 19 | # Split iris data in train and test data 20 | # A random permutation, to split the data randomly 21 | 22 | split_point = int(len(iris_x) * split_percentage) 23 | iris_X_train = iris_x[:split_point] 24 | iris_y_train = iris_y[:split_point] 25 | iris_X_test = iris_x[split_point:] 26 | iris_y_test = iris_y[split_point:] 27 | 28 | return iris_X_train, iris_y_train, iris_X_test, iris_y_test 29 | 30 | 31 | if __name__ == '__main__': 32 | 33 | csv_file = 'iris.csv' 34 | iris_X, iris_y, _, _ = 
load_iris(csv_file, split_percentage=0.7) 35 | 36 | classifiers = [KNeighborsClassifier(), 37 | DecisionTreeClassifier(), 38 | LinearDiscriminantAnalysis(), 39 | LogisticRegression(), 40 | GaussianNB(), 41 | SVC()] 42 | 43 | classifier_accuracy_list = [] 44 | for i, classifier in enumerate(classifiers): 45 | # split the dataset into 5 folds; then test the classifier against each fold one by one 46 | accuracies = cross_val_score(classifier, iris_X, iris_y, cv=5) 47 | classifier_accuracy_list.append((accuracies.mean(), type(classifier).__name__)) 48 | 49 | # sort the classifiers 50 | classifier_accuracy_list = sorted(classifier_accuracy_list, reverse=True) 51 | for item in classifier_accuracy_list: 52 | print(item[1], ':', item[0]) 53 | -------------------------------------------------------------------------------- /Week9_Classification/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.1,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.1,1.5,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 6.0,3.4,4.5,1.6,versicolor 88 | 
6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica -------------------------------------------------------------------------------- /assignments/Countries-Continents.csv: -------------------------------------------------------------------------------- 1 | Continent,Country 2 | Africa,Algeria 3 | Africa,Angola 4 | Africa,Benin 5 | Africa,Botswana 6 | Africa,Burkina 7 | Africa,Burundi 8 | Africa,Cameroon 9 | Africa,Cape Verde 10 | Africa,Central African Republic 11 | Africa,Chad 12 | Africa,Comoros 13 | Africa,Congo 14 | Africa,"Congo, Democratic Republic of" 15 | Africa,Djibouti 16 | Africa,Egypt 17 | Africa,Equatorial Guinea 18 | Africa,Eritrea 19 | Africa,Ethiopia 20 | Africa,Gabon 21 | Africa,Gambia 22 | Africa,Ghana 23 | Africa,Guinea 24 | Africa,Guinea-Bissau 25 | Africa,Ivory Coast 26 | Africa,Kenya 27 | Africa,Lesotho 28 | Africa,Liberia 29 | Africa,Libya 30 | Africa,Madagascar 31 | Africa,Malawi 32 | Africa,Mali 33 | Africa,Mauritania 34 | Africa,Mauritius 35 | Africa,Morocco 36 | Africa,Mozambique 37 | Africa,Namibia 38 | Africa,Niger 39 | Africa,Nigeria 40 | Africa,Rwanda 41 | Africa,Sao Tome and Principe 42 | Africa,Senegal 43 | Africa,Seychelles 44 | Africa,Sierra Leone 45 | Africa,Somalia 46 | Africa,South Africa 47 | Africa,South Sudan 48 | Africa,Sudan 49 | Africa,Swaziland 50 | Africa,Tanzania 51 | Africa,Togo 52 | Africa,Tunisia 53 | Africa,Uganda 54 | Africa,Zambia 55 | Africa,Zimbabwe 56 | Asia,Afghanistan 57 | Asia,Bahrain 58 | Asia,Bangladesh 59 | Asia,Bhutan 60 | Asia,Brunei 61 | Asia,Burma (Myanmar) 62 | 
Asia,Cambodia 63 | Asia,China 64 | Asia,East Timor 65 | Asia,India 66 | Asia,Indonesia 67 | Asia,Iran 68 | Asia,Iraq 69 | Asia,Israel 70 | Asia,Japan 71 | Asia,Jordan 72 | Asia,Kazakhstan 73 | Asia,"Korea, North" 74 | Asia,"Korea, South" 75 | Asia,Kuwait 76 | Asia,Kyrgyzstan 77 | Asia,Laos 78 | Asia,Lebanon 79 | Asia,Malaysia 80 | Asia,Maldives 81 | Asia,Mongolia 82 | Asia,Nepal 83 | Asia,Oman 84 | Asia,Pakistan 85 | Asia,Philippines 86 | Asia,Qatar 87 | Asia,Russian Federation 88 | Asia,Saudi Arabia 89 | Asia,Singapore 90 | Asia,Sri Lanka 91 | Asia,Syria 92 | Asia,Tajikistan 93 | Asia,Thailand 94 | Asia,Turkey 95 | Asia,Turkmenistan 96 | Asia,United Arab Emirates 97 | Asia,Uzbekistan 98 | Asia,Vietnam 99 | Asia,Yemen 100 | Europe,Albania 101 | Europe,Andorra 102 | Europe,Armenia 103 | Europe,Austria 104 | Europe,Azerbaijan 105 | Europe,Belarus 106 | Europe,Belgium 107 | Europe,Bosnia and Herzegovina 108 | Europe,Bulgaria 109 | Europe,Croatia 110 | Europe,Cyprus 111 | Europe,CZ 112 | Europe,Denmark 113 | Europe,Estonia 114 | Europe,Finland 115 | Europe,France 116 | Europe,Georgia 117 | Europe,Germany 118 | Europe,Greece 119 | Europe,Hungary 120 | Europe,Iceland 121 | Europe,Ireland 122 | Europe,Italy 123 | Europe,Latvia 124 | Europe,Liechtenstein 125 | Europe,Lithuania 126 | Europe,Luxembourg 127 | Europe,Macedonia 128 | Europe,Malta 129 | Europe,Moldova 130 | Europe,Monaco 131 | Europe,Montenegro 132 | Europe,Netherlands 133 | Europe,Norway 134 | Europe,Poland 135 | Europe,Portugal 136 | Europe,Romania 137 | Europe,San Marino 138 | Europe,Serbia 139 | Europe,Slovakia 140 | Europe,Slovenia 141 | Europe,Spain 142 | Europe,Sweden 143 | Europe,Switzerland 144 | Europe,Ukraine 145 | Europe,United Kingdom 146 | Europe,Vatican City 147 | North America,Antigua and Barbuda 148 | North America,Bahamas 149 | North America,Barbados 150 | North America,Belize 151 | North America,Canada 152 | North America,Costa Rica 153 | North America,Cuba 154 | North America,Dominica 155 | North America,Dominican Republic 156 | North America,El Salvador 157 | North America,Grenada 158 | North America,Guatemala 159 | North America,Haiti 160 | North America,Honduras 161 | North America,Jamaica 162 | North America,Mexico 163 | North America,Nicaragua 164 | North America,Panama 165 | North America,Saint Kitts and Nevis 166 | North America,Saint Lucia 167 | North America,Saint Vincent and the Grenadines 168 | North America,Trinidad and Tobago 169 | North America,US 170 | Oceania,Australia 171 | Oceania,Fiji 172 | Oceania,Kiribati 173 | Oceania,Marshall Islands 174 | Oceania,Micronesia 175 | Oceania,Nauru 176 | Oceania,New Zealand 177 | Oceania,Palau 178 | Oceania,Papua New Guinea 179 | Oceania,Samoa 180 | Oceania,Solomon Islands 181 | Oceania,Tonga 182 | Oceania,Tuvalu 183 | Oceania,Vanuatu 184 | South America,Argentina 185 | South America,Bolivia 186 | South America,Brazil 187 | South America,Chile 188 | South America,Colombia 189 | South America,Ecuador 190 | South America,Guyana 191 | South America,Paraguay 192 | South America,Peru 193 | South America,Suriname 194 | South America,Uruguay 195 | South America,Venezuela 196 | -------------------------------------------------------------------------------- /assignments/Getting Started Academic Cloud.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Getting Started Academic Cloud.pdf 
-------------------------------------------------------------------------------- /assignments/Olympics_dataset1.csv: -------------------------------------------------------------------------------- 1 | Team,,Summer Games,,,, 2 | ,Rubish,Number of Games the country participated in,Gold,Silver,Bronze,Total 3 | Afghanistan (AFG),BLA,14,0,0,2,2 4 | Algeria (ALG),BLA,13,5,4,8,17 5 | Argentina (ARG),BLA,24,21,25,28,74 6 | Armenia (ARM),BLA,6,2,6,6,14 7 | Australasia (ANZ) [ANZ],BLA,2,3,4,5,12 8 | Kingdom of the Void (VOID),,,,,, 9 | Australia (AUS) [AUS] [Z],BLA,26,147,163,187,497 10 | Austria (AUT),BLA,27,18,33,36,87 11 | Azerbaijan (AZE),BLA,6,7,11,25,43 12 | Bahamas (BAH),BLA,16,6,2,6,14 13 | Bahrain (BRN),BLA,9,2,1,0,3 14 | Barbados (BAR) [BAR],BLA,12,0,0,1,1 15 | Belarus (BLR),BLA,6,12,27,39,78 16 | Belgium (BEL),BLA,26,40,53,55,148 17 | Bermuda (BER),BLA,18,0,0,1,1 18 | Bohemia (BOH) [BOH] [Z],BLA,3,0,1,3,4 19 | Botswana (BOT),BLA,10,0,1,0,1 20 | Treasure Island (TRI),,,,,, 21 | Brazil (BRA),BLA,22,30,36,62,128 22 | British West Indies (BWI) [BWI],BLA,1,0,0,2,2 23 | Bulgaria (BUL) [H],BLA,20,51,87,80,218 24 | Burundi (BDI),BLA,6,1,1,0,2 25 | Cameroon (CMR),BLA,14,3,1,2,6 26 | Canada (CAN),BLA,26,64,102,136,302 27 | Chile (CHI) [I],BLA,23,2,7,4,13 28 | China (CHN) [CHN],BLA,10,224,167,155,546 29 | Colombia (COL),BLA,19,5,9,14,28 30 | Costa Rica (CRC),BLA,15,1,1,2,4 31 | Ivory Coast (CIV) [CIV],BLA,13,1,1,1,3 32 | Croatia (CRO),BLA,7,11,10,12,33 33 | Cuba (CUB) [Z],BLA,20,78,68,79,225 34 | Cyprus (CYP),BLA,10,0,1,0,1 35 | Czech Republic (CZE) [CZE],BLA,6,15,17,24,56 36 | Czechoslovakia (TCH) [TCH],BLA,16,49,49,45,143 37 | Denmark (DEN) [Z],BLA,27,45,74,75,194 38 | Djibouti (DJI) [B],BLA,8,0,0,1,1 39 | Dominican Republic (DOM),BLA,14,3,2,2,7 40 | Ecuador (ECU),BLA,14,1,1,0,2 41 | Egypt (EGY) [EGY] [Z],BLA,22,7,10,15,32 42 | Eritrea (ERI),BLA,5,0,0,1,1 43 | Estonia (EST),BLA,12,9,9,16,34 44 | Ethiopia (ETH),BLA,13,22,11,20,53 45 | Fiji (FIJ),BLA,14,1,0,0,1 46 | Finland (FIN),BLA,25,101,85,117,303 47 | France (FRA) [O] [P] [Z],BLA,28,212,241,263,716 48 | Gabon (GAB),BLA,10,0,1,0,1 49 | Georgia (GEO),BLA,6,8,8,17,33 50 | Germany (GER) [GER] [Z],BLA,16,191,194,230,615 51 | United Team of Germany (EUA) [EUA],BLA,3,28,54,36,118 52 | East Germany (GDR) [GDR],BLA,5,153,129,127,409 53 | West Germany (FRG) [FRG],BLA,5,56,67,81,204 54 | Ghana (GHA) [GHA],BLA,14,0,1,3,4 55 | Great Britain (GBR) [GBR] [Z],BLA,28,263,295,291,849 56 | Republic of Mars (MARS),,,,,, 57 | Greece (GRE) [Z],BLA,28,33,43,40,116 58 | Grenada (GRN),BLA,9,1,1,0,2 59 | Guatemala (GUA),BLA,14,0,1,0,1 60 | Guyana (GUY) [GUY],BLA,17,0,0,1,1 61 | Haiti (HAI) [J],BLA,15,0,1,1,2 62 | Hong Kong (HKG) [HKG],BLA,16,1,1,1,3 63 | Hungary (HUN),BLA,26,175,147,169,491 64 | Iceland (ISL),BLA,20,0,2,2,4 65 | India (IND) [F],BLA,24,9,7,12,28 66 | Indonesia (INA),BLA,15,7,13,12,32 67 | Iran (IRI) [K],BLA,16,19,22,28,69 68 | Iraq (IRQ),BLA,14,0,0,1,1 69 | Ireland (IRL),BLA,21,9,10,12,31 70 | Israel (ISR),BLA,16,1,1,7,9 71 | Italy (ITA) [M] [S],BLA,27,206,178,193,577 72 | Jamaica (JAM) [JAM],BLA,17,22,35,20,77 73 | Japan (JPN),BLA,22,142,135,162,439 74 | Jordan (JOR),BLA,10,1,0,0,1 75 | Kazakhstan (KAZ),BLA,6,15,20,27,62 76 | Kenya (KEN),BLA,14,31,38,33,102 77 | Kosovo (KOS),BLA,1,1,0,0,1 78 | North Korea (PRK),BLA,10,16,16,22,54 79 | South Korea (KOR),BLA,17,90,87,90,267 80 | Kuwait (KUW),BLA,12,0,0,2,2 81 | Kyrgyzstan (KGZ),BLA,6,0,1,3,4 82 | Latvia (LAT),BLA,11,3,11,5,19 83 | Lebanon (LIB),BLA,17,0,2,2,4 84 | Liechtenstein (LIE),BLA,17,0,0,0,0 85 | 
Lithuania (LTU),BLA,9,6,7,12,25 86 | Luxembourg (LUX) [O],BLA,23,1,1,0,2 87 | Kingdom of the Rock (ROCK),,,,,, 88 | Macedonia (MKD),BLA,6,0,0,1,1 89 | Malaysia (MAS) [MAS],BLA,13,0,7,4,11 90 | Mauritius (MRI),BLA,9,0,0,1,1 91 | Mexico (MEX),BLA,23,13,24,32,69 92 | Moldova (MDA),BLA,6,0,2,3,5 93 | Mongolia (MGL),BLA,13,2,10,14,26 94 | Montenegro (MNE),BLA,3,0,1,0,1 95 | Morocco (MAR),BLA,14,6,5,12,23 96 | Mozambique (MOZ),BLA,10,1,0,1,2 97 | Namibia (NAM),BLA,7,0,4,0,4 98 | Netherlands (NED) [Z],BLA,26,85,92,108,285 99 | Netherlands Antilles (AHO) [AHO] [I],BLA,13,0,1,0,1 100 | New Zealand (NZL) [NZL],BLA,23,46,27,44,117 101 | Niger (NIG),BLA,12,0,1,1,2 102 | Nigeria (NGR),BLA,16,3,10,12,25 103 | Norway (NOR) [Q],BLA,25,56,49,47,152 104 | Pakistan (PAK),BLA,17,3,3,4,10 105 | Panama (PAN),BLA,17,1,0,2,3 106 | Paraguay (PAR),BLA,12,0,1,0,1 107 | Peru (PER) [L],BLA,18,1,3,0,4 108 | Kingdom of the Stromlands (STORM),,,,,, 109 | Philippines (PHI),BLA,21,0,3,7,10 110 | Poland (POL),BLA,21,68,83,133,284 111 | Portugal (POR),BLA,24,4,8,12,24 112 | Puerto Rico (PUR),BLA,18,1,2,6,9 113 | Qatar (QAT),BLA,9,0,1,4,5 114 | Romania (ROU),BLA,21,89,95,122,306 115 | Russia (RUS) [RUS],BLA,6,149,124,153,426 116 | Russian Empire (RU1) [RU1],BLA,3,1,4,3,8 117 | Soviet Union (URS) [URS],BLA,9,395,319,296,"1,010" 118 | Saudi Arabia (KSA),BLA,11,0,1,2,3 119 | Samoa (SAM),BLA,9,0,1,0,1 120 | Senegal (SEN),BLA,14,0,1,0,1 121 | Serbia (SRB) [SRB],BLA,4,3,6,6,15 122 | Serbia and Montenegro (SCG) [SCG],BLA,1,0,2,0,2 123 | Singapore (SIN),BLA,16,1,2,2,5 124 | Slovakia (SVK) [SVK],BLA,6,9,12,7,28 125 | Slovenia (SLO),BLA,7,5,8,10,23 126 | South Africa (RSA),BLA,19,26,31,29,86 127 | Spain (ESP) [Z],BLA,23,45,64,41,150 128 | Sri Lanka (SRI) [SRI],BLA,17,0,2,0,2 129 | Sudan (SUD),BLA,12,0,1,0,1 130 | Suriname (SUR) [E],BLA,12,1,0,1,2 131 | Sweden (SWE) [Z],BLA,27,145,170,179,494 132 | Switzerland (SUI),BLA,28,50,75,67,192 133 | Syria (SYR),BLA,13,1,1,1,3 134 | Chinese Taipei (TPE) [TPE] [TPE2],BLA,14,5,7,12,24 135 | Tajikistan (TJK),BLA,6,1,1,2,4 136 | Tanzania (TAN) [TAN],BLA,13,0,2,0,2 137 | Thailand (THA),BLA,16,9,8,16,33 138 | Togo (TOG),BLA,10,0,0,1,1 139 | Tonga (TGA),BLA,9,0,1,0,1 140 | Trinidad and Tobago (TRI) [TRI],BLA,17,2,6,11,19 141 | Tunisia (TUN),BLA,14,4,2,7,13 142 | Turkey (TUR),BLA,22,39,24,28,91 143 | Uganda (UGA),BLA,15,2,3,2,7 144 | Ukraine (UKR),BLA,6,35,30,56,121 145 | United Arab Emirates (UAE),BLA,9,1,0,1,2 146 | United States (USA) [P] [Q] [R] [Z],BLA,27,"1,022",795,705,"2,522" 147 | Uruguay (URU),BLA,21,2,2,6,10 148 | Uzbekistan (UZB),BLA,6,8,6,17,31 149 | Venezuela (VEN),BLA,18,2,3,10,15 150 | Vietnam (VIE),BLA,15,1,3,0,4 151 | Virgin Islands (ISV),BLA,12,0,1,0,1 152 | Yugoslavia (YUG) [YUG],BLA,18,28,31,31,90 153 | Zambia (ZAM) [ZAM],BLA,13,0,1,1,2 154 | Republic of Gamers (GAME),BLA,,,,, 155 | Zimbabwe (ZIM) [ZIM],BLA,13,3,4,1,8 156 | Unified Team (EUN) [EUN],BLA,1,45,38,29,112 157 | Independent Olympic Athletes (IOA) [IOA],BLA,3,1,0,1,2 158 | Independent Olympic Participants (IOP) [IOP],BLA,1,0,1,2,3 159 | Olympic Athletes from Russia (OAR) [OAR],BLA,0,0,0,0,0 160 | Mixed team (ZZX) [ZZX],BLA,3,8,5,4,17 161 | Totals,BLA,28,"5,115","5,080","5,482","15,677" 162 | -------------------------------------------------------------------------------- /assignments/Olympics_dataset2.csv: -------------------------------------------------------------------------------- 1 | Team,Winter Games,,,,,Combined Total,,,, 2 | ,Number of Games the country participated in,Gold,Silver,Bronze,Total,Number of Games the 
country participated in,Gold,Silver,Bronze,Total 3 | Afghanistan (AFG),0,0,0,0,0,14,0,0,2,2 4 | Algeria (ALG),3,0,0,0,0,16,5,4,8,17 5 | Argentina (ARG),19,0,0,0,0,43,21,25,28,74 6 | Armenia (ARM),7,0,0,0,0,13,2,6,6,14 7 | Australasia (ANZ) [ANZ],0,0,0,0,0,2,3,4,5,12 8 | Kingdom of the Void (VOID),,,,,,,,,, 9 | Australia (AUS) [AUS] [Z],19,5,5,5,15,45,152,168,192,512 10 | Austria (AUT),23,64,81,87,232,50,82,114,123,319 11 | Azerbaijan (AZE),6,0,0,0,0,12,7,11,25,43 12 | Bahamas (BAH),0,0,0,0,0,16,6,2,6,14 13 | Bahrain (BRN),0,0,0,0,0,9,2,1,0,3 14 | Barbados (BAR) [BAR],0,0,0,0,0,12,0,0,1,1 15 | Belarus (BLR),7,8,5,5,18,13,20,32,44,96 16 | Belgium (BEL),21,1,2,3,6,47,41,55,58,154 17 | Bermuda (BER),8,0,0,0,0,26,0,0,1,1 18 | Bohemia (BOH) [BOH] [Z],0,0,0,0,0,3,0,1,3,4 19 | Botswana (BOT),0,0,0,0,0,10,0,1,0,1 20 | Treasure Island (TRI),,,,,,,,,, 21 | Brazil (BRA),8,0,0,0,0,30,30,36,62,128 22 | British West Indies (BWI) [BWI],0,0,0,0,0,1,0,0,2,2 23 | Bulgaria (BUL) [H],20,1,2,3,6,40,52,89,83,224 24 | Burundi (BDI),0,0,0,0,0,6,1,1,0,2 25 | Cameroon (CMR),1,0,0,0,0,15,3,1,2,6 26 | Canada (CAN),23,73,64,62,199,49,137,166,198,501 27 | Chile (CHI) [I],17,0,0,0,0,40,2,7,4,13 28 | China (CHN) [CHN],11,13,28,21,62,21,237,195,176,608 29 | Colombia (COL),2,0,0,0,0,21,5,9,14,28 30 | Costa Rica (CRC),6,0,0,0,0,21,1,1,2,4 31 | Ivory Coast (CIV) [CIV],0,0,0,0,0,13,1,1,1,3 32 | Croatia (CRO),8,4,6,1,11,15,15,16,13,44 33 | Cuba (CUB) [Z],0,0,0,0,0,20,78,68,79,225 34 | Cyprus (CYP),11,0,0,0,0,21,0,1,0,1 35 | Czech Republic (CZE) [CZE],7,9,11,11,31,13,24,28,35,87 36 | Czechoslovakia (TCH) [TCH],16,2,8,15,25,32,51,57,60,168 37 | Denmark (DEN) [Z],14,0,1,0,1,41,45,75,75,195 38 | Djibouti (DJI) [B],0,0,0,0,0,8,0,0,1,1 39 | Dominican Republic (DOM),0,0,0,0,0,14,3,2,2,7 40 | Ecuador (ECU),1,0,0,0,0,15,1,1,0,2 41 | Egypt (EGY) [EGY] [Z],1,0,0,0,0,23,7,10,15,32 42 | Eritrea (ERI),1,0,0,0,0,6,0,0,1,1 43 | Estonia (EST),10,4,2,1,7,22,13,11,17,41 44 | Ethiopia (ETH),2,0,0,0,0,15,22,11,20,53 45 | Fiji (FIJ),3,0,0,0,0,17,1,0,0,1 46 | Finland (FIN),23,43,63,61,167,48,144,148,178,470 47 | France (FRA) [O] [P] [Z],23,36,35,53,124,51,248,276,316,840 48 | Gabon (GAB),0,0,0,0,0,10,0,1,0,1 49 | Georgia (GEO),7,0,0,0,0,13,8,8,17,33 50 | Germany (GER) [GER] [Z],12,92,88,60,240,28,283,282,290,855 51 | United Team of Germany (EUA) [EUA],3,8,6,5,19,6,36,60,41,137 52 | East Germany (GDR) [GDR],6,39,36,35,110,11,192,165,162,519 53 | West Germany (FRG) [FRG],6,11,15,13,39,11,67,82,94,243 54 | Ghana (GHA) [GHA],2,0,0,0,0,16,0,1,3,4 55 | Great Britain (GBR) [GBR] [Z],23,11,4,16,31,51,274,299,307,880 56 | Republic of Mars (MARS),,,,,,,,,, 57 | Greece (GRE) [Z],19,0,0,0,0,47,33,43,40,116 58 | Grenada (GRN),0,0,0,0,0,9,1,1,0,2 59 | Guatemala (GUA),1,0,0,0,0,15,0,1,0,1 60 | Guyana (GUY) [GUY],0,0,0,0,0,17,0,0,1,1 61 | Haiti (HAI) [J],0,0,0,0,0,15,0,1,1,2 62 | Hong Kong (HKG) [HKG],5,0,0,0,0,21,1,1,1,3 63 | Hungary (HUN),23,1,2,4,7,49,176,149,173,498 64 | Iceland (ISL),18,0,0,0,0,38,0,2,2,4 65 | India (IND) [F],10,0,0,0,0,34,9,7,12,28 66 | Indonesia (INA),0,0,0,0,0,15,7,13,12,32 67 | Iran (IRI) [K],11,0,0,0,0,27,19,22,28,69 68 | Iraq (IRQ),0,0,0,0,0,14,0,0,1,1 69 | Ireland (IRL),7,0,0,0,0,28,9,10,12,31 70 | Israel (ISR),7,0,0,0,0,23,1,1,7,9 71 | Italy (ITA) [M] [S],23,40,36,48,124,50,246,214,241,701 72 | Jamaica (JAM) [JAM],8,0,0,0,0,25,22,35,20,77 73 | Japan (JPN),21,14,22,22,58,43,156,157,184,497 74 | Jordan (JOR),0,0,0,0,0,10,1,0,0,1 75 | Kazakhstan (KAZ),7,1,3,4,8,13,16,23,31,70 76 | Kenya (KEN),4,0,0,0,0,18,31,38,33,102 77 | Kosovo 
(KOS),1,0,0,0,0,2,1,0,0,1 78 | North Korea (PRK),9,0,1,1,2,19,16,17,23,56 79 | South Korea (KOR),18,31,25,14,70,35,121,112,104,337 80 | Kuwait (KUW),0,0,0,0,0,12,0,0,2,2 81 | Kyrgyzstan (KGZ),7,0,0,0,0,13,0,1,3,4 82 | Latvia (LAT),11,0,4,4,8,22,3,15,9,27 83 | Lebanon (LIB),17,0,0,0,0,34,0,2,2,4 84 | Liechtenstein (LIE),19,2,2,6,10,36,2,2,6,10 85 | Lithuania (LTU),9,0,0,0,0,18,6,7,12,25 86 | Luxembourg (LUX) [O],9,0,2,0,2,32,1,3,0,4 87 | Kingdom of the Rock (ROCK),,,,,,,,,, 88 | Macedonia (MKD),6,0,0,0,0,12,0,0,1,1 89 | Malaysia (MAS) [MAS],1,0,0,0,0,14,0,7,4,11 90 | Mauritius (MRI),0,0,0,0,0,9,0,0,1,1 91 | Mexico (MEX),9,0,0,0,0,32,13,24,32,69 92 | Moldova (MDA),7,0,0,0,0,13,0,2,3,5 93 | Mongolia (MGL),14,0,0,0,0,27,2,10,14,26 94 | Montenegro (MNE),3,0,0,0,0,6,0,1,0,1 95 | Morocco (MAR),7,0,0,0,0,21,6,5,12,23 96 | Mozambique (MOZ),0,0,0,0,0,10,1,0,1,2 97 | Namibia (NAM),0,0,0,0,0,7,0,4,0,4 98 | Netherlands (NED) [Z],21,45,44,41,130,47,130,136,149,415 99 | Netherlands Antilles (AHO) [AHO] [I],2,0,0,0,0,15,0,1,0,1 100 | New Zealand (NZL) [NZL],16,0,1,2,3,39,46,28,46,120 101 | Niger (NIG),0,0,0,0,0,12,0,1,1,2 102 | Nigeria (NGR),1,0,0,0,0,17,3,10,12,25 103 | Norway (NOR) [Q],23,132,125,111,368,48,188,174,158,520 104 | Pakistan (PAK),3,0,0,0,0,20,3,3,4,10 105 | Panama (PAN),0,0,0,0,0,17,1,0,2,3 106 | Paraguay (PAR),1,0,0,0,0,13,0,1,0,1 107 | Peru (PER) [L],2,0,0,0,0,20,1,3,0,4 108 | Kingdom of the Stromlands (STORM),,,,,,,,,, 109 | Philippines (PHI),5,0,0,0,0,26,0,3,7,10 110 | Poland (POL),23,7,7,8,22,44,75,90,141,306 111 | Portugal (POR),8,0,0,0,0,32,4,8,12,24 112 | Puerto Rico (PUR),7,0,0,0,0,25,1,2,6,9 113 | Qatar (QAT),0,0,0,0,0,9,0,1,4,5 114 | Romania (ROU),21,0,0,1,1,42,89,95,123,307 115 | Russia (RUS) [RUS],6,47,38,35,120,12,196,162,188,546 116 | Russian Empire (RU1) [RU1],0,0,0,0,0,3,1,4,3,8 117 | Soviet Union (URS) [URS],9,78,57,59,194,18,473,376,355,"1,204" 118 | Saudi Arabia (KSA),0,0,0,0,0,11,0,1,2,3 119 | Samoa (SAM),0,0,0,0,0,9,0,1,0,1 120 | Senegal (SEN),5,0,0,0,0,19,0,1,0,1 121 | Serbia (SRB) [SRB],3,0,0,0,0,7,3,6,6,15 122 | Serbia and Montenegro (SCG) [SCG],1,0,0,0,0,2,0,2,0,2 123 | Singapore (SIN),1,0,0,0,0,17,1,2,2,5 124 | Slovakia (SVK) [SVK],7,3,4,1,8,13,12,16,8,36 125 | Slovenia (SLO),8,2,5,10,17,15,7,13,20,40 126 | South Africa (RSA),7,0,0,0,0,26,26,31,29,86 127 | Spain (ESP) [Z],20,1,0,3,4,43,46,64,44,154 128 | Sri Lanka (SRI) [SRI],0,0,0,0,0,17,0,2,0,2 129 | Sudan (SUD),0,0,0,0,0,12,0,1,0,1 130 | Suriname (SUR) [E],0,0,0,0,0,12,1,0,1,2 131 | Sweden (SWE) [Z],23,57,46,55,158,50,202,216,234,652 132 | Switzerland (SUI),23,55,46,52,153,51,105,121,119,345 133 | Syria (SYR),0,0,0,0,0,13,1,1,1,3 134 | Chinese Taipei (TPE) [TPE] [TPE2],12,0,0,0,0,26,5,7,12,24 135 | Tajikistan (TJK),4,0,0,0,0,10,1,1,2,4 136 | Tanzania (TAN) [TAN],0,0,0,0,0,13,0,2,0,2 137 | Thailand (THA),4,0,0,0,0,20,9,8,16,33 138 | Togo (TOG),2,0,0,0,0,12,0,0,1,1 139 | Tonga (TGA),2,0,0,0,0,11,0,1,0,1 140 | Trinidad and Tobago (TRI) [TRI],3,0,0,0,0,20,2,6,11,19 141 | Tunisia (TUN),0,0,0,0,0,14,4,2,7,13 142 | Turkey (TUR),17,0,0,0,0,39,39,24,28,91 143 | Uganda (UGA),0,0,0,0,0,15,2,3,2,7 144 | Ukraine (UKR),7,3,1,4,8,13,38,31,60,129 145 | United Arab Emirates (UAE),0,0,0,0,0,9,1,0,1,2 146 | United States (USA) [P] [Q] [R] [Z],23,105,110,90,305,50,"1,127",905,795,"2,827" 147 | Uruguay (URU),1,0,0,0,0,22,2,2,6,10 148 | Uzbekistan (UZB),7,1,0,0,1,13,9,6,17,32 149 | Venezuela (VEN),4,0,0,0,0,22,2,3,10,15 150 | Vietnam (VIE),0,0,0,0,0,15,1,3,0,4 151 | Virgin Islands (ISV),7,0,0,0,0,19,0,1,0,1 152 | Yugoslavia (YUG) 
[YUG],16,0,3,1,4,34,28,34,32,94 153 | Zambia (ZAM) [ZAM],0,0,0,0,0,13,0,1,1,2 154 | Republic of Gamers (GAME),,,,,,,,,, 155 | Zimbabwe (ZIM) [ZIM],1,0,0,0,0,14,3,4,1,8 156 | Unified Team (EUN) [EUN],1,9,6,8,23,2,54,44,37,135 157 | Independent Olympic Athletes (IOA) [IOA],0,0,0,0,0,3,1,0,1,2 158 | Independent Olympic Participants (IOP) [IOP],0,0,0,0,0,1,0,1,2,3 159 | Olympic Athletes from Russia (OAR) [OAR],1,2,6,9,17,1,2,6,9,17 160 | Mixed team (ZZX) [ZZX],0,0,0,0,0,3,8,5,4,17 161 | Totals,23,"1,060","1,058","1,050","3,168",51,"6,175","6,138","6,532","18,845" 162 | -------------------------------------------------------------------------------- /assignments/Process Mining Kickstarter - Exercises.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Process Mining Kickstarter - Exercises.pdf -------------------------------------------------------------------------------- /assignments/Process Mining Kickstarter - Solution Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/assignments/Process Mining Kickstarter - Solution Manual.pdf -------------------------------------------------------------------------------- /assignments/z1111111.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def question_1(): 5 | print("--------------- question_1 ---------------") 6 | pass 7 | 8 | 9 | def question_2(): 10 | print("--------------- question_2 ---------------") 11 | pass 12 | 13 | 14 | def question_3(): 15 | print("--------------- question_3 ---------------") 16 | pass 17 | 18 | 19 | def question_4(): 20 | print("--------------- question_4 ---------------") 21 | pass 22 | 23 | 24 | def question_5(): 25 | print("--------------- question_5 ---------------") 26 | pass 27 | 28 | 29 | def question_6(): 30 | print("--------------- question_6 ---------------") 31 | pass 32 | 33 | 34 | def question_7(): 35 | print("--------------- question_7 ---------------") 36 | pass 37 | 38 | 39 | def question_8(): 40 | print("--------------- question_8 ---------------") 41 | pass 42 | 43 | 44 | def question_9(): 45 | print("--------------- question_9 ---------------") 46 | pass 47 | 48 | 49 | def question_10(): 50 | print("--------------- question_10 ---------------") 51 | pass 52 | 53 | 54 | if __name__ == "__main__": 55 | question_1() 56 | question_2() 57 | question_3() 58 | question_4() 59 | question_5() 60 | question_6() 61 | question_7() 62 | question_8() 63 | question_9() 64 | question_10() 65 | -------------------------------------------------------------------------------- /docs/Flyer_UNSW_Al-Banna.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/docs/Flyer_UNSW_Al-Banna.pdf -------------------------------------------------------------------------------- /docs/myExperience.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mysilver/COMP9321-Data-Services/b477e20a819d63a53c684d0cb5fb332dc5b63e6c/docs/myExperience.pdf --------------------------------------------------------------------------------