├── chapters ├── data │ ├── hello.py │ ├── bad_style.py │ └── cycling_data.csv ├── img │ ├── chapter1 │ │ ├── box.png │ │ ├── beach.png │ │ └── australia.png │ ├── chapter2 │ │ ├── bsod.jpg │ │ ├── type_hint_1.png │ │ └── type_hint_2.png │ ├── chapter4 │ │ ├── copy.png │ │ ├── xkcd.png │ │ ├── vscode.png │ │ ├── deep-copy.png │ │ ├── copy-append.png │ │ └── tomas_beuzen.png │ ├── chapter5 │ │ ├── pies.png │ │ ├── numpy.png │ │ ├── pies_loop.png │ │ ├── triangle.png │ │ ├── numpy_paper.png │ │ ├── broadcasting.png │ │ ├── numpy_arrays.png │ │ └── pies_broadcast.png │ ├── chapter6 │ │ ├── conv.gif │ │ └── tomas_beuzen.png │ ├── chapter8 │ │ ├── join.png │ │ ├── tidy.png │ │ ├── groupby_1.png │ │ ├── groupby_2.png │ │ ├── left_join.png │ │ ├── inner_join.png │ │ ├── melt_pivot.gif │ │ └── outer_join.png │ ├── chapter7 │ │ ├── pandas.png │ │ ├── series.png │ │ ├── dataframe.png │ │ ├── computer_panda.gif │ │ └── series_addition.png │ └── chapter9 │ │ └── pandas_stacking.gif └── wallet.py ├── docs ├── banner.png ├── logo.png └── favicon.png ├── py4ds.yaml ├── practice-exercises ├── circle.py ├── bad_style.py ├── chapter2-loops-functions-practice.ipynb ├── chapter1-basics-practice.ipynb ├── chapter5-numpy-practice.ipynb ├── chapter7-pandas-practice.ipynb ├── chapter3-tests-classes-practice.ipynb └── chapter9-wrangling-advanced-practice.ipynb ├── _toc.yml ├── _config.yml ├── README.md └── LICENSE /chapters/data/hello.py: -------------------------------------------------------------------------------- 1 | PLANET = "Earth" 2 | 3 | 4 | def hello_world(): 5 | print(f"Hello {PLANET}!") 6 | -------------------------------------------------------------------------------- /docs/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/banner.png -------------------------------------------------------------------------------- /docs/logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/logo.png -------------------------------------------------------------------------------- /docs/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/favicon.png -------------------------------------------------------------------------------- /chapters/img/chapter1/box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/box.png -------------------------------------------------------------------------------- /chapters/img/chapter2/bsod.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/bsod.jpg -------------------------------------------------------------------------------- /chapters/img/chapter4/copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/copy.png -------------------------------------------------------------------------------- /chapters/img/chapter4/xkcd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/xkcd.png -------------------------------------------------------------------------------- /chapters/img/chapter5/pies.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies.png -------------------------------------------------------------------------------- /chapters/img/chapter6/conv.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter6/conv.gif -------------------------------------------------------------------------------- /chapters/img/chapter8/join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/join.png -------------------------------------------------------------------------------- /chapters/img/chapter8/tidy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/tidy.png -------------------------------------------------------------------------------- /chapters/img/chapter1/beach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/beach.png -------------------------------------------------------------------------------- /chapters/img/chapter4/vscode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/vscode.png -------------------------------------------------------------------------------- /chapters/img/chapter5/numpy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy.png -------------------------------------------------------------------------------- /chapters/img/chapter7/pandas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/pandas.png -------------------------------------------------------------------------------- /chapters/img/chapter7/series.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/series.png -------------------------------------------------------------------------------- /chapters/img/chapter1/australia.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/australia.png -------------------------------------------------------------------------------- /chapters/img/chapter4/deep-copy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/deep-copy.png -------------------------------------------------------------------------------- /chapters/img/chapter5/pies_loop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies_loop.png -------------------------------------------------------------------------------- /chapters/img/chapter5/triangle.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/triangle.png -------------------------------------------------------------------------------- /chapters/img/chapter7/dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/dataframe.png -------------------------------------------------------------------------------- /chapters/img/chapter8/groupby_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/groupby_1.png -------------------------------------------------------------------------------- /chapters/img/chapter8/groupby_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/groupby_2.png -------------------------------------------------------------------------------- /chapters/img/chapter8/left_join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/left_join.png -------------------------------------------------------------------------------- /chapters/img/chapter2/type_hint_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/type_hint_1.png -------------------------------------------------------------------------------- /chapters/img/chapter2/type_hint_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/type_hint_2.png -------------------------------------------------------------------------------- /chapters/img/chapter4/copy-append.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/copy-append.png -------------------------------------------------------------------------------- /chapters/img/chapter5/numpy_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy_paper.png -------------------------------------------------------------------------------- /chapters/img/chapter8/inner_join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/inner_join.png -------------------------------------------------------------------------------- /chapters/img/chapter8/melt_pivot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/melt_pivot.gif -------------------------------------------------------------------------------- /chapters/img/chapter8/outer_join.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/outer_join.png -------------------------------------------------------------------------------- /chapters/img/chapter4/tomas_beuzen.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/tomas_beuzen.png -------------------------------------------------------------------------------- /chapters/img/chapter5/broadcasting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/broadcasting.png -------------------------------------------------------------------------------- /chapters/img/chapter5/numpy_arrays.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy_arrays.png -------------------------------------------------------------------------------- /chapters/img/chapter5/pies_broadcast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies_broadcast.png -------------------------------------------------------------------------------- /chapters/img/chapter6/tomas_beuzen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter6/tomas_beuzen.png -------------------------------------------------------------------------------- /chapters/img/chapter7/computer_panda.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/computer_panda.gif -------------------------------------------------------------------------------- /chapters/img/chapter7/series_addition.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/series_addition.png -------------------------------------------------------------------------------- /chapters/img/chapter9/pandas_stacking.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter9/pandas_stacking.gif -------------------------------------------------------------------------------- /py4ds.yaml: -------------------------------------------------------------------------------- 1 | # install with `conda env create -fpy4ds.yaml` 2 | 3 | name: py4ds 4 | channels: 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - python=3.8 9 | - jupyterlab 10 | - matplotlib 11 | - pandas 12 | - numpy 13 | - memory_profiler 14 | - tqdm 15 | - flake8 16 | - black 17 | -------------------------------------------------------------------------------- /practice-exercises/circle.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | class Circle: 4 | """A circle with a radius r.""" 5 | 6 | def __init__(self, radius): 7 | self.radius = radius 8 | 9 | def area(self): 10 | """Calculate the area of the circle.""" 11 | return math.pi * self.radius ** 2 12 | 13 | def circumference(self): 14 | """Calculate the circumference of the circle.""" 15 | return 2.0 * math.pi * self.radius 16 | 17 | def __str__(self): 18 | return f"A Circle with radius {self.radius}" -------------------------------------------------------------------------------- /practice-exercises/bad_style.py: -------------------------------------------------------------------------------- 1 | very_long_variable_name = {'field': 1, 2 | 'is_debug': True} 3 | if very_long_variable_name is not None and very_long_variable_name["field"] > 0 or very_long_variable_name['is_debug']: 4 | z = 'hello '+'world' 5 | else: 6 | f = 
rf'hello {world}' 7 | if (True): y = 'hello ''world'#FIXME: https://github.com/python/black/issues/26 8 | class Foo ( object ): 9 | def f (self ): 10 | return 37*-2 11 | def g(self, x,y=42): 12 | return y 13 | regular_formatting = [ 14 | 0, 1, 2, 15 | 3, 4, 5 16 | ] 17 | def CAPITALIZE(mystring): 18 | return mystring.upper() -------------------------------------------------------------------------------- /chapters/data/bad_style.py: -------------------------------------------------------------------------------- 1 | x = { 'a':37,'b':42, 2 | 'c':927} 3 | very_long_variable_name = {'field': 1, 4 | 'is_debug': True} 5 | this=True 6 | 7 | if very_long_variable_name is not None and very_long_variable_name["field"] > 0 or very_long_variable_name['is_debug']: 8 | z = 'hello '+'world' 9 | else: 10 | world = 'world' 11 | a = 'hello {}'.format(world) 12 | f = rf'hello {world}' 13 | if (this): y = 'hello ''world'#FIXME: https://github.com/python/black/issues/26 14 | class Foo ( object ): 15 | def f (self ): 16 | return 37*-2 17 | def g(self, x,y=42): 18 | return y 19 | # fmt: off 20 | custom_formatting = [ 21 | 0, 1, 2, 22 | 3, 4, 5 23 | ] 24 | # fmt: on 25 | regular_formatting = [ 26 | 0, 1, 2, 27 | 3, 4, 5 28 | ] -------------------------------------------------------------------------------- /_toc.yml: -------------------------------------------------------------------------------- 1 | - file: README 2 | numbered: false 3 | 4 | - part: Chapters 5 | chapters: 6 | - file: chapters/chapter1-basics 7 | - file: chapters/chapter2-loops-functions 8 | - file: chapters/chapter3-tests-classes 9 | - file: chapters/chapter4-style-scripts-imports 10 | - file: chapters/chapter5-numpy 11 | - file: chapters/chapter6-numpy-addendum 12 | - file: chapters/chapter7-pandas 13 | - file: chapters/chapter8-wrangling-basics 14 | - file: chapters/chapter9-wrangling-advanced 15 | 16 | - part: Practice Exercises 17 | chapters: 18 | - file: practice-exercises/chapter1-basics-practice.ipynb 19 | - 
file: practice-exercises/chapter2-loops-functions-practice.ipynb 20 | - file: practice-exercises/chapter3-tests-classes-practice.ipynb 21 | - file: practice-exercises/chapter4-style-scripts-imports-practice.ipynb 22 | - file: practice-exercises/chapter5-numpy-practice.ipynb 23 | - file: practice-exercises/chapter7-pandas-practice.ipynb 24 | - file: practice-exercises/chapter8-wrangling-basics-practice.ipynb 25 | - file: practice-exercises/chapter9-wrangling-advanced-practice.ipynb -------------------------------------------------------------------------------- /chapters/wallet.py: -------------------------------------------------------------------------------- 1 | # This module contains a class Wallet that can be used to store, spend, and earn cash. 2 | 3 | 4 | class Wallet: 5 | """A wallet that can store, spend, and earn cash. 6 | 7 | Parameters 8 | ---------- 9 | balance : number 10 | Amount of starting cash. 11 | 12 | Attributes 13 | ---------- 14 | item : str 15 | The type of item, a "Wallet" 16 | balance : float 17 | The amount of money currently in the wallet. 18 | """ 19 | 20 | item = "Wallet" 21 | 22 | def __init__(self, balance): 23 | """See help(Wallet)""" 24 | self.balance = balance 25 | 26 | def buy_item(self, cost, number=1): 27 | """Spend money and reduce your balance. 28 | 29 | Parameters 30 | ---------- 31 | cost : number 32 | cost of the item to buy. 33 | number : int 34 | number of items to buy. 35 | 36 | Raises 37 | ------ 38 | InsufficientCashError 39 | If you do not have enough money to spend. 40 | """ 41 | if cost * number <= self.balance: 42 | self.balance -= cost * number 43 | else: 44 | raise InsufficientCashError( 45 | f"You can't spend ${cost * number} as you only have ${self.balance}." 46 | ) 47 | 48 | def sell_item(self, cost, number=1): 49 | """Sell items and increase your balance. 50 | 51 | Parameters 52 | ---------- 53 | cost : number 54 | cost of the item to sell. 55 | number : int 56 | number of items to sell.
57 | 58 | """ 59 | self.balance += cost * number 60 | 61 | 62 | class InsufficientCashError(Exception): 63 | """Custom error used when there is insufficient cash for a transaction.""" 64 | 65 | pass 66 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | title: Python Programming for Data Science 3 | author: Tomas Beuzen 4 | copyright: "2021" 5 | logo: docs/logo.png 6 | exclude_patterns: ["cache"] 7 | 8 | repository: 9 | url: https://github.com/TomasBeuzen/python-programming-for-data-science 10 | path_to_book: "" 11 | branch: main # Which branch of the repository should be used when creating links 12 | 13 | latex: 14 | latex_documents: 15 | targetname: book.tex 16 | 17 | # Execution settings 18 | execute: 19 | execute_notebooks : "cache" # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off") 20 | cache : "cache" 21 | 22 | # HTML-specific settings 23 | html: 24 | favicon : "docs/favicon.png" # A path to a favicon image 25 | navbar_number_sections : false # Add a number to each section in your left navbar 26 | use_edit_page_button : false # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in 27 | use_repository_button : true # Whether to add a link to your repository button 28 | use_issues_button : false # Whether to add an "open an issue" button 29 | extra_navbar : Tomas Beuzen # Will be displayed underneath the left navbar. 30 | extra_footer : "" # Will be displayed underneath the footer. 31 | google_analytics_id : "" # A GA id that can be used to track book views. 32 | home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar 33 | baseurl : "https://www.tomasbeuzen.com/python-programming-for-data-science/" # The base URL where your book will be hosted. 
Used for creating image previews and social links. e.g.: https://mypage.com/mybook/ 34 | -------------------------------------------------------------------------------- /chapters/data/cycling_data.csv: -------------------------------------------------------------------------------- 1 | Date,Name,Type,Time,Distance,Comments 2 | "10 Sep 2019, 00:13:04",Afternoon Ride,Ride,2084,12.62,Rain 3 | "10 Sep 2019, 13:52:18",Morning Ride,Ride,2531,13.03,rain 4 | "11 Sep 2019, 00:23:50",Afternoon Ride,Ride,1863,12.52,Wet road but nice weather 5 | "11 Sep 2019, 14:06:19",Morning Ride,Ride,2192,12.84,Stopped for photo of sunrise 6 | "12 Sep 2019, 00:28:05",Afternoon Ride,Ride,1891,12.48,Tired by the end of the week 7 | "16 Sep 2019, 13:57:48",Morning Ride,Ride,2272,12.45,Rested after the weekend! 8 | "17 Sep 2019, 00:15:47",Afternoon Ride,Ride,1973,12.45,Legs feeling strong! 9 | "17 Sep 2019, 13:43:34",Morning Ride,Ride,2285,12.6,Raining 10 | "18 Sep 2019, 13:49:53",Morning Ride,Ride,2903,14.57,Raining today 11 | "18 Sep 2019, 00:15:52",Afternoon Ride,Ride,2101,12.48,Pumped up tires 12 | "19 Sep 2019, 00:30:01",Afternoon Ride,Ride,48062,12.48,Feeling good 13 | "19 Sep 2019, 13:52:09",Morning Ride,Ride,2090,12.59,Getting colder which is nice 14 | "20 Sep 2019, 01:02:05",Afternoon Ride,Ride,2961,12.81,Feeling good 15 | "23 Sep 2019, 13:50:41",Morning Ride,Ride,2462,12.68,Rested after the weekend! 
16 | "24 Sep 2019, 00:35:42",Afternoon Ride,Ride,2076,12.47,"Oiled chain, bike feels smooth" 17 | "24 Sep 2019, 13:41:24",Morning Ride,Ride,2321,12.68,Bike feeling much smoother 18 | "25 Sep 2019, 00:07:21",Afternoon Ride,Ride,1775,12.1,Feeling really tired 19 | "25 Sep 2019, 13:35:41",Morning Ride,Ride,2124,12.65,Stopped for photo of sunrise 20 | "26 Sep 2019, 00:13:33",Afternoon Ride,Ride,1860,12.52,raining 21 | "26 Sep 2019, 13:42:43",Morning Ride,Ride,2350,12.91,Detour around trucks at Jericho 22 | "27 Sep 2019, 01:00:18",Afternoon Ride,Ride,1712,12.47,Tired by the end of the week 23 | "30 Sep 2019, 13:53:52",Morning Ride,Ride,2118,12.71,Rested after the weekend! 24 | "1 Oct 2019, 00:15:07",Afternoon Ride,Ride,1732,NaN,Legs feeling strong! 25 | "1 Oct 2019, 13:45:55",Morning Ride,Ride,2222,12.82,Beautiful morning! Feeling fit 26 | "2 Oct 2019, 00:13:09",Afternoon Ride,Ride,1756,NaN,A little tired today but good weather 27 | "2 Oct 2019, 13:46:06",Morning Ride,Ride,2134,13.06,Bit tired today but good weather 28 | "3 Oct 2019, 00:45:22",Afternoon Ride,Ride,1724,12.52,Feeling good 29 | "3 Oct 2019, 13:47:36",Morning Ride,Ride,2182,12.68,Wet road 30 | "4 Oct 2019, 01:08:08",Afternoon Ride,Ride,1870,12.63,"Very tired, riding into the wind" 31 | "9 Oct 2019, 13:55:40",Morning Ride,Ride,2149,12.7,Really cold! But feeling good 32 | "10 Oct 2019, 00:10:31",Afternoon Ride,Ride,1841,12.59,Feeling good after a holiday break! 33 | "10 Oct 2019, 13:47:14",Morning Ride,Ride,2463,12.79,Stopped for photo of sunrise 34 | "11 Oct 2019, 00:16:57",Afternoon Ride,Ride,1843,11.79,"Bike feeling tight, needs an oil and pump" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Programming for Data Science 2 | 3 | **By [Tomas Beuzen](https://www.tomasbeuzen.com/) 🚀** 4 | 5 | Welcome to Python Programming for Data Science! 
With this [website](https://www.tomasbeuzen.com/python-programming-for-data-science/) I aim to provide an introduction to everything you need to know to start using Python for data science. We'll cover topics such as data structures, basic programming, code testing and documentation, and using libraries like NumPy and Pandas for data exploration and analysis. 6 | 7 |

8 | 9 |

10 | 11 | >If you're interested in learning more about Python packages, check out my and [Tiffany Timbers'](https://www.tiffanytimbers.com/) book [**Python Packages**](https://py-pkgs.org/). Or, if you'd like to learn more about using Python and PyTorch for deep learning, you can check out my other online material [**Deep Learning with PyTorch**](https://www.tomasbeuzen.com/deep-learning-with-pytorch/). 12 | 13 | >The content of this site is adapted from material I used to teach the 2020/2021 offering of the course "DSCI 511 Python Programming for Data Science" for the University of British Columbia's Master of Data Science Program. That material has built upon previous course material developed by [Patrick Walls](https://www.math.ubc.ca/~pwalls/) and [Mike Gelbart](https://www.mikegelbart.com/). 14 | 15 | ## Key Learning Outcomes 16 | 17 | These are the key learning outcomes for this material: 18 | 19 | 1. Translate fundamental programming concepts such as loops, conditionals, etc., into Python code. 20 | 2. Understand the key data structures in Python. 21 | 3. Understand how to write functions in Python and assess if they are correct via unit testing. 22 | 4. Know when and how to abstract code (e.g., into functions, or classes) to make it more modular and robust. 23 | 5. Produce human-readable code that incorporates best practices of programming, documentation, and coding style. 24 | 6. Use NumPy to perform common data wrangling and computational tasks in Python. 25 | 7. Use Pandas to create and manipulate data structures like Series and DataFrames. 26 | 8. Wrangle different types of data in Pandas including numeric data, strings, and datetimes. 27 | 28 | ## Getting Started 29 | 30 | The material on this site is written in Jupyter notebooks and rendered using [Jupyter Book](https://jupyterbook.org/intro.html) to make it easily accessible. However, if you wish to run these notebooks on your local machine, you can do the following: 31 | 32 | 1.
Clone the GitHub repository: 33 | ```sh 34 | git clone https://github.com/TomasBeuzen/python-programming-for-data-science.git 35 | ``` 36 | 2. Install the conda environment by typing the following in your terminal: 37 | ```sh 38 | conda env create -f python-programming-for-data-science/py4ds.yaml 39 | ``` 40 | 3. Activate the environment with `conda activate py4ds`, then open the course in JupyterLab by typing the following in your terminal: 41 | ```sh 42 | cd python-programming-for-data-science 43 | jupyter lab 44 | ``` 45 | 46 | >If you're not comfortable with `git`, `GitHub` or `conda`, feel free to just read through the material on this website - you're not missing out on anything! 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work").
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /practice-exercises/chapter2-loops-functions-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Loops & Functions\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 2](../chapters/chapter2-loops-functions.ipynb)." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercises" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 1." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Create a function `website()` that grabs the website domain from a url string. For example, if your function is passed `\"www.google.com\"`, it should return `\"google\"`." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "def website(url):\n", 54 | " pass # Remove this line and add your answer here." 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### 2." 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Create a function `divisible(a, b)` that accepts two integers (`a` and `b`) and returns `True` if `a` is divisible by `b` without a remainder. For example, `divisible(10, 3)` should return `False`, while `divisible(6, 3)` should return `True`." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "def divisible(a, b):\n", 78 | " pass # Remove this line and add your answer here." 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "### 3." 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Use list comprehension to square every number in the following list of numbers. " 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 3, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "l = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 102 | "\n", 103 | "# Your answer here." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### 4." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "For the following list of names, write a list comprehension that creates a list of *only* words that start with a capital letter (hint: `str.isupper()`)." 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "names = ['Steve Irwin', 'koala', 'kangaroo', 'Australia', 'Sydney', 'desert']\n", 127 | "\n", 128 | "# Your answer here." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### 5." 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "For the following list of `keys` and `vals` use dictionary comprehension to create a dictionary of the form `{'key-0': 0, 'key-1': 1, etc}` (hint: `zip()` can help you combine two lists into one object to be used for comprehension/looping)." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 5, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "keys = [f\"key-{k}\" for k in range(10)]\n", 152 | "vals = range(10)\n", 153 | "\n", 154 | "# Your answer here." 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "### 6." 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "This question is a little harder. Create a generator function called `listgen(n)` that yields numbers from 0 to n, in batches of lists of maximum 10 numbers at a time. 
For example, your function should behave as follows:\n", 169 | "\n", 170 | "```python\n", 171 | "g = listgen(100)\n", 172 | "next(g)\n", 173 | "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 174 | "next(g)\n", 175 | "[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n", 176 | "next(g)\n", 177 | "[20, 21, 22, 23, 24, 25, 26, 27, 28, 29]\n", 178 | "etc.\n", 179 | "\n", 180 | "g = listgen(5)\n", 181 | "next(g)\n", 182 | "```" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "def listgen(n):\n", 192 | " pass # Remove this line and add your answer here." 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "### 7." 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "Write a `try`/`except` to catch the error generated from the following code and print \"I caught you!\". Make sure you catch the specific error being caused, this is typically better practice than just catching all errors!" 
207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "metadata": { 213 | "tags": [ 214 | "raises-exception" 215 | ] 216 | }, 217 | "outputs": [ 218 | { 219 | "ename": "ZeroDivisionError", 220 | "evalue": "division by zero", 221 | "output_type": "error", 222 | "traceback": [ 223 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 224 | "\u001b[0;31mZeroDivisionError\u001b[0m Traceback (most recent call last)", 225 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m5\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# Your answer here.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 226 | "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "5 / 0\n", 232 | "\n", 233 | "# Your answer here." 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### 8." 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Create a function `lucky_sum()` that takes all the integers a user enters and returns their sum. 
*However*, if one of the values is 13 then it does not count towards the sum, nor do any values to its right.\n", 248 | "\n", 249 | "For example, your function should behave as follows:\n", 250 | "\n", 251 | "```python\n", 252 | "lucky_sum(1, 2, 3, 4)\n", 253 | "10\n", 254 | "\n", 255 | "lucky_sum(1, 13, 3, 4)\n", 256 | "1\n", 257 | "\n", 258 | "lucky_sum(13)\n", 259 | "0\n", 260 | "```\n", 261 | "\n", 262 | "*This example is inspired by the related [codingbat challenge](https://codingbat.com/prob/p130788).*" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 8, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "def lucky_sum(*args):\n", 272 | " pass # Remove this line and add your answer here." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "
\n", 280 | "
\n", 281 | "
" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "## Solutions" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "### 1." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "Create a function `website()` that grabs the website domain from a url string. For example, if your function is passed `\"www.google.com\"`, it should return `\"google\"`." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 9, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "'google'" 314 | ] 315 | }, 316 | "execution_count": 9, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "def website(url):\n", 323 | " return url.split(\".\")[1]\n", 324 | "website(\"www.google.com\")" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### 2." 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "Create a function `divisible(a, b)` that accepts two integers (`a` and `b`) and returns `True` if `a` is divisible by `b` without a remainder. For example, `divisible(10, 3)` should return `False`, while `divisible(6, 3)` should return `True`." 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 10, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "False\n", 351 | "True\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "def divisible(a, b):\n", 357 | " return True if a % b == 0 else False\n", 358 | "print(divisible(10, 3))\n", 359 | "print(divisible(6, 3))" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "### 3." 
367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "Use list comprehension to square every number in the following list of numbers. " 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 11, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "text/plain": [ 384 | "[1, 4, 9, 16, 25, 36, 49, 64, 81]" 385 | ] 386 | }, 387 | "execution_count": 11, 388 | "metadata": {}, 389 | "output_type": "execute_result" 390 | } 391 | ], 392 | "source": [ 393 | "l = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 394 | "[_ ** 2 for _ in l]" 395 | ] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": {}, 400 | "source": [ 401 | "### 4." 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "For the following list of names, write a list comprehension that creates a list of *only* words that start with a capital letter (hint: `str.isupper()`)." 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 12, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "['Steve Irwin', 'Australia', 'Sydney']" 420 | ] 421 | }, 422 | "execution_count": 12, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "names = ['Steve Irwin', 'koala', 'kangaroo', 'Australia', 'Sydney', 'desert']\n", 429 | "[_ for _ in names if _[0].isupper()]" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "### 5." 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "For the following list of `keys` and `vals` use dictionary comprehension to create a dictionary of the form `{'key-0': 0, 'key-1': 1, etc}` (hint: `zip()` can help you combine two lists into one object to be used for comprehension/looping)." 
444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 13, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "{'key-0': 0,\n", 455 | " 'key-1': 1,\n", 456 | " 'key-2': 2,\n", 457 | " 'key-3': 3,\n", 458 | " 'key-4': 4,\n", 459 | " 'key-5': 5,\n", 460 | " 'key-6': 6,\n", 461 | " 'key-7': 7,\n", 462 | " 'key-8': 8,\n", 463 | " 'key-9': 9}" 464 | ] 465 | }, 466 | "execution_count": 13, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "keys = [f\"key-{k}\" for k in range(10)]\n", 473 | "vals = range(10)\n", 474 | "{k:v for k, v in zip(keys, vals)}" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": {}, 480 | "source": [ 481 | "### 6." 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": {}, 487 | "source": [ 488 | "This question is a little harder. Create a generator function called `listgen(n)` that yields numbers from 0 to n, in batches of lists of maximum 10 numbers at a time. For example, your function should behave as follows:\n", 489 | "\n", 490 | "```python\n", 491 | "g = listgen(100)\n", 492 | "next(g)\n", 493 | "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", 494 | "next(g)\n", 495 | "[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n", 496 | "next(g)\n", 497 | "[20, 21, 22, 23, 24, 25, 26, 27, 28, 29]\n", 498 | "etc.\n", 499 | "\n", 500 | "g = listgen(5)\n", 501 | "next(g)\n", 502 | "```" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 14, 508 | "metadata": {}, 509 | "outputs": [], 510 | "source": [ 511 | "def listgen(n):\n", 512 | " counter = 0\n", 513 | " numbers = list(range(n))\n", 514 | " while 10 * counter < n:\n", 515 | " yield numbers[10 * counter:10*(counter+1)]\n", 516 | " counter += 1" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "### 7." 
524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "Write a `try`/`except` to catch the error generated from the following code and print \"I caught you!\". Make sure you catch the specific error being caused, this is typically better practice than just catching all errors!" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 15, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "name": "stdout", 540 | "output_type": "stream", 541 | "text": [ 542 | "I caught you!\n" 543 | ] 544 | } 545 | ], 546 | "source": [ 547 | "try:\n", 548 | " 5 / 0\n", 549 | "except ZeroDivisionError:\n", 550 | " print(\"I caught you!\")" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "### 8." 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "Create a function `lucky_sum()` that takes all the integers a user enters and returns their sum. 
*However*, if one of the values is 13 then it does not count towards the sum, nor do any values to its right.\n", 565 | "\n", 566 | "For example, your function should behave as follows:\n", 567 | "\n", 568 | "```python\n", 569 | "lucky_sum(1, 2, 3, 4)\n", 570 | "10\n", 571 | "\n", 572 | "lucky_sum(1, 13, 3, 4)\n", 573 | "1\n", 574 | "\n", 575 | "lucky_sum(13)\n", 576 | "0\n", 577 | "```\n", 578 | "\n", 579 | "*This example is inspired by the related [codingbat challenge](https://codingbat.com/prob/p130788).*" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 16, 585 | "metadata": {}, 586 | "outputs": [], 587 | "source": [ 588 | "def lucky_sum(*args):\n", 589 | " if 13 in args:\n", 590 | " return sum(args[:args.index(13)])\n", 591 | " return sum(args)" 592 | ] 593 | } 594 | ], 595 | "metadata": { 596 | "kernelspec": { 597 | "display_name": "Python 3", 598 | "language": "python", 599 | "name": "python3" 600 | }, 601 | "language_info": { 602 | "codemirror_mode": { 603 | "name": "ipython", 604 | "version": 3 605 | }, 606 | "file_extension": ".py", 607 | "mimetype": "text/x-python", 608 | "name": "python", 609 | "nbconvert_exporter": "python", 610 | "pygments_lexer": "ipython3", 611 | "version": "3.7.8" 612 | } 613 | }, 614 | "nbformat": 4, 615 | "nbformat_minor": 4 616 | } 617 | -------------------------------------------------------------------------------- /practice-exercises/chapter1-basics-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Python Basics\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 1](../chapters/chapter1-basics.ipynb)." 
24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercises" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 1." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "What is 5 to the power of 5?" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Your answer here." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### 2." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "What is the remainder from dividing 73 by 6?" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Your answer here." 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### 3." 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "How many times does the whole number 3 go into 123? What is the remainder of dividing 123 by 3?" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 3, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "# Your answer here." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "### 4." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Split the following string into a list by splitting on the space character:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 4, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "s = \"MDS is going virtual!\"\n", 123 | "\n", 124 | "# Your answer here." 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "### 5." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Given the following variables:\n", 139 | "\n", 140 | "```\n", 141 | "thing = \"light\"\n", 142 | "speed = 299792458 # m/s\n", 143 | "```\n", 144 | "\n", 145 | "Use f-strings to print:\n", 146 | "\n", 147 | "```\n", 148 | "The speed of light is 2.997925e+08 m/s.\n", 149 | "```" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 5, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# Your answer here." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "### 6." 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "Given this nested list, use indexing to grab the word \"MDS\":" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 6, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "l = [10, [3, 4], [5, [100, 200, [\"MDS\"]], 23, 11], 1, 7]\n", 182 | "\n", 183 | "# Your answer here." 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### 7." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "Given this nested dictionary, grab the word \"MDS\":" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "d = {\n", 207 | " \"outer\": [\n", 208 | " 1,\n", 209 | " 2,\n", 210 | " 3,\n", 211 | " {\"inner\": [\"this\", \"is\", \"inception\", {\"inner_inner\": [1, 2, 3, \"MDS\"]}]},\n", 212 | " ]\n", 213 | "}\n", 214 | "\n", 215 | "# Your answer here." 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "### 8." 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "Why does the following cell return an error?" 
230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 8, 235 | "metadata": { 236 | "tags": [ 237 | "raises-exception" 238 | ] 239 | }, 240 | "outputs": [ 241 | { 242 | "ename": "TypeError", 243 | "evalue": "'tuple' object does not support item assignment", 244 | "output_type": "error", 245 | "traceback": [ 246 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 247 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 248 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 249 | "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "t = (1, 2, 3, 4, 5)\n", 255 | "t[-1] = 6" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### 9." 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Use string methods to extract the website domain from an email, e.g., from the string `\"tomas.beuzen@fakemail.com\"`, you should extract `\"fakemail\"`." 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "email = \"tomas.beuzen@fakemail.com\"\n", 279 | "\n", 280 | "# Your answer here." 
281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "### 10." 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "Given the variable `language` which contains a string, use `if/elif/else` to write a program that:\n", 295 | "- return \"I love snakes!\" if `language` is `\"python\"` (any kind of capitalization)\n", 296 | "- return \"Are you a pirate?\" if `language` is `\"R\"` (any kind of capitalization)\n", 297 | "- else return \"What is `language`?\" if `language` is anything else." 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 10, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "language = \"python\"\n", 307 | "\n", 308 | "# Your answer here." 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "
\n", 316 | "
\n", 317 | "
" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "## Solutions" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### 1." 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "What is 5 to the power of 5?" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 11, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "data": { 348 | "text/plain": [ 349 | "3125" 350 | ] 351 | }, 352 | "execution_count": 11, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "5 ** 5" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "### 2." 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "What is the remainder from dividing 73 by 6?" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 12, 378 | "metadata": {}, 379 | "outputs": [ 380 | { 381 | "data": { 382 | "text/plain": [ 383 | "1" 384 | ] 385 | }, 386 | "execution_count": 12, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "73 % 6" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### 3." 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "How many times does the whole number 3 go into 123? What is the remainder of dividing 123 by 3?" 
407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 13, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | "41\n", 419 | "0\n" 420 | ] 421 | } 422 | ], 423 | "source": [ 424 | "print(123 // 3)\n", 425 | "print(123 % 3)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "### 4." 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "Split this string on the space character into a list:\n", 440 | "\n", 441 | "```\n", 442 | "s = \"MDS is going virtual!\"\n", 443 | "```" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 14, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "['MDS', 'is', 'going', 'virtual!']" 455 | ] 456 | }, 457 | "execution_count": 14, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "s = \"MDS is going virtual!\"\n", 464 | "s.split()" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "### 5." 
472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "Given the following variables:\n", 479 | "\n", 480 | "```\n", 481 | "thing = \"light\"\n", 482 | "speed = 299792458 # m/s\n", 483 | "```\n", 484 | "\n", 485 | "Use f-strings to print:\n", 486 | "\n", 487 | "```\n", 488 | "The speed of light is 2.997925e+08 m/s.\n", 489 | "```" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 15, 495 | "metadata": {}, 496 | "outputs": [ 497 | { 498 | "name": "stdout", 499 | "output_type": "stream", 500 | "text": [ 501 | "The speed of light is 2.997925e+08 m/s.\n" 502 | ] 503 | } 504 | ], 505 | "source": [ 506 | "thing = \"light\"\n", 507 | "speed = 299792458 # m/s\n", 508 | "print(f\"The speed of {thing} is {speed:2e} m/s.\")" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "### 6." 516 | ] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": {}, 521 | "source": [ 522 | "Given this nested list, use indexing to grab the word \"MDS\":" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 16, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "l = [10,[3,4],[5,[100,200,['MDS']],23,11],1,7]" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 17, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | "text/plain": [ 542 | "'MDS'" 543 | ] 544 | }, 545 | "execution_count": 17, 546 | "metadata": {}, 547 | "output_type": "execute_result" 548 | } 549 | ], 550 | "source": [ 551 | "l[2][1][2][0]" 552 | ] 553 | }, 554 | { 555 | "cell_type": "markdown", 556 | "metadata": {}, 557 | "source": [ 558 | "### 7." 
559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "Given this nested dictionary, grab the word \"MDS\":" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 18, 571 | "metadata": {}, 572 | "outputs": [], 573 | "source": [ 574 | "d = {\n", 575 | " \"outer\": [\n", 576 | " 1,\n", 577 | " 2,\n", 578 | " 3,\n", 579 | " {\"inner\": [\"this\", \"is\", \"inception\", {\"inner_inner\": [1, 2, 3, \"MDS\"]}]},\n", 580 | " ]\n", 581 | "}" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 19, 587 | "metadata": {}, 588 | "outputs": [ 589 | { 590 | "data": { 591 | "text/plain": [ 592 | "'MDS'" 593 | ] 594 | }, 595 | "execution_count": 19, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "d['outer'][3]['inner'][3]['inner_inner'][3]" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": {}, 607 | "source": [ 608 | "### 8." 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "Why does the following cell return an error?" 
616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 20, 621 | "metadata": { 622 | "tags": [ 623 | "raises-exception" 624 | ] 625 | }, 626 | "outputs": [ 627 | { 628 | "ename": "TypeError", 629 | "evalue": "'tuple' object does not support item assignment", 630 | "output_type": "error", 631 | "traceback": [ 632 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 633 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 634 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 635 | "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment" 636 | ] 637 | } 638 | ], 639 | "source": [ 640 | "t = (1, 2, 3, 4, 5)\n", 641 | "t[-1] = 6" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": {}, 647 | "source": [ 648 | "Because tuples are immutable!" 649 | ] 650 | }, 651 | { 652 | "cell_type": "markdown", 653 | "metadata": {}, 654 | "source": [ 655 | "### 9." 656 | ] 657 | }, 658 | { 659 | "cell_type": "markdown", 660 | "metadata": {}, 661 | "source": [ 662 | "Use string methods to extract the website domain from an email, e.g., from the string `\"tomas.beuzen@fakemail.com\"`, you should extract `\"fakemail\"`." 
663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 21, 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": [ 673 | "'fakemail'" 674 | ] 675 | }, 676 | "execution_count": 21, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "email = \"tomas.beuzen@fakemail.com\"\n", 683 | "email.split(\"@\")[-1].split(\".com\")[0]" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": {}, 689 | "source": [ 690 | "### 10." 691 | ] 692 | }, 693 | { 694 | "cell_type": "markdown", 695 | "metadata": {}, 696 | "source": [ 697 | "Given the variable `language` which contains a string, use `if/elif/else` to write a program that:\n", 698 | "- return \"I love snakes!\" if `language` is `\"python\"` (any kind of capitalization)\n", 699 | "- return \"Are you a pirate?\" if `language` is `\"R\"` (any kind of capitalization)\n", 700 | "- else return \"What is `language`?\" if `language` is anything else." 
701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 22, 706 | "metadata": {}, 707 | "outputs": [ 708 | { 709 | "name": "stdout", 710 | "output_type": "stream", 711 | "text": [ 712 | "I love snakes!\n" 713 | ] 714 | } 715 | ], 716 | "source": [ 717 | "language = \"python\"\n", 718 | "if language.lower() == \"python\":\n", 719 | " print(\"I love snakes!\")\n", 720 | "elif language.lower() == \"r\":\n", 721 | " print(\"Are you a pirate?\")\n", 722 | "else:\n", 723 | " print(f\"What is {language}?\")" 724 | ] 725 | } 726 | ], 727 | "metadata": { 728 | "kernelspec": { 729 | "display_name": "Python 3", 730 | "language": "python", 731 | "name": "python3" 732 | }, 733 | "language_info": { 734 | "codemirror_mode": { 735 | "name": "ipython", 736 | "version": 3 737 | }, 738 | "file_extension": ".py", 739 | "mimetype": "text/x-python", 740 | "name": "python", 741 | "nbconvert_exporter": "python", 742 | "pygments_lexer": "ipython3", 743 | "version": "3.7.8" 744 | } 745 | }, 746 | "nbformat": 4, 747 | "nbformat_minor": 4 748 | } 749 | -------------------------------------------------------------------------------- /practice-exercises/chapter5-numpy-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# NumPy\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 5](../chapters/chapter5-numpy.ipynb) and [Chapter 6](../chapters/chapter6-numpy-addendum.ipynb)." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercises" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 1." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "Import numpy under the alias `np`." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# Your answer here." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### 2." 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "Create the following arrays:\n", 68 | "\n", 69 | "1. Create an array of 5 zeros.\n", 70 | "2. Create an array of 10 ones.\n", 71 | "3. Create an array of 5 3.141s.\n", 72 | "4. Create an array of the integers 1 to 20.\n", 73 | "5. Create a 5 x 5 matrix of ones with a dtype `int`." 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Your answer here." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### 3." 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "Use numpy to:\n", 97 | "1. Create a 3D matrix of 3 x 3 x 3 full of random numbers drawn from a standard normal distribution (hint: `np.random.randn()`)\n", 98 | "2. Reshape the above array into shape (27,)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# Your answer here." 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### 4." 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Create an array of 20 linearly spaced numbers between 1 and 10." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Your answer here." 
131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### 5." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Run the following code to create an array of shape 5 x 5 and then use indexing to produce the outputs shown below." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 5, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "import numpy as np\n", 154 | "a = np.arange(1, 26).reshape(5, -1)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "```python\n", 162 | "20\n", 163 | "```" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 6, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "# Your answer here." 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "```python\n", 180 | "array([[ 9, 10],\n", 181 | " [14, 15],\n", 182 | " [19, 20],\n", 183 | " [24, 25]])\n", 184 | "```" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 7, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# Your answer here." 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "```python\n", 201 | "array([ 6, 7, 8, 9, 10])\n", 202 | "```" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 8, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "# Your answer here." 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "```python\n", 219 | "array([[11, 12, 13, 14, 15],\n", 220 | " [16, 17, 18, 19, 20]])\n", 221 | "```" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 9, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# Your answer here." 
231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "```python\n", 238 | "array([[ 8, 9],\n", 239 | " [13, 14]])\n", 240 | "```" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 10, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# Your answer here." 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### 6." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "Calculate the sum of all the numbers in `a`." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 11, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Your answer here." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### 7." 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "Calculate the sum of each row in `a`." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 12, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "# Your answer here." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### 8." 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "Extract all values of `a` greater than the mean of `a` (hint: use a boolean mask)." 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 13, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# Your answer here." 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "### 9." 
326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Find the location of the minimum value in the following array `b`:" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 14, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "data": { 342 | "text/plain": [ 343 | "array([-1.0856306 , 0.99734545, 0.2829785 , -1.50629471, -0.57860025,\n", 344 | " 1.65143654, -2.42667924, -0.42891263, 1.26593626, -0.8667404 ])" 345 | ] 346 | }, 347 | "execution_count": 14, 348 | "metadata": {}, 349 | "output_type": "execute_result" 350 | } 351 | ], 352 | "source": [ 353 | "np.random.seed(123)\n", 354 | "b = np.random.randn(10)\n", 355 | "b" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 15, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# Your answer here." 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "### 10." 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Find the location of the maximum value in the following 2D array `c` (hint: there are many ways to do this question, but a quick search on stackoverflow.com will typically help you find the optimum solution for a problem, for example see [post](https://stackoverflow.com/questions/3584243/get-the-position-of-the-biggest-item-in-a-multi-dimensional-numpy-array)):" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": 16, 384 | "metadata": {}, 385 | "outputs": [ 386 | { 387 | "data": { 388 | "text/plain": [ 389 | "array([[-1.0856306 , 0.99734545],\n", 390 | " [ 0.2829785 , -1.50629471],\n", 391 | " [-0.57860025, 1.65143654]])" 392 | ] 393 | }, 394 | "execution_count": 16, 395 | "metadata": {}, 396 | "output_type": "execute_result" 397 | } 398 | ], 399 | "source": [ 400 | "np.random.seed(123)\n", 401 | "c = np.random.randn(3, 2)\n", 402 | "c" 403 | ] 404 | }, 405 | { 406 | 
"cell_type": "code", 407 | "execution_count": 17, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "# Your answer here." 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "
\n", 419 | "
\n", 420 | "
" 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": {}, 426 | "source": [ 427 | "## Solutions" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "metadata": {}, 433 | "source": [ 434 | "### 1." 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "Import numpy under the alias `np`." 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 18, 447 | "metadata": {}, 448 | "outputs": [], 449 | "source": [ 450 | "import numpy as np" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "### 2." 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "Create the following arrays:\n", 465 | "\n", 466 | "1. Create an array of 5 zeros.\n", 467 | "2. Create an array of 10 ones.\n", 468 | "3. Create an array of 5 3.141s.\n", 469 | "4. Create an array of the integers 1 to 20.\n", 470 | "5. Create a 5 x 5 matrix of ones with a dtype `int`." 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 19, 476 | "metadata": {}, 477 | "outputs": [ 478 | { 479 | "name": "stdout", 480 | "output_type": "stream", 481 | "text": [ 482 | "[0. 0. 0. 0. 0.]\n", 483 | "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n", 484 | "[3.141 3.141 3.141 3.141 3.141]\n", 485 | "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20]\n", 486 | "[[1 1 1 1 1]\n", 487 | " [1 1 1 1 1]\n", 488 | " [1 1 1 1 1]\n", 489 | " [1 1 1 1 1]\n", 490 | " [1 1 1 1 1]]\n" 491 | ] 492 | } 493 | ], 494 | "source": [ 495 | "print(np.zeros(5))\n", 496 | "print(np.ones(10))\n", 497 | "print(np.full(5, 3.141))\n", 498 | "print(np.array(range(21)))\n", 499 | "print(np.ones((5, 5), dtype=int))" 500 | ] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": {}, 505 | "source": [ 506 | "### 3." 
507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "Use numpy to:\n", 514 | "1. Create a 3D matrix of 3 x 3 x 3 full of random numbers drawn from a standard normal distribution (hint: `np.random.randn()`)\n", 515 | "2. Reshape the above array into shape (27,)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 20, 521 | "metadata": {}, 522 | "outputs": [ 523 | { 524 | "data": { 525 | "text/plain": [ 526 | "array([[[-2.42667924, -0.42891263, 1.26593626],\n", 527 | " [-0.8667404 , -0.67888615, -0.09470897],\n", 528 | " [ 1.49138963, -0.638902 , -0.44398196]],\n", 529 | "\n", 530 | " [[-0.43435128, 2.20593008, 2.18678609],\n", 531 | " [ 1.0040539 , 0.3861864 , 0.73736858],\n", 532 | " [ 1.49073203, -0.93583387, 1.17582904]],\n", 533 | "\n", 534 | " [[-1.25388067, -0.6377515 , 0.9071052 ],\n", 535 | " [-1.4286807 , -0.14006872, -0.8617549 ],\n", 536 | " [-0.25561937, -2.79858911, -1.7715331 ]]])" 537 | ] 538 | }, 539 | "execution_count": 20, 540 | "metadata": {}, 541 | "output_type": "execute_result" 542 | } 543 | ], 544 | "source": [ 545 | "x = np.random.randn(3, 3, 3)\n", 546 | "x" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": 21, 552 | "metadata": {}, 553 | "outputs": [ 554 | { 555 | "data": { 556 | "text/plain": [ 557 | "array([-2.42667924, -0.42891263, 1.26593626, -0.8667404 , -0.67888615,\n", 558 | " -0.09470897, 1.49138963, -0.638902 , -0.44398196, -0.43435128,\n", 559 | " 2.20593008, 2.18678609, 1.0040539 , 0.3861864 , 0.73736858,\n", 560 | " 1.49073203, -0.93583387, 1.17582904, -1.25388067, -0.6377515 ,\n", 561 | " 0.9071052 , -1.4286807 , -0.14006872, -0.8617549 , -0.25561937,\n", 562 | " -2.79858911, -1.7715331 ])" 563 | ] 564 | }, 565 | "execution_count": 21, 566 | "metadata": {}, 567 | "output_type": "execute_result" 568 | } 569 | ], 570 | "source": [ 571 | "x.reshape(-1) # or x.reshape(27)" 572 | ] 573 | }, 574 | { 575 | "cell_type": "markdown", 
576 | "metadata": {}, 577 | "source": [ 578 | "### 4." 579 | ] 580 | }, 581 | { 582 | "cell_type": "markdown", 583 | "metadata": {}, 584 | "source": [ 585 | "Create an array of 20 linearly spaced numbers between 1 and 10." 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": 22, 591 | "metadata": {}, 592 | "outputs": [ 593 | { 594 | "data": { 595 | "text/plain": [ 596 | "array([ 1. , 1.47368421, 1.94736842, 2.42105263, 2.89473684,\n", 597 | " 3.36842105, 3.84210526, 4.31578947, 4.78947368, 5.26315789,\n", 598 | " 5.73684211, 6.21052632, 6.68421053, 7.15789474, 7.63157895,\n", 599 | " 8.10526316, 8.57894737, 9.05263158, 9.52631579, 10. ])" 600 | ] 601 | }, 602 | "execution_count": 22, 603 | "metadata": {}, 604 | "output_type": "execute_result" 605 | } 606 | ], 607 | "source": [ 608 | "np.linspace(1, 10, 20)" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "### 5." 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": {}, 621 | "source": [ 622 | "Below I've defined an array of shape 5 x 5. Use indexing to produce the given outputs." 
623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 23, 628 | "metadata": {}, 629 | "outputs": [ 630 | { 631 | "data": { 632 | "text/plain": [ 633 | "array([[ 1, 2, 3, 4, 5],\n", 634 | " [ 6, 7, 8, 9, 10],\n", 635 | " [11, 12, 13, 14, 15],\n", 636 | " [16, 17, 18, 19, 20],\n", 637 | " [21, 22, 23, 24, 25]])" 638 | ] 639 | }, 640 | "execution_count": 23, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "a = np.arange(1, 26).reshape(5, -1)\n", 647 | "a" 648 | ] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": {}, 653 | "source": [ 654 | "```python\n", 655 | "20\n", 656 | "```" 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": 24, 662 | "metadata": {}, 663 | "outputs": [ 664 | { 665 | "data": { 666 | "text/plain": [ 667 | "20" 668 | ] 669 | }, 670 | "execution_count": 24, 671 | "metadata": {}, 672 | "output_type": "execute_result" 673 | } 674 | ], 675 | "source": [ 676 | "a[3,4]" 677 | ] 678 | }, 679 | { 680 | "cell_type": "markdown", 681 | "metadata": {}, 682 | "source": [ 683 | "```python\n", 684 | "array([[ 9, 10],\n", 685 | " [14, 15],\n", 686 | " [19, 20],\n", 687 | " [24, 25]])\n", 688 | "```" 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "execution_count": 25, 694 | "metadata": {}, 695 | "outputs": [ 696 | { 697 | "data": { 698 | "text/plain": [ 699 | "array([[ 9, 10],\n", 700 | " [14, 15],\n", 701 | " [19, 20],\n", 702 | " [24, 25]])" 703 | ] 704 | }, 705 | "execution_count": 25, 706 | "metadata": {}, 707 | "output_type": "execute_result" 708 | } 709 | ], 710 | "source": [ 711 | "a[1:,3:]" 712 | ] 713 | }, 714 | { 715 | "cell_type": "markdown", 716 | "metadata": {}, 717 | "source": [ 718 | "```python\n", 719 | "array([ 6, 7, 8, 9, 10])\n", 720 | "```" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 26, 726 | "metadata": {}, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | 
"array([ 6, 7, 8, 9, 10])" 732 | ] 733 | }, 734 | "execution_count": 26, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "a[1,:]" 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": {}, 746 | "source": [ 747 | "```python\n", 748 | "array([[11, 12, 13, 14, 15],\n", 749 | " [16, 17, 18, 19, 20]])\n", 750 | "```" 751 | ] 752 | }, 753 | { 754 | "cell_type": "markdown", 755 | "metadata": {}, 756 | "source": [ 757 | "```python\n", 758 | "array([[ 8, 9],\n", 759 | " [13, 14]])\n", 760 | "```" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 27, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/plain": [ 771 | "array([[ 8, 9],\n", 772 | " [13, 14]])" 773 | ] 774 | }, 775 | "execution_count": 27, 776 | "metadata": {}, 777 | "output_type": "execute_result" 778 | } 779 | ], 780 | "source": [ 781 | "a[1:3,2:4]" 782 | ] 783 | }, 784 | { 785 | "cell_type": "markdown", 786 | "metadata": {}, 787 | "source": [ 788 | "### 6." 789 | ] 790 | }, 791 | { 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "Calculate the sum of all the numbers in `a`." 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 28, 801 | "metadata": {}, 802 | "outputs": [ 803 | { 804 | "data": { 805 | "text/plain": [ 806 | "325" 807 | ] 808 | }, 809 | "execution_count": 28, 810 | "metadata": {}, 811 | "output_type": "execute_result" 812 | } 813 | ], 814 | "source": [ 815 | "a.sum()" 816 | ] 817 | }, 818 | { 819 | "cell_type": "markdown", 820 | "metadata": {}, 821 | "source": [ 822 | "### 7." 823 | ] 824 | }, 825 | { 826 | "cell_type": "markdown", 827 | "metadata": {}, 828 | "source": [ 829 | "Calculate the sum of each row in `a`." 
830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": 29, 835 | "metadata": {}, 836 | "outputs": [ 837 | { 838 | "data": { 839 | "text/plain": [ 840 | "array([ 15, 40, 65, 90, 115])" 841 | ] 842 | }, 843 | "execution_count": 29, 844 | "metadata": {}, 845 | "output_type": "execute_result" 846 | } 847 | ], 848 | "source": [ 849 | "a.sum(axis=1)" 850 | ] 851 | }, 852 | { 853 | "cell_type": "markdown", 854 | "metadata": {}, 855 | "source": [ 856 | "### 8." 857 | ] 858 | }, 859 | { 860 | "cell_type": "markdown", 861 | "metadata": {}, 862 | "source": [ 863 | "Extract all values of `a` greater than the mean of `a` (hint: use a boolean mask)." 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": 30, 869 | "metadata": {}, 870 | "outputs": [ 871 | { 872 | "data": { 873 | "text/plain": [ 874 | "array([14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25])" 875 | ] 876 | }, 877 | "execution_count": 30, 878 | "metadata": {}, 879 | "output_type": "execute_result" 880 | } 881 | ], 882 | "source": [ 883 | "a[a > a.mean()]" 884 | ] 885 | }, 886 | { 887 | "cell_type": "markdown", 888 | "metadata": {}, 889 | "source": [ 890 | "### 9." 
891 | ] 892 | }, 893 | { 894 | "cell_type": "markdown", 895 | "metadata": {}, 896 | "source": [ 897 | "Find the location of the minimum value in the following array `b`:" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 31, 903 | "metadata": {}, 904 | "outputs": [ 905 | { 906 | "data": { 907 | "text/plain": [ 908 | "array([-1.0856306 , 0.99734545, 0.2829785 , -1.50629471, -0.57860025,\n", 909 | " 1.65143654, -2.42667924, -0.42891263, 1.26593626, -0.8667404 ])" 910 | ] 911 | }, 912 | "execution_count": 31, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "np.random.seed(123)\n", 919 | "b = np.random.randn(10)\n", 920 | "b" 921 | ] 922 | }, 923 | { 924 | "cell_type": "code", 925 | "execution_count": 32, 926 | "metadata": {}, 927 | "outputs": [ 928 | { 929 | "data": { 930 | "text/plain": [ 931 | "6" 932 | ] 933 | }, 934 | "execution_count": 32, 935 | "metadata": {}, 936 | "output_type": "execute_result" 937 | } 938 | ], 939 | "source": [ 940 | "b.argmin()" 941 | ] 942 | }, 943 | { 944 | "cell_type": "markdown", 945 | "metadata": {}, 946 | "source": [ 947 | "### 10." 
948 | ] 949 | }, 950 | { 951 | "cell_type": "markdown", 952 | "metadata": {}, 953 | "source": [ 954 | "Find the location of the maximum value in the following 2D array `c` (hint: there are many ways to do this question, but a quick search on stackoverflow.com will typically help you find the optimum solution for a problem, for example see [post](https://stackoverflow.com/questions/3584243/get-the-position-of-the-biggest-item-in-a-multi-dimensional-numpy-array)):" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": 33, 960 | "metadata": {}, 961 | "outputs": [ 962 | { 963 | "data": { 964 | "text/plain": [ 965 | "array([[-1.0856306 , 0.99734545],\n", 966 | " [ 0.2829785 , -1.50629471],\n", 967 | " [-0.57860025, 1.65143654]])" 968 | ] 969 | }, 970 | "execution_count": 33, 971 | "metadata": {}, 972 | "output_type": "execute_result" 973 | } 974 | ], 975 | "source": [ 976 | "np.random.seed(123)\n", 977 | "c = np.random.randn(3, 2)\n", 978 | "c" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": 34, 984 | "metadata": {}, 985 | "outputs": [ 986 | { 987 | "name": "stdout", 988 | "output_type": "stream", 989 | "text": [ 990 | "Location of maximum: (2, 1)\n", 991 | " Value of maximum: 1.65\n" 992 | ] 993 | } 994 | ], 995 | "source": [ 996 | "print(f\"Location of maximum: {np.unravel_index(c.argmax(), c.shape)}\")\n", 997 | "print(f\" Value of maximum: {c.max():.2f}\")" 998 | ] 999 | } 1000 | ], 1001 | "metadata": { 1002 | "kernelspec": { 1003 | "display_name": "Python 3", 1004 | "language": "python", 1005 | "name": "python3" 1006 | }, 1007 | "language_info": { 1008 | "codemirror_mode": { 1009 | "name": "ipython", 1010 | "version": 3 1011 | }, 1012 | "file_extension": ".py", 1013 | "mimetype": "text/x-python", 1014 | "name": "python", 1015 | "nbconvert_exporter": "python", 1016 | "pygments_lexer": "ipython3", 1017 | "version": "3.7.8" 1018 | } 1019 | }, 1020 | "nbformat": 4, 1021 | "nbformat_minor": 4 1022 | } 1023 | 
-------------------------------------------------------------------------------- /practice-exercises/chapter7-pandas-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Pandas\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 7](../chapters/chapter7-pandas.ipynb)." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercises" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 1." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "In this set of practice exercises we'll be investigating the carbon footprint of different foods. We'll be leveraging a dataset compiled by [Kasia Kulma](https://r-tastic.co.uk/post/from-messy-to-tidy/) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday).\n", 45 | "\n", 46 | "Start by importing pandas with the alias `pd`." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Your answer here." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 2." 
63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "The dataset we'll be working with has the following columns:\n", 70 | "\n", 71 | "|column |description |\n", 72 | "|:-------------|:-----------|\n", 73 | "|country | Country Name |\n", 74 | "|food_category | Food Category |\n", 75 | "|consumption | Consumption (kg/person/year) |\n", 76 | "|co2_emmission | Co2 Emission (Kg CO2/person/year) |\n", 77 | "\n", 78 | "\n", 79 | "Import the dataset as a dataframe named `df` from this url: <https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv>" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# Your answer here." 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "### 3." 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "How many rows and columns are there in the dataframe?" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# Your answer here." 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### 4." 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "What is the type of data in each column of `df`?" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 6, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# Your answer here." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### 5." 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "What is the mean `co2_emmission` of the whole dataset?" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Your answer here." 
158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### 6." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "How many different kinds of foods are there in the dataset? How many countries are in the dataset?" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# Your answer here." 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### 7." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "What is the maximum `co2_emmission` in the dataset and which food type and country does it belong to?" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 8, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# Your answer here." 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### 8." 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "How many countries produce more than 1000 Kg CO2/person/year for at least one food type?" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 8, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Your answer here." 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "### 9." 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "Which country consumes the least amount of beef per person per year?" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 8, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# Your answer here." 
250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### 10." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "Which country consumes the most amount of soybeans per person per year?" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 8, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Your answer here." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### 11." 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "What is the total emissions of all the meat products (Pork, Poultry, Fish, Lamb & Goat, Beef) in the dataset combined?" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 8, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "# Your answer here." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### 12." 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "What is the total emissions of all other (non-meat) products in the dataset combined?" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 8, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# Your answer here." 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "
\n", 326 | "
\n", 327 | "
" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "## Solutions" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "### 1." 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": [ 348 | "In this set of practice exercises we'll be investigating the carbon footprint of different foods. We'll be leveraging a dataset compiled by [Kasia Kulma](https://r-tastic.co.uk/post/from-messy-to-tidy/) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday).\n", 349 | "\n", 350 | "Start by importing pandas with the alias `pd`." 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 1, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "import pandas as pd" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "### 2." 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "The dataset we'll be working with has the following columns:\n", 374 | "\n", 375 | "|column |description |\n", 376 | "|:-------------|:-----------|\n", 377 | "|country | Country Name |\n", 378 | "|food_category | Food Category |\n", 379 | "|consumption | Consumption (kg/person/year) |\n", 380 | "|co2_emmission | Co2 Emission (Kg CO2/person/year) |\n", 381 | "\n", 382 | "\n", 383 | "Import the dataset as a dataframe named `df` from this url: " 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 3, 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/html": [ 394 | "
\n", 395 | "\n", 408 | "\n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | "
countryfood_categoryconsumptionco2_emmission
0ArgentinaPork10.5137.20
1ArgentinaPoultry38.6641.53
2ArgentinaBeef55.481712.00
3ArgentinaLamb & Goat1.5654.63
4ArgentinaFish4.366.96
...............
1425BangladeshMilk - inc. cheese21.9131.21
1426BangladeshWheat and Wheat Products17.473.33
1427BangladeshRice171.73219.76
1428BangladeshSoybeans0.610.27
1429BangladeshNuts inc. Peanut Butter0.721.27
\n", 498 | "

1430 rows × 4 columns

\n", 499 | "
" 500 | ], 501 | "text/plain": [ 502 | " country food_category consumption co2_emmission\n", 503 | "0 Argentina Pork 10.51 37.20\n", 504 | "1 Argentina Poultry 38.66 41.53\n", 505 | "2 Argentina Beef 55.48 1712.00\n", 506 | "3 Argentina Lamb & Goat 1.56 54.63\n", 507 | "4 Argentina Fish 4.36 6.96\n", 508 | "... ... ... ... ...\n", 509 | "1425 Bangladesh Milk - inc. cheese 21.91 31.21\n", 510 | "1426 Bangladesh Wheat and Wheat Products 17.47 3.33\n", 511 | "1427 Bangladesh Rice 171.73 219.76\n", 512 | "1428 Bangladesh Soybeans 0.61 0.27\n", 513 | "1429 Bangladesh Nuts inc. Peanut Butter 0.72 1.27\n", 514 | "\n", 515 | "[1430 rows x 4 columns]" 516 | ] 517 | }, 518 | "execution_count": 3, 519 | "metadata": {}, 520 | "output_type": "execute_result" 521 | } 522 | ], 523 | "source": [ 524 | "url = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv\"\n", 525 | "df = pd.read_csv(url)\n", 526 | "df" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "### 3." 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "How many rows and columns are there in the dataframe?" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 4, 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "(1430, 4)" 552 | ] 553 | }, 554 | "execution_count": 4, 555 | "metadata": {}, 556 | "output_type": "execute_result" 557 | } 558 | ], 559 | "source": [ 560 | "df.shape" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "### 4." 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "What is the type of data in each column of `df`?" 
575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 5, 580 | "metadata": {}, 581 | "outputs": [ 582 | { 583 | "name": "stdout", 584 | "output_type": "stream", 585 | "text": [ 586 | "\n", 587 | "RangeIndex: 1430 entries, 0 to 1429\n", 588 | "Data columns (total 4 columns):\n", 589 | " # Column Non-Null Count Dtype \n", 590 | "--- ------ -------------- ----- \n", 591 | " 0 country 1430 non-null object \n", 592 | " 1 food_category 1430 non-null object \n", 593 | " 2 consumption 1430 non-null float64\n", 594 | " 3 co2_emmission 1430 non-null float64\n", 595 | "dtypes: float64(2), object(2)\n", 596 | "memory usage: 44.8+ KB\n" 597 | ] 598 | } 599 | ], 600 | "source": [ 601 | "df.info()" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": {}, 607 | "source": [ 608 | "### 5." 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "What is the mean `co2_emmission` of the whole dataset?" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 7, 621 | "metadata": { 622 | "tags": [ 623 | "raises-exception" 624 | ] 625 | }, 626 | "outputs": [ 627 | { 628 | "data": { 629 | "text/plain": [ 630 | "74.383993006993" 631 | ] 632 | }, 633 | "execution_count": 7, 634 | "metadata": {}, 635 | "output_type": "execute_result" 636 | } 637 | ], 638 | "source": [ 639 | "df[\"co2_emmission\"].mean()" 640 | ] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": {}, 645 | "source": [ 646 | "### 6." 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": {}, 652 | "source": [ 653 | "How many different kinds of foods are there in the dataset? How many countries are in the dataset?" 
654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 35, 659 | "metadata": {}, 660 | "outputs": [ 661 | { 662 | "name": "stdout", 663 | "output_type": "stream", 664 | "text": [ 665 | "There are 11 foods.\n", 666 | "There are 130 countries.\n" 667 | ] 668 | } 669 | ], 670 | "source": [ 671 | "print(f\"There are {df['food_category'].nunique()} foods.\")\n", 672 | "print(f\"There are {df['country'].nunique()} countries.\")" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | "### 7." 680 | ] 681 | }, 682 | { 683 | "cell_type": "markdown", 684 | "metadata": {}, 685 | "source": [ 686 | "What is the maximum `co2_emmission` in the dataset and which food type and country does it belong to?" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": 46, 692 | "metadata": {}, 693 | "outputs": [ 694 | { 695 | "data": { 696 | "text/plain": [ 697 | "country Argentina\n", 698 | "food_category Beef\n", 699 | "consumption 55.48\n", 700 | "co2_emmission 1712\n", 701 | "Name: 2, dtype: object" 702 | ] 703 | }, 704 | "execution_count": 46, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "df.iloc[df['co2_emmission'].idxmax()]" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": {}, 716 | "source": [ 717 | "### 8." 718 | ] 719 | }, 720 | { 721 | "cell_type": "markdown", 722 | "metadata": {}, 723 | "source": [ 724 | "How many countries produce more than 1000 Kg CO2/person/year for at least one food type?" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 52, 730 | "metadata": {}, 731 | "outputs": [ 732 | { 733 | "data": { 734 | "text/html": [ 735 | "
\n", 736 | "\n", 749 | "\n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | "
countryfood_categoryconsumptionco2_emmission
2ArgentinaBeef55.481712.00
13AustraliaBeef33.861044.85
57USABeef36.241118.29
90BrazilBeef39.251211.17
123BermudaBeef33.151022.94
\n", 797 | "
" 798 | ], 799 | "text/plain": [ 800 | " country food_category consumption co2_emmission\n", 801 | "2 Argentina Beef 55.48 1712.00\n", 802 | "13 Australia Beef 33.86 1044.85\n", 803 | "57 USA Beef 36.24 1118.29\n", 804 | "90 Brazil Beef 39.25 1211.17\n", 805 | "123 Bermuda Beef 33.15 1022.94" 806 | ] 807 | }, 808 | "execution_count": 52, 809 | "metadata": {}, 810 | "output_type": "execute_result" 811 | } 812 | ], 813 | "source": [ 814 | "df.query(\"co2_emmission > 1000\")" 815 | ] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "metadata": {}, 820 | "source": [ 821 | "### 9." 822 | ] 823 | }, 824 | { 825 | "cell_type": "markdown", 826 | "metadata": {}, 827 | "source": [ 828 | "Which country consumes the least amount of beef per person per year?" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 66, 834 | "metadata": {}, 835 | "outputs": [ 836 | { 837 | "data": { 838 | "text/html": [ 839 | "
\n", 840 | "\n", 853 | "\n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | "
countryfood_categoryconsumptionco2_emmission
1410LiberiaBeef0.7824.07
\n", 873 | "
" 874 | ], 875 | "text/plain": [ 876 | " country food_category consumption co2_emmission\n", 877 | "1410 Liberia Beef 0.78 24.07" 878 | ] 879 | }, 880 | "execution_count": 66, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "(df.query(\"food_category == 'Beef'\")\n", 887 | " .sort_values(by=\"consumption\")\n", 888 | " .head(1))" 889 | ] 890 | }, 891 | { 892 | "cell_type": "markdown", 893 | "metadata": {}, 894 | "source": [ 895 | "### 10." 896 | ] 897 | }, 898 | { 899 | "cell_type": "markdown", 900 | "metadata": {}, 901 | "source": [ 902 | "Which country consumes the most amount of soybeans per person per year?" 903 | ] 904 | }, 905 | { 906 | "cell_type": "code", 907 | "execution_count": 68, 908 | "metadata": {}, 909 | "outputs": [ 910 | { 911 | "data": { 912 | "text/html": [ 913 | "
\n", 914 | "\n", 927 | "\n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | "
countryfood_categoryconsumptionco2_emmission
1010Taiwan. ROCSoybeans16.957.63
\n", 947 | "
" 948 | ], 949 | "text/plain": [ 950 | " country food_category consumption co2_emmission\n", 951 | "1010 Taiwan. ROC Soybeans 16.95 7.63" 952 | ] 953 | }, 954 | "execution_count": 68, 955 | "metadata": {}, 956 | "output_type": "execute_result" 957 | } 958 | ], 959 | "source": [ 960 | "(df.query(\"food_category == 'Soybeans'\")\n", 961 | " .sort_values(by=\"consumption\", ascending=False)\n", 962 | " .head(1))" 963 | ] 964 | }, 965 | { 966 | "cell_type": "markdown", 967 | "metadata": {}, 968 | "source": [ 969 | "### 11." 970 | ] 971 | }, 972 | { 973 | "cell_type": "markdown", 974 | "metadata": {}, 975 | "source": [ 976 | "What is the total emissions of all the meat products (Pork, Poultry, Fish, Lamb & Goat, Beef) in the dataset combined?" 977 | ] 978 | }, 979 | { 980 | "cell_type": "code", 981 | "execution_count": 78, 982 | "metadata": {}, 983 | "outputs": [ 984 | { 985 | "data": { 986 | "text/plain": [ 987 | "74441.13" 988 | ] 989 | }, 990 | "execution_count": 78, 991 | "metadata": {}, 992 | "output_type": "execute_result" 993 | } 994 | ], 995 | "source": [ 996 | "meat = ['Poultry', 'Pork', 'Fish', 'Lamb & Goat', 'Beef']\n", 997 | "df[\"co2_emmission\"][df['food_category'].isin(meat)].sum()" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "markdown", 1002 | "metadata": {}, 1003 | "source": [ 1004 | "### 12." 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "markdown", 1009 | "metadata": {}, 1010 | "source": [ 1011 | "What is the total emissions of all other (non-meat) products in the dataset combined?" 
1012 | ] 1013 | }, 1014 | { 1015 | "cell_type": "code", 1016 | "execution_count": 83, 1017 | "metadata": {}, 1018 | "outputs": [ 1019 | { 1020 | "data": { 1021 | "text/plain": [ 1022 | "31927.98" 1023 | ] 1024 | }, 1025 | "execution_count": 83, 1026 | "metadata": {}, 1027 | "output_type": "execute_result" 1028 | } 1029 | ], 1030 | "source": [ 1031 | "meat = ['Poultry', 'Pork', 'Fish', 'Lamb & Goat', 'Beef']\n", 1032 | "df[\"co2_emmission\"][~df['food_category'].isin(meat)].sum()" 1033 | ] 1034 | } 1035 | ], 1036 | "metadata": { 1037 | "kernelspec": { 1038 | "display_name": "Python 3", 1039 | "language": "python", 1040 | "name": "python3" 1041 | }, 1042 | "language_info": { 1043 | "codemirror_mode": { 1044 | "name": "ipython", 1045 | "version": 3 1046 | }, 1047 | "file_extension": ".py", 1048 | "mimetype": "text/x-python", 1049 | "name": "python", 1050 | "nbconvert_exporter": "python", 1051 | "pygments_lexer": "ipython3", 1052 | "version": "3.7.8" 1053 | } 1054 | }, 1055 | "nbformat": 4, 1056 | "nbformat_minor": 4 1057 | } 1058 | -------------------------------------------------------------------------------- /practice-exercises/chapter3-tests-classes-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Unit Tests & Classes\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 3](../chapters/chapter3-tests-classes.ipynb)." 
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import math" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Exercises" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "### 1." 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "The function `area()` accepts the argument `radius` and calculates the area of a circle. Write three tests using `assert` statements for the following conditions:\n", 54 | "1. Assert that `area(1)` returns a `float`;\n", 55 | "2. Assert that `area(0)` returns a value of 0;\n", 56 | "3. Assert that `area(5)` is approximately equal to 78.5 (hint: `math.isclose(..., abs_tol=0.1)`)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "def area(radius):\n", 66 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 67 | " return math.pi * radius ** 2" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "# Your answer here." 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### 2." 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "In the spirit of the EAFP (easier to ask for forgiveness than permission) philosophy. 
Modify the code of the function `area()` and add a `try`/`except` statement to catch the type error raised by passing a string to `area()` as shown below:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": { 97 | "tags": [ 98 | "raises-exception" 99 | ] 100 | }, 101 | "outputs": [ 102 | { 103 | "ename": "TypeError", 104 | "evalue": "unsupported operand type(s) for ** or pow(): 'str' and 'int'", 105 | "output_type": "error", 106 | "traceback": [ 107 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 108 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 109 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'10'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36marea\u001b[0;34m(radius)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\"\"\"Calculate the area of a circle based on the given radius.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mradius\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 111 | "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for ** or pow(): 'str' and 'int'" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "area('10')" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 5, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "def 
area(radius):\n", 126 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 127 | " pass # Remove this line and add your answer here." 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### 3." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "In the spirit of the LBYL (look before you leap) philosophy, modify the code of the function `area()` and add a conditional `if`/`else` statement to make sure that a user has passed a number (`int` or `float`) to the `area()` function. If they pass something else, raise a `TypeError`." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "def area(radius):\n", 151 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 152 | " pass # Remove this line and add your answer here." 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### 4." 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "For this exercise I want you to create a class called `Circle`. It should have the following characteristics:\n", 167 | "1. It should be initiated with the argument `radius` and store this as an instance attribute.\n", 168 | "2. Have a method `area()` which calculates the area of the circle.\n", 169 | "3. Have a method `circumference()` which calculates the circumference of the circle.\n", 170 | "4. Have the method `__str__()` which is a special method in Python and controls what is output to the screen when you `print()` an instance of your class (learn more [here](https://realpython.com/lessons/how-and-when-use-__str__/)). The `print()` statement should print the string `f\"A Circle with radius {self.radius}\"`.\n", 171 | "\n", 172 | "I've provided some tests for you to check your class." 
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "class Circle:\n", 182 | " \"\"\"A circle with a radius r.\"\"\"\n", 183 | "\n", 184 | " pass # Remove this line and add your answer here." 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 8, 190 | "metadata": { 191 | "tags": [ 192 | "raises-exception" 193 | ] 194 | }, 195 | "outputs": [ 196 | { 197 | "ename": "TypeError", 198 | "evalue": "Circle() takes no arguments", 199 | "output_type": "error", 200 | "traceback": [ 201 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 202 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 203 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m28.3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32massert\u001b[0m 
\u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcircumference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m18.8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Circle with radius 3\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 204 | "\u001b[0;31mTypeError\u001b[0m: Circle() takes no arguments" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "assert Circle(3).radius == 3, \"Test 1 failed.\"\n", 210 | "assert math.isclose(Circle(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n", 211 | "assert math.isclose(Circle(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n", 212 | "assert Circle(3).__str__() == \"A Circle with radius 3\", \"Test 4 failed.\"" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### 5." 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Now, let's create a new class `Sphere` that inherits from the `Circle` class we created above. It should have the following characteristics:\n", 227 | "\n", 228 | "1. It should be initiated exactly the same as `Circle` was, with the single argument `radius` which is stored as an instance attribute.\n", 229 | "2. 
Have a method `volume()` which calculates the volume of the sphere ($\\frac{4}{3}{\\pi}{r^3}$).\n", 230 | "3. Outputs the string `f\"A Sphere with volume 4.19\"` when you call `print(Sphere(1))` (hint: recall the `__str__()` method from the previous question).\n", 231 | "\n", 232 | "I've provided some tests for you to check your class." 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 9, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "# Your answer here." 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 10, 247 | "metadata": { 248 | "tags": [ 249 | "raises-exception" 250 | ] 251 | }, 252 | "outputs": [ 253 | { 254 | "ename": "NameError", 255 | "evalue": "name 'Sphere' is not defined", 256 | "output_type": "error", 257 | "traceback": [ 258 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 259 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 260 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m28.3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 
failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcircumference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m18.8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvolume\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m113.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Sphere with volume 4.19\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 261 | "\u001b[0;31mNameError\u001b[0m: name 'Sphere' is not defined" 262 | ] 263 | } 264 | ], 265 | "source": [ 266 | "assert Sphere(3).radius == 3, \"Test 1 failed.\"\n", 267 | "assert math.isclose(Sphere(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n", 268 | "assert 
math.isclose(Sphere(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n", 269 | "assert math.isclose(Sphere(3).volume(), 113.1, abs_tol=0.1), \"Test 3 failed.\"\n", 270 | "assert Sphere(1).__str__() == \"A Sphere with volume 4.19\", \"Test 4 failed.\"" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "### 6." 278 | ] 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": [ 284 | "Imagine that users of our `Sphere` class often want to instantiate our class with a `circumference` instead of a `radius`. Add a [class method](https://pages.github.ubc.ca/MDS-2020-21/DSCI_511_py-prog_students/lectures/lecture3-tests-classes.html#methods-class-methods-static-methods) called `from_circ()` to the `Sphere` class that allows users to do this. The method should calculate the `radius` from the passed `circumference`, and then use that `radius` to make an instance of `Sphere`.\n", 285 | "\n", 286 | "I've provided some tests for you to check your modified class." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 11, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "# Your answer here." 
296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 12, 301 | "metadata": { 302 | "tags": [ 303 | "raises-exception" 304 | ] 305 | }, 306 | "outputs": [ 307 | { 308 | "ename": "NameError", 309 | "evalue": "name 'Sphere' is not defined", 310 | "output_type": "error", 311 | "traceback": [ 312 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 313 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 314 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.95\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 
3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvolume\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3.65\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Sphere with volume 3.65\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 5 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 315 | "\u001b[0;31mNameError\u001b[0m: name 'Sphere' is not defined" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "assert Sphere.from_circ(0).radius == 0, \"Test 1 failed.\"\n", 321 | "assert Sphere.from_circ(3 * math.pi).radius == 1.5, \"Test 2 failed.\" \n", 322 | "assert math.isclose(Sphere.from_circ(6).radius, 0.95, abs_tol=0.1), \"Test 3 failed.\"\n", 323 | "assert math.isclose(Sphere.from_circ(6).volume(), 3.65, abs_tol=0.1), \"Test 4 failed.\"\n", 324 | "assert Sphere.from_circ(6).__str__() == \"A Sphere with volume 3.65\", \"Test 5 failed.\"" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "
\n", 332 | "
\n", 333 | "
" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "## Solutions" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "### 1." 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "The function `area()` accepts the argument `radius` and calculates the area of a circle. Write three tests using `assert` statements for the following conditions:\n", 355 | "1. Assert that `area(1)` returns a `float`;\n", 356 | "2. Assert that `area(0)` returns a value of 0;\n", 357 | "3. Assert that `area(5)` is approximately equal to 78.5 (hint: `math.isclose(..., abs_tol=0.1)`)" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 13, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "def area(radius):\n", 367 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 368 | " return math.pi * radius ** 2" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 14, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [ 377 | "assert isinstance(area(1), float), 'Test 1 failed!'\n", 378 | "assert area(0) == 0, 'Test 2 failed!'\n", 379 | "assert math.isclose(area(5), 78.5, abs_tol=0.1)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "### 2." 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "In the spirit of the EAFP (easier to ask for forgiveness than permission) philosophy. 
Modify the code of the function `area()` and add a `try`/`except` statement to catch the type error raised by passing a string to `area()` as shown below:" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 15, 399 | "metadata": { 400 | "tags": [ 401 | "raises-exception" 402 | ] 403 | }, 404 | "outputs": [ 405 | { 406 | "ename": "TypeError", 407 | "evalue": "unsupported operand type(s) for ** or pow(): 'str' and 'int'", 408 | "output_type": "error", 409 | "traceback": [ 410 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 411 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 412 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'10'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 413 | "\u001b[0;32m\u001b[0m in \u001b[0;36marea\u001b[0;34m(radius)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\"\"\"Calculate the area of a circle based on the given radius.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mradius\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 414 | "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for ** or pow(): 'str' and 'int'" 415 | ] 416 | } 417 | ], 418 | "source": [ 419 | "area('10')" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 16, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "def 
area(radius):\n", 429 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 430 | " try:\n", 431 | " return math.pi * radius ** 2\n", 432 | " except TypeError:\n", 433 | " print(f\"radius should be a number but you entered a {type(radius)}\")\n", 434 | " except:\n", 435 | " print(\"Some other error occurred!\")" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### 3." 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "In the spirit of the LBYL (look before you leap) philosophy, modify the code of the function `area()` and add a conditional `if`/`else` statement to make sure that a user has passed a number (`int` or `float`) to the `area()` function. If they pass something else, raise a `TypeError`." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 17, 455 | "metadata": {}, 456 | "outputs": [], 457 | "source": [ 458 | "def area(radius):\n", 459 | " \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n", 460 | " if isinstance(radius, (int, float)):\n", 461 | " return math.pi * radius ** 2\n", 462 | " else:\n", 463 | " raise TypeError(f\"radius should be a number but you entered a {type(radius)}\")" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "metadata": {}, 469 | "source": [ 470 | "### 4." 471 | ] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": {}, 476 | "source": [ 477 | "For this exercise I want you to create a class called `Circle`. It should have the following characteristics:\n", 478 | "1. It should be initiated with the argument `radius` and store this as an instance attribute.\n", 479 | "2. Have a method `area()` which calculates the area of the circle.\n", 480 | "3. Have a method `circumference()` which calculates the circumference of the circle.\n", 481 | "4. 
Have the method `__str__()` which is a special method in Python and controls what is output to the screen when you `print()` an instance of your class (learn more [here](https://realpython.com/lessons/how-and-when-use-__str__/)). The `print()` statement should print the string `f\"A Circle with radius {self.radius}\"`.\n", 482 | "\n", 483 | "I've provided some tests for you to check your class." 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 18, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "class Circle:\n", 493 | " \"\"\"A circle with a radius r.\"\"\"\n", 494 | "\n", 495 | " def __init__(self, radius):\n", 496 | " self.radius = radius\n", 497 | "\n", 498 | " def area(self):\n", 499 | " \"\"\"Calculate the area of the circle.\"\"\"\n", 500 | " return math.pi * self.radius ** 2\n", 501 | "\n", 502 | " def circumference(self):\n", 503 | " \"\"\"Calculate the circumference of the circle.\"\"\"\n", 504 | " return 2.0 * math.pi * self.radius\n", 505 | "\n", 506 | " def __str__(self):\n", 507 | " return f\"A Circle with radius {self.radius}\"" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 19, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "assert Circle(3).radius == 3, \"Test 1 failed.\"\n", 517 | "assert math.isclose(Circle(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n", 518 | "assert math.isclose(Circle(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n", 519 | "assert Circle(3).__str__() == \"A Circle with radius 3\", \"Test 4 failed.\"" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": {}, 525 | "source": [ 526 | "### 5." 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": {}, 532 | "source": [ 533 | "Now, let's create a new class `Sphere` that inherits from the `Circle` class we created above. It should have the following characteristics:\n", 534 | "\n", 535 | "1. 
It should be initiated exactly the same as `Circle` was, with the single argument `radius` which is stored as an instance attribute.\n", 536 | "2. Have a method `volume()` which calculates the volume of the sphere ($\\frac{4}{3}{\\pi}{r^3}$).\n", 537 | "3. Outputs the string `f\"A Sphere with volume 4.19\"` when you call `print(Sphere(1))` (hint: recall the `__str__()` method from the previous question).\n", 538 | "\n", 539 | "I've provided some tests for you to check your class." 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 20, 545 | "metadata": {}, 546 | "outputs": [], 547 | "source": [ 548 | "class Sphere(Circle):\n", 549 | " \"\"\"A sphere with a radius r.\"\"\"\n", 550 | " \n", 551 | " def volume(self):\n", 552 | " \"\"\"Calculate the volume of the sphere.\"\"\"\n", 553 | " return 4 / 3 * math.pi * self.radius ** 3\n", 554 | "\n", 555 | " def __str__(self):\n", 556 | " return f\"A Sphere with volume {self.volume():.2f}\"" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 21, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "assert Sphere(3).radius == 3, \"Test 1 failed.\"\n", 566 | "assert math.isclose(Sphere(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n", 567 | "assert math.isclose(Sphere(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n", 568 | "assert math.isclose(Sphere(3).volume(), 113.1, abs_tol=0.1), \"Test 3 failed.\"\n", 569 | "assert Sphere(1).__str__() == \"A Sphere with volume 4.19\", \"Test 4 failed.\"" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "### 6." 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": {}, 582 | "source": [ 583 | "Imagine that users of our `Sphere` class often want to instantiate our class with a `circumference` instead of a `radius`. 
Add a [class method](https://pages.github.ubc.ca/MDS-2020-21/DSCI_511_py-prog_students/lectures/lecture3-tests-classes.html#methods-class-methods-static-methods) called `from_circ()` to the `Sphere` class that allows users to do this. The method should calculate the `radius` from the passed `circumference`, and then use that `radius` to make an instance of `Sphere`.\n", 584 | "\n", 585 | "I've provided some tests for you to check your modified class." 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": 22, 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [ 594 | "class Sphere(Circle):\n", 595 | " \"\"\"A sphere with a radius r.\"\"\"\n", 596 | " \n", 597 | " def volume(self):\n", 598 | " \"\"\"Calculate the volume of the sphere.\"\"\"\n", 599 | " return 4 / 3 * math.pi * self.radius ** 3\n", 600 | " \n", 601 | " @classmethod\n", 602 | " def from_circ(cls, circumference):\n", 603 | " \"\"\"Make an instance of Sphere from a circumference.\"\"\"\n", 604 | " radius = circumference / (2 * math.pi)\n", 605 | " return cls(radius)\n", 606 | "\n", 607 | " def __str__(self):\n", 608 | " return f\"A Sphere with volume {self.volume():.2f}\"" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 23, 614 | "metadata": {}, 615 | "outputs": [], 616 | "source": [ 617 | "assert Sphere.from_circ(0).radius == 0, \"Test 1 failed.\"\n", 618 | "assert Sphere.from_circ(3 * math.pi).radius == 1.5, \"Test 2 failed.\" \n", 619 | "assert math.isclose(Sphere.from_circ(6).radius, 0.95, abs_tol=0.1), \"Test 3 failed.\"\n", 620 | "assert math.isclose(Sphere.from_circ(6).volume(), 3.65, abs_tol=0.1), \"Test 4 failed.\"\n", 621 | "assert Sphere.from_circ(6).__str__() == \"A Sphere with volume 3.65\", \"Test 5 failed.\"" 622 | ] 623 | } 624 | ], 625 | "metadata": { 626 | "kernelspec": { 627 | "display_name": "Python 3", 628 | "language": "python", 629 | "name": "python3" 630 | }, 631 | "language_info": { 632 | "codemirror_mode": { 633 | 
"name": "ipython", 634 | "version": 3 635 | }, 636 | "file_extension": ".py", 637 | "mimetype": "text/x-python", 638 | "name": "python", 639 | "nbconvert_exporter": "python", 640 | "pygments_lexer": "ipython3", 641 | "version": "3.7.8" 642 | } 643 | }, 644 | "nbformat": 4, 645 | "nbformat_minor": 4 646 | } 647 | -------------------------------------------------------------------------------- /practice-exercises/chapter9-wrangling-advanced-practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "![](../docs/banner.png)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Advanced Wrangling With Pandas\n", 15 | "\n", 16 | "**Tomas Beuzen, September 2020**" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "These exercises complement [Chapter 9](../chapters/chapter9-wrangling-advanced.ipynb)." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercises" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 1." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "In this set of practice exercises we'll be looking at a cool dataset of real passwords (made available from actual data breaches) sourced and compiled from [Information is Beautiful](https://informationisbeautiful.net/visualizations/top-500-passwords-visualized/?utm_content=buffer994fa&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday). These passwords are common (\"bad\") passwords that you should avoid using! But we're going to use this dataset to practice some regex skills.\n", 45 | "\n", 46 | "Let's start by importing pandas with the alias `pd`." 
 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 1, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# Your answer here." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 2." 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "The dataset has the following columns:\n", 70 | "\n", 71 | "|variable |class |description |\n", 72 | "|:-----------------|:---------|:-----------|\n", 73 | "|rank |int | popularity in their database of released passwords |\n", 74 | "|password |str | Actual text of the password |\n", 75 | "|category |str | What category does the password fall into?|\n", 76 | "|value |float | Time to crack by online guessing |\n", 77 | "|time_unit |str | Time unit to match with value |\n", 78 | "|offline_crack_sec |float | Time to crack offline in seconds |\n", 79 | "|rank_alt |int | Rank 2 |\n", 80 | "|strength |int | Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords |\n", 81 | "|font_size |int | Used to create the graphic for KIB |\n", 82 | "\n", 83 | "\n", 84 | "In these exercises, we're only interested in the `password`, `value` and `time_unit` columns so import only these three columns as a dataframe named `df` from this url: https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 2, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# Your answer here." 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "### 3." 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "An online password attack is when someone tries to hack your account by simply trying a very large number of username/password combinations to access your account. 
For each `password` in our dataset, the `value` column shows the amount of time it is estimated to take an \"online password attack\" to hack your account. The column `time_unit` shows the units of that time value (e.g., hours, days, years, etc.).\n", 108 | "\n", 109 | "It would be much nicer if our `value`s were of the same units so we can more easily compare the \"online password guessing time\" for each password. So your first task is to convert all of the values to units of hours (assume the conversion units I've provided below, e.g., 1 day is 24 hours, 1 week is 168 hours, etc.)." 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "units = {\n", 119 | " \"seconds\": 1 / 3600,\n", 120 | " \"minutes\": 1 / 60,\n", 121 | " \"days\": 24,\n", 122 | " \"weeks\": 168,\n", 123 | " \"months\": 720,\n", 124 | " \"years\": 8760,\n", 125 | "}" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 4, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "# Your answer here." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### 4." 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "How many passwords begin with the sequence `123`?" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 5, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# Your answer here." 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### 5." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "What is the average time in hours needed to crack these passwords that begin with `123`? How does this compare to the average of all passwords in the dataset?" 
172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 6, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# Your answer here." 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "### 6." 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "How many passwords do not contain a number?" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 7, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# Your answer here." 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "### 7." 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "How many passwords contain at least one number?" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 8, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Your answer here." 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "### 8." 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "Is there an obvious difference in online cracking time between passwords that don't contain a number vs passwords that contain at least one number?" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "# Your answer here." 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### 9." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "How many passwords contain at least one of the following punctuations: `[.!?\\\\-]` (hint: remember this dataset contains *weak* passwords...)?" 
 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Your answer here." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### 10." 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "Which password(s) in the dataset took the shortest time to crack by online guessing? Which took the longest?" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 11, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "# Your answer here." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "
\n", 303 | "
\n", 304 | "
" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "## Solutions" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### 1." 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "In this set of practice exercises we'll be looking at a cool dataset of real passwords (made available from actual data breaches) sourced and compiled from [Information is Beautiful](https://informationisbeautiful.net/visualizations/top-500-passwords-visualized/?utm_content=buffer994fa&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday). These passwords are common (\"bad\") passwords that you should avoid using! But we're going to use this dataset to practice some regex skills.\n", 326 | "\n", 327 | "Let's start by importing pandas with the alias `pd`." 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 12, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "import pandas as pd" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "### 2." 
 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "The dataset has the following columns:\n", 351 | "\n", 352 | "|variable |class |description |\n", 353 | "|:-----------------|:---------|:-----------|\n", 354 | "|rank |int | popularity in their database of released passwords |\n", 355 | "|password |str | Actual text of the password |\n", 356 | "|category |str | What category does the password fall into?|\n", 357 | "|value |float | Time to crack by online guessing |\n", 358 | "|time_unit |str | Time unit to match with value |\n", 359 | "|offline_crack_sec |float | Time to crack offline in seconds |\n", 360 | "|rank_alt |int | Rank 2 |\n", 361 | "|strength |int | Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords |\n", 362 | "|font_size |int | Used to create the graphic for KIB |\n", 363 | "\n", 364 | "\n", 365 | "In these exercises, we're only interested in the `password`, `value` and `time_unit` columns so import only these three columns as a dataframe named `df` from this url: https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 13, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/html": [ 376 | "
\n", 377 | "\n", 390 | "\n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | "
passwordvaluetime_unit
0password6.91years
112345618.52minutes
2123456781.29days
3123411.11seconds
4qwerty3.72days
\n", 432 | "
" 433 | ], 434 | "text/plain": [ 435 | " password value time_unit\n", 436 | "0 password 6.91 years\n", 437 | "1 123456 18.52 minutes\n", 438 | "2 12345678 1.29 days\n", 439 | "3 1234 11.11 seconds\n", 440 | "4 qwerty 3.72 days" 441 | ] 442 | }, 443 | "execution_count": 13, 444 | "metadata": {}, 445 | "output_type": "execute_result" 446 | } 447 | ], 448 | "source": [ 449 | "df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv',\n", 450 | " usecols=['password', 'value', 'time_unit'],\n", 451 | " skipfooter = 7,\n", 452 | " engine='python')\n", 453 | "df.head()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "### 3." 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "An online password attack is when someone tries to hack your account by simply trying a very large number of username/password combinations to access your account. For each `password` in our dataset, the `value` column shows the amount of time it is estimated to take an \"online password attack\" to hack your account. The column `time_unit` shows the units of that time value (e.g., hours, days, years, etc.)\n", 468 | "\n", 469 | "It would be much nicer if our `value`s were of the same units so we can more easily compare the \"online password guessing time\" for each password. So your first task is to convert all of the values to units of hours (assume the conversion units I've provided below, e.g., 1 day is 24 hours, 1 week is 168 hours, etc)." 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": 14, 475 | "metadata": {}, 476 | "outputs": [ 477 | { 478 | "data": { 479 | "text/html": [ 480 | "
\n", 481 | "\n", 494 | "\n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | "
passwordvaluetime_unit
0password60531.600000hours
11234560.308667hours
21234567830.960000hours
312340.003086hours
4qwerty89.280000hours
\n", 536 | "
" 537 | ], 538 | "text/plain": [ 539 | " password value time_unit\n", 540 | "0 password 60531.600000 hours\n", 541 | "1 123456 0.308667 hours\n", 542 | "2 12345678 30.960000 hours\n", 543 | "3 1234 0.003086 hours\n", 544 | "4 qwerty 89.280000 hours" 545 | ] 546 | }, 547 | "execution_count": 14, 548 | "metadata": {}, 549 | "output_type": "execute_result" 550 | } 551 | ], 552 | "source": [ 553 | "units = {\n", 554 | " \"seconds\": 1 / 3600,\n", 555 | " \"minutes\": 1 / 60,\n", 556 | " \"days\": 24,\n", 557 | " \"weeks\": 168,\n", 558 | " \"months\": 720,\n", 559 | " \"years\": 8760,\n", 560 | "}\n", 561 | "\n", 562 | "for key, val in units.items():\n", 563 | " df.loc[df['time_unit'] == key, 'value'] *= val \n", 564 | "\n", 565 | "df['time_unit'] = 'hours'\n", 566 | "df.head()" 567 | ] 568 | }, 569 | { 570 | "cell_type": "markdown", 571 | "metadata": {}, 572 | "source": [ 573 | "### 4." 574 | ] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": {}, 579 | "source": [ 580 | "How many password begin with the sequence `123`?" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 15, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "9" 592 | ] 593 | }, 594 | "execution_count": 15, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "df['password'].str.contains(r\"^123\").sum()" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "### 5." 608 | ] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": {}, 613 | "source": [ 614 | "What is the average time in hours needed to crack these passwords that begin with `123`? How does this compare to the average of all passwords in the dataset?" 
615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": 16, 620 | "metadata": {}, 621 | "outputs": [ 622 | { 623 | "name": "stdout", 624 | "output_type": "stream", 625 | "text": [ 626 | "Avg. time to crack passwords beginning with 123: 107 hrs\n", 627 | "Avg. time to crack for all passwords in dataset: 13918 hrs\n" 628 | ] 629 | } 630 | ], 631 | "source": [ 632 | "print(f\"Avg. time to crack passwords beginning with 123: {df[df['password'].str.contains(r'^123')]['value'].mean():.0f} hrs\")\n", 633 | "print(f\"Avg. time to crack for all passwords in dataset: {df['value'].mean():.0f} hrs\")" 634 | ] 635 | }, 636 | { 637 | "cell_type": "markdown", 638 | "metadata": {}, 639 | "source": [ 640 | "### 6." 641 | ] 642 | }, 643 | { 644 | "cell_type": "markdown", 645 | "metadata": {}, 646 | "source": [ 647 | "How many passwords do not contain a number?" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": 17, 653 | "metadata": {}, 654 | "outputs": [ 655 | { 656 | "data": { 657 | "text/html": [ 658 | "
\n", 659 | "\n", 672 | "\n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | "
passwordvaluetime_unit
0password60531.60hours
4qwerty89.28hours
6dragon89.28hours
7baseball60531.60hours
8football60531.60hours
\n", 714 | "
" 715 | ], 716 | "text/plain": [ 717 | " password value time_unit\n", 718 | "0 password 60531.60 hours\n", 719 | "4 qwerty 89.28 hours\n", 720 | "6 dragon 89.28 hours\n", 721 | "7 baseball 60531.60 hours\n", 722 | "8 football 60531.60 hours" 723 | ] 724 | }, 725 | "execution_count": 17, 726 | "metadata": {}, 727 | "output_type": "execute_result" 728 | } 729 | ], 730 | "source": [ 731 | "df[df['password'].str.contains(r\"^[^0-9]*$\")].head()" 732 | ] 733 | }, 734 | { 735 | "cell_type": "markdown", 736 | "metadata": {}, 737 | "source": [ 738 | "### 7." 739 | ] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": {}, 744 | "source": [ 745 | "How many passwords contain at least one number?" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 18, 751 | "metadata": {}, 752 | "outputs": [ 753 | { 754 | "data": { 755 | "text/html": [ 756 | "
\n", 757 | "\n", 770 | "\n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | "
passwordvaluetime_unit
11234560.308667hours
21234567830.960000hours
312340.003086hours
5123450.030833hours
116969690.308667hours
\n", 812 | "
" 813 | ], 814 | "text/plain": [ 815 | " password value time_unit\n", 816 | "1 123456 0.308667 hours\n", 817 | "2 12345678 30.960000 hours\n", 818 | "3 1234 0.003086 hours\n", 819 | "5 12345 0.030833 hours\n", 820 | "11 696969 0.308667 hours" 821 | ] 822 | }, 823 | "execution_count": 18, 824 | "metadata": {}, 825 | "output_type": "execute_result" 826 | } 827 | ], 828 | "source": [ 829 | "df[df['password'].str.contains(r\".*[0-9].*\")].head()" 830 | ] 831 | }, 832 | { 833 | "cell_type": "markdown", 834 | "metadata": {}, 835 | "source": [ 836 | "### 8." 837 | ] 838 | }, 839 | { 840 | "cell_type": "markdown", 841 | "metadata": {}, 842 | "source": [ 843 | "Is there an obvious difference in online cracking time between passwords that don't contain a number vs passwords that contain at least one number?" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 19, 849 | "metadata": {}, 850 | "outputs": [ 851 | { 852 | "name": "stdout", 853 | "output_type": "stream", 854 | "text": [ 855 | " Avg. time to crack passwords without a number: 8095 hrs\n", 856 | "Avg. time to crack passwords with at least one number: 62005 hrs\n" 857 | ] 858 | } 859 | ], 860 | "source": [ 861 | "print(f\" Avg. time to crack passwords without a number: {df[df['password'].str.contains(r'^[^0-9]*$')]['value'].mean():.0f} hrs\")\n", 862 | "print(f\"Avg. time to crack passwords with at least one number: {df[df['password'].str.contains(r'.*[0-9].*')]['value'].mean():.0f} hrs\")" 863 | ] 864 | }, 865 | { 866 | "cell_type": "markdown", 867 | "metadata": {}, 868 | "source": [ 869 | "### 9." 870 | ] 871 | }, 872 | { 873 | "cell_type": "markdown", 874 | "metadata": {}, 875 | "source": [ 876 | "How many passwords contain at least one of the following punctuations: `[.!?\\\\-]` (hint: remember this dataset contains *weak* passwords...)?" 
877 | ] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": 20, 882 | "metadata": {}, 883 | "outputs": [ 884 | { 885 | "data": { 886 | "text/html": [ 887 | "
\n", 888 | "\n", 901 | "\n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | "
passwordvaluetime_unit
\n", 913 | "
" 914 | ], 915 | "text/plain": [ 916 | "Empty DataFrame\n", 917 | "Columns: [password, value, time_unit]\n", 918 | "Index: []" 919 | ] 920 | }, 921 | "execution_count": 20, 922 | "metadata": {}, 923 | "output_type": "execute_result" 924 | } 925 | ], 926 | "source": [ 927 | "df[df['password'].str.contains(r'[.!?\\\\-]')]" 928 | ] 929 | }, 930 | { 931 | "cell_type": "markdown", 932 | "metadata": {}, 933 | "source": [ 934 | "### 10." 935 | ] 936 | }, 937 | { 938 | "cell_type": "markdown", 939 | "metadata": {}, 940 | "source": [ 941 | "Which password(s) in the datasets took the shortest time to crack by online guessing? Which took the longest?" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": 21, 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "text/html": [ 952 | "
\n", 953 | "\n", 966 | "\n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | "
passwordvaluetime_unit
312340.003086hours
1920000.003086hours
4469690.003086hours
7611110.003086hours
27651500.003086hours
31421120.003086hours
31512120.003086hours
32477770.003086hours
37122220.003086hours
37344440.003086hours
42913130.003086hours
\n", 1044 | "
" 1045 | ], 1046 | "text/plain": [ 1047 | " password value time_unit\n", 1048 | "3 1234 0.003086 hours\n", 1049 | "19 2000 0.003086 hours\n", 1050 | "44 6969 0.003086 hours\n", 1051 | "76 1111 0.003086 hours\n", 1052 | "276 5150 0.003086 hours\n", 1053 | "314 2112 0.003086 hours\n", 1054 | "315 1212 0.003086 hours\n", 1055 | "324 7777 0.003086 hours\n", 1056 | "371 2222 0.003086 hours\n", 1057 | "373 4444 0.003086 hours\n", 1058 | "429 1313 0.003086 hours" 1059 | ] 1060 | }, 1061 | "execution_count": 21, 1062 | "metadata": {}, 1063 | "output_type": "execute_result" 1064 | } 1065 | ], 1066 | "source": [ 1067 | "df.query(\"value == value.min()\")" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": 22, 1073 | "metadata": {}, 1074 | "outputs": [ 1075 | { 1076 | "data": { 1077 | "text/html": [ 1078 | "
\n", 1079 | "\n", 1092 | "\n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | "
passwordvaluetime_unit
25trustno1808285.2hours
335rush2112808285.2hours
405jordan23808285.2hours
499passw0rd808285.2hours
\n", 1128 | "
" 1129 | ], 1130 | "text/plain": [ 1131 | " password value time_unit\n", 1132 | "25 trustno1 808285.2 hours\n", 1133 | "335 rush2112 808285.2 hours\n", 1134 | "405 jordan23 808285.2 hours\n", 1135 | "499 passw0rd 808285.2 hours" 1136 | ] 1137 | }, 1138 | "execution_count": 22, 1139 | "metadata": {}, 1140 | "output_type": "execute_result" 1141 | } 1142 | ], 1143 | "source": [ 1144 | "df.query(\"value == value.max()\")" 1145 | ] 1146 | } 1147 | ], 1148 | "metadata": { 1149 | "kernelspec": { 1150 | "display_name": "Python 3", 1151 | "language": "python", 1152 | "name": "python3" 1153 | }, 1154 | "language_info": { 1155 | "codemirror_mode": { 1156 | "name": "ipython", 1157 | "version": 3 1158 | }, 1159 | "file_extension": ".py", 1160 | "mimetype": "text/x-python", 1161 | "name": "python", 1162 | "nbconvert_exporter": "python", 1163 | "pygments_lexer": "ipython3", 1164 | "version": "3.7.8" 1165 | } 1166 | }, 1167 | "nbformat": 4, 1168 | "nbformat_minor": 4 1169 | } 1170 | --------------------------------------------------------------------------------