├── chapters
    ├── data
    │   ├── hello.py
    │   ├── bad_style.py
    │   └── cycling_data.csv
    ├── img
    │   ├── chapter1
    │   │   ├── box.png
    │   │   ├── beach.png
    │   │   └── australia.png
    │   ├── chapter2
    │   │   ├── bsod.jpg
    │   │   ├── type_hint_1.png
    │   │   └── type_hint_2.png
    │   ├── chapter4
    │   │   ├── copy.png
    │   │   ├── xkcd.png
    │   │   ├── vscode.png
    │   │   ├── deep-copy.png
    │   │   ├── copy-append.png
    │   │   └── tomas_beuzen.png
    │   ├── chapter5
    │   │   ├── pies.png
    │   │   ├── numpy.png
    │   │   ├── pies_loop.png
    │   │   ├── triangle.png
    │   │   ├── numpy_paper.png
    │   │   ├── broadcasting.png
    │   │   ├── numpy_arrays.png
    │   │   └── pies_broadcast.png
    │   ├── chapter6
    │   │   ├── conv.gif
    │   │   └── tomas_beuzen.png
    │   ├── chapter8
    │   │   ├── join.png
    │   │   ├── tidy.png
    │   │   ├── groupby_1.png
    │   │   ├── groupby_2.png
    │   │   ├── left_join.png
    │   │   ├── inner_join.png
    │   │   ├── melt_pivot.gif
    │   │   └── outer_join.png
    │   ├── chapter7
    │   │   ├── pandas.png
    │   │   ├── series.png
    │   │   ├── dataframe.png
    │   │   ├── computer_panda.gif
    │   │   └── series_addition.png
    │   └── chapter9
    │   │   └── pandas_stacking.gif
    └── wallet.py
├── docs
    ├── banner.png
    ├── logo.png
    └── favicon.png
├── py4ds.yaml
├── practice-exercises
    ├── circle.py
    ├── bad_style.py
    ├── chapter2-loops-functions-practice.ipynb
    ├── chapter1-basics-practice.ipynb
    ├── chapter5-numpy-practice.ipynb
    ├── chapter7-pandas-practice.ipynb
    ├── chapter3-tests-classes-practice.ipynb
    └── chapter9-wrangling-advanced-practice.ipynb
├── _toc.yml
├── _config.yml
├── README.md
└── LICENSE


/chapters/data/hello.py:
--------------------------------------------------------------------------------
1 | # Name interpolated into the greeting printed by hello_world().
2 | PLANET = "Earth"
3 | 
4 | 
5 | def hello_world():
6 |     """Print "Hello <PLANET>!" to standard output; returns None."""
7 |     print(f"Hello {PLANET}!")
8 | 


--------------------------------------------------------------------------------
/docs/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/banner.png


--------------------------------------------------------------------------------
/docs/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/logo.png


--------------------------------------------------------------------------------
/docs/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/docs/favicon.png


--------------------------------------------------------------------------------
/chapters/img/chapter1/box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/box.png


--------------------------------------------------------------------------------
/chapters/img/chapter2/bsod.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/bsod.jpg


--------------------------------------------------------------------------------
/chapters/img/chapter4/copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/copy.png


--------------------------------------------------------------------------------
/chapters/img/chapter4/xkcd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/xkcd.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/pies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies.png


--------------------------------------------------------------------------------
/chapters/img/chapter6/conv.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter6/conv.gif


--------------------------------------------------------------------------------
/chapters/img/chapter8/join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/join.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/tidy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/tidy.png


--------------------------------------------------------------------------------
/chapters/img/chapter1/beach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/beach.png


--------------------------------------------------------------------------------
/chapters/img/chapter4/vscode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/vscode.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/numpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy.png


--------------------------------------------------------------------------------
/chapters/img/chapter7/pandas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/pandas.png


--------------------------------------------------------------------------------
/chapters/img/chapter7/series.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/series.png


--------------------------------------------------------------------------------
/chapters/img/chapter1/australia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter1/australia.png


--------------------------------------------------------------------------------
/chapters/img/chapter4/deep-copy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/deep-copy.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/pies_loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies_loop.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/triangle.png


--------------------------------------------------------------------------------
/chapters/img/chapter7/dataframe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/dataframe.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/groupby_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/groupby_1.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/groupby_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/groupby_2.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/left_join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/left_join.png


--------------------------------------------------------------------------------
/chapters/img/chapter2/type_hint_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/type_hint_1.png


--------------------------------------------------------------------------------
/chapters/img/chapter2/type_hint_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter2/type_hint_2.png


--------------------------------------------------------------------------------
/chapters/img/chapter4/copy-append.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/copy-append.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/numpy_paper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy_paper.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/inner_join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/inner_join.png


--------------------------------------------------------------------------------
/chapters/img/chapter8/melt_pivot.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/melt_pivot.gif


--------------------------------------------------------------------------------
/chapters/img/chapter8/outer_join.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter8/outer_join.png


--------------------------------------------------------------------------------
/chapters/img/chapter4/tomas_beuzen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter4/tomas_beuzen.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/broadcasting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/broadcasting.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/numpy_arrays.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/numpy_arrays.png


--------------------------------------------------------------------------------
/chapters/img/chapter5/pies_broadcast.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter5/pies_broadcast.png


--------------------------------------------------------------------------------
/chapters/img/chapter6/tomas_beuzen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter6/tomas_beuzen.png


--------------------------------------------------------------------------------
/chapters/img/chapter7/computer_panda.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/computer_panda.gif


--------------------------------------------------------------------------------
/chapters/img/chapter7/series_addition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter7/series_addition.png


--------------------------------------------------------------------------------
/chapters/img/chapter9/pandas_stacking.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TomasBeuzen/python-programming-for-data-science/HEAD/chapters/img/chapter9/pandas_stacking.gif


--------------------------------------------------------------------------------
/py4ds.yaml:
--------------------------------------------------------------------------------
 1 | # install with `conda env create -f py4ds.yaml`
 2 | 
 3 | name: py4ds
 4 | channels:
 5 |   - conda-forge
 6 |   - defaults
 7 | dependencies:
 8 |   - python=3.8
 9 |   - jupyterlab
10 |   - matplotlib
11 |   - pandas
12 |   - numpy
13 |   - memory_profiler
14 |   - tqdm
15 |   - flake8
16 |   - black
17 | 


--------------------------------------------------------------------------------
/practice-exercises/circle.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | class Circle:
 4 |     """A circle with a radius r.
 5 | 
 6 |     Parameters
 7 |     ----------
 8 |     radius : number
 9 |         Radius of the circle; stored unchanged as the ``radius`` attribute.
10 |     """
11 | 
12 |     def __init__(self, radius):
13 |         """Store the given radius on the instance."""
14 |         self.radius = radius
15 | 
16 |     def area(self):
17 |         """Calculate the area of the circle (pi * radius ** 2)."""
18 |         return math.pi * self.radius ** 2
19 | 
20 |     def circumference(self):
21 |         """Calculate the circumference of the circle (2 * pi * radius)."""
22 |         return 2.0 * math.pi * self.radius
23 | 
24 |     def __str__(self):
25 |         """Return a short description of the circle that includes its radius."""
26 |         return f"A Circle with radius {self.radius}"


--------------------------------------------------------------------------------
/practice-exercises/bad_style.py:
--------------------------------------------------------------------------------
 1 | very_long_variable_name = {'field': 1,
 2 |                         'is_debug': True}
 3 | if very_long_variable_name is not None and very_long_variable_name["field"] > 0 or very_long_variable_name['is_debug']:
 4 |  z = 'hello '+'world'
 5 | else:
 6 |  f = rf'hello {world}'
 7 | if (True): y = 'hello ''world'#FIXME: https://github.com/python/black/issues/26
 8 | class Foo  (     object  ):
 9 |   def f    (self   ):
10 |     return       37*-2
11 |   def g(self, x,y=42):
12 |       return y
13 | regular_formatting = [
14 |     0,  1,  2,
15 |     3,  4,  5
16 | ]
17 | def CAPITALIZE(mystring):
18 |     return mystring.upper()


--------------------------------------------------------------------------------
/chapters/data/bad_style.py:
--------------------------------------------------------------------------------
 1 | x = {  'a':37,'b':42,
 2 | 'c':927}
 3 | very_long_variable_name = {'field': 1,
 4 |                         'is_debug': True}
 5 | this=True
 6 | 
 7 | if very_long_variable_name is not None and very_long_variable_name["field"] > 0 or very_long_variable_name['is_debug']:
 8 |  z = 'hello '+'world'
 9 | else:
10 |  world = 'world'
11 |  a = 'hello {}'.format(world)
12 |  f = rf'hello {world}'
13 | if (this): y = 'hello ''world'#FIXME: https://github.com/python/black/issues/26
14 | class Foo  (     object  ):
15 |   def f    (self   ):
16 |     return       37*-2
17 |   def g(self, x,y=42):
18 |       return y
19 | # fmt: off
20 | custom_formatting = [
21 |     0,  1,  2,
22 |     3,  4,  5
23 | ]
24 | # fmt: on
25 | regular_formatting = [
26 |     0,  1,  2,
27 |     3,  4,  5
28 | ]


--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
 1 | - file: README
 2 |   numbered: false
 3 | 
 4 | - part: Chapters
 5 |   chapters:
 6 |   - file: chapters/chapter1-basics
 7 |   - file: chapters/chapter2-loops-functions
 8 |   - file: chapters/chapter3-tests-classes
 9 |   - file: chapters/chapter4-style-scripts-imports
10 |   - file: chapters/chapter5-numpy
11 |   - file: chapters/chapter6-numpy-addendum
12 |   - file: chapters/chapter7-pandas
13 |   - file: chapters/chapter8-wrangling-basics
14 |   - file: chapters/chapter9-wrangling-advanced
15 | 
16 | - part: Practice Exercises
17 |   chapters:
18 |   - file: practice-exercises/chapter1-basics-practice.ipynb
19 |   - file: practice-exercises/chapter2-loops-functions-practice.ipynb
20 |   - file: practice-exercises/chapter3-tests-classes-practice.ipynb
21 |   - file: practice-exercises/chapter4-style-scripts-imports-practice.ipynb
22 |   - file: practice-exercises/chapter5-numpy-practice.ipynb
23 |   - file: practice-exercises/chapter7-pandas-practice.ipynb
24 |   - file: practice-exercises/chapter8-wrangling-basics-practice.ipynb
25 |   - file: practice-exercises/chapter9-wrangling-advanced-practice.ipynb


--------------------------------------------------------------------------------
/chapters/wallet.py:
--------------------------------------------------------------------------------
 1 | # This module contains a class Wallet that can be used to store, spend, and earn cash.
 2 | 
 3 | 
 4 | class Wallet:
 5 |     """A wallet that can store, spend, and earn cash.
 6 | 
 7 |     Parameters
 8 |     ----------
 9 |     balance : number
10 |         Amount of starting cash.
11 | 
12 |     Attributes
13 |     ----------
14 |     item : str
15 |         The type of item, a "Wallet" (class-level, shared by all instances).
16 |     balance : number
17 |         The amount of money currently in the wallet.
18 |     """
19 | 
20 |     item = "Wallet"
21 | 
22 |     def __init__(self, balance):
23 |         """See help(Wallet)"""
24 |         self.balance = balance
25 | 
26 |     def buy_item(self, cost, number=1):
27 |         """Spend money and reduce your balance by ``cost * number``.
28 | 
29 |         Parameters
30 |         ----------
31 |         cost : number
32 |             cost of each item to buy.
33 |         number : int, optional
34 |             number of items to buy (default 1).
35 | 
36 |         Raises
37 |         ------
38 |         InsufficientCashError
39 |             If ``cost * number`` is greater than the current balance.
40 |         """
41 |         if cost * number <= self.balance:
42 |             self.balance -= cost * number
43 |         else:
44 |             raise InsufficientCashError(
45 |                 f"You can't spend ${cost * number} as you only have ${self.balance}."
46 |             )
47 | 
48 |     def sell_item(self, cost, number=1):
49 |         """Sell items and increase your balance by ``cost * number``.
50 | 
51 |         Parameters
52 |         ----------
53 |         cost : number
54 |             price received for each item sold.
55 |         number : int, optional
56 |             number of items to sell (default 1).
57 | 
58 |         """
59 |         self.balance += cost * number
60 | 
61 | 
62 | class InsufficientCashError(Exception):
63 |     """Custom error raised by Wallet.buy_item when the total cost exceeds the balance."""
64 | 
65 |     pass
66 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
 1 | # Book settings
 2 | title: Python Programming for Data Science
 3 | author: Tomas Beuzen
 4 | copyright: "2021" 
 5 | logo: docs/logo.png
 6 | exclude_patterns: ["cache"]
 7 | 
 8 | repository:
 9 |   url: https://github.com/TomasBeuzen/python-programming-for-data-science
10 |   path_to_book: ""
11 |   branch: main  # Which branch of the repository should be used when creating links
12 |   
13 | latex:
14 |   latex_documents:
15 |     targetname: book.tex
16 | 
17 | # Execution settings
18 | execute:
19 |   execute_notebooks         : "cache"  # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off")
20 |   cache                     : "cache"
21 | 
22 | # HTML-specific settings
23 | html:
24 |   favicon                   : "docs/favicon.png"  # A path to a favicon image
25 |   navbar_number_sections    : false  # Add a number to each section in your left navbar
26 |   use_edit_page_button      : false  # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in
27 |   use_repository_button     : true  # Whether to add a link to your repository button
28 |   use_issues_button         : false  # Whether to add an "open an issue" button
29 |   extra_navbar              : <a href="https://www.tomasbeuzen.com/">Tomas Beuzen</a>  # Will be displayed underneath the left navbar.
30 |   extra_footer              : ""  # Will be displayed underneath the footer.
31 |   google_analytics_id       : ""  # A GA id that can be used to track book views.
32 |   home_page_in_navbar       : true  # Whether to include your home page in the left Navigation Bar
33 |   baseurl                   : "https://www.tomasbeuzen.com/python-programming-for-data-science/"  # The base URL where your book will be hosted. Used for creating image previews and social links. e.g.: https://mypage.com/mybook/
34 | 


--------------------------------------------------------------------------------
/chapters/data/cycling_data.csv:
--------------------------------------------------------------------------------
 1 | Date,Name,Type,Time,Distance,Comments
 2 | "10 Sep 2019, 00:13:04",Afternoon Ride,Ride,2084,12.62,Rain
 3 | "10 Sep 2019, 13:52:18",Morning Ride,Ride,2531,13.03,rain
 4 | "11 Sep 2019, 00:23:50",Afternoon Ride,Ride,1863,12.52,Wet road but nice weather
 5 | "11 Sep 2019, 14:06:19",Morning Ride,Ride,2192,12.84,Stopped for photo of sunrise
 6 | "12 Sep 2019, 00:28:05",Afternoon Ride,Ride,1891,12.48,Tired by the end of the week
 7 | "16 Sep 2019, 13:57:48",Morning Ride,Ride,2272,12.45,Rested after the weekend!
 8 | "17 Sep 2019, 00:15:47",Afternoon Ride,Ride,1973,12.45,Legs feeling strong!
 9 | "17 Sep 2019, 13:43:34",Morning Ride,Ride,2285,12.6,Raining
10 | "18 Sep 2019, 13:49:53",Morning Ride,Ride,2903,14.57,Raining today
11 | "18 Sep 2019, 00:15:52",Afternoon Ride,Ride,2101,12.48,Pumped up tires
12 | "19 Sep 2019, 00:30:01",Afternoon Ride,Ride,48062,12.48,Feeling good
13 | "19 Sep 2019, 13:52:09",Morning Ride,Ride,2090,12.59,Getting colder which is nice
14 | "20 Sep 2019, 01:02:05",Afternoon Ride,Ride,2961,12.81,Feeling good
15 | "23 Sep 2019, 13:50:41",Morning Ride,Ride,2462,12.68,Rested after the weekend!
16 | "24 Sep 2019, 00:35:42",Afternoon Ride,Ride,2076,12.47,"Oiled chain, bike feels smooth"
17 | "24 Sep 2019, 13:41:24",Morning Ride,Ride,2321,12.68,Bike feeling much smoother
18 | "25 Sep 2019, 00:07:21",Afternoon Ride,Ride,1775,12.1,Feeling really tired
19 | "25 Sep 2019, 13:35:41",Morning Ride,Ride,2124,12.65,Stopped for photo of sunrise
20 | "26 Sep 2019, 00:13:33",Afternoon Ride,Ride,1860,12.52,raining
21 | "26 Sep 2019, 13:42:43",Morning Ride,Ride,2350,12.91,Detour around trucks at Jericho
22 | "27 Sep 2019, 01:00:18",Afternoon Ride,Ride,1712,12.47,Tired by the end of the week
23 | "30 Sep 2019, 13:53:52",Morning Ride,Ride,2118,12.71,Rested after the weekend!
24 | "1 Oct 2019, 00:15:07",Afternoon Ride,Ride,1732,NaN,Legs feeling strong!
25 | "1 Oct 2019, 13:45:55",Morning Ride,Ride,2222,12.82,Beautiful morning! Feeling fit
26 | "2 Oct 2019, 00:13:09",Afternoon Ride,Ride,1756,NaN,A little tired today but good weather
27 | "2 Oct 2019, 13:46:06",Morning Ride,Ride,2134,13.06,Bit tired today but good weather
28 | "3 Oct 2019, 00:45:22",Afternoon Ride,Ride,1724,12.52,Feeling good
29 | "3 Oct 2019, 13:47:36",Morning Ride,Ride,2182,12.68,Wet road
30 | "4 Oct 2019, 01:08:08",Afternoon Ride,Ride,1870,12.63,"Very tired, riding into the wind"
31 | "9 Oct 2019, 13:55:40",Morning Ride,Ride,2149,12.7,Really cold! But feeling good
32 | "10 Oct 2019, 00:10:31",Afternoon Ride,Ride,1841,12.59,Feeling good after a holiday break!
33 | "10 Oct 2019, 13:47:14",Morning Ride,Ride,2463,12.79,Stopped for photo of sunrise
34 | "11 Oct 2019, 00:16:57",Afternoon Ride,Ride,1843,11.79,"Bike feeling tight, needs an oil and pump"


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python Programming for Data Science
 2 | 
 3 | **By [Tomas Beuzen](https://www.tomasbeuzen.com/) 🚀**
 4 | 
 5 | Welcome to Python Programming for Data Science! With this [website](https://www.tomasbeuzen.com/python-programming-for-data-science/) I aim to provide an introduction to everything you need to know to start using Python for data science. We'll cover topics such as data structures, basic programming, code testing and documentation, and using libraries like NumPy and Pandas for data exploration and analysis.
 6 | 
 7 | <p align="center">
 8 |   <img src="docs/logo.png" width="260">
 9 | </p>
10 | 
11 | >If you're interested in learning more about Python packages, check out my and [Tiffany Timbers'](https://www.tiffanytimbers.com/) book [**Python Packages**](https://py-pkgs.org/). Or, if you'd like to learn more about using Python and PyTorch for deep learning, you can check out my other online material [**Deep Learning with PyTorch**](https://www.tomasbeuzen.com/deep-learning-with-pytorch/).
12 | 
13 | >The content of this site is adapted from material I used to teach the 2020/2021 offering of the course "DSCI 511 Python Programming for Data Science" for the University of British Columbia's Master of Data Science Program. That material has built upon previous course material developed by [Patrick Walls](https://www.math.ubc.ca/~pwalls/) and [Mike Gelbart](https://www.mikegelbart.com/).
14 | 
15 | ## Key Learning Outcomes
16 | 
17 | These are the key learning outcomes for this material:
18 | 
19 | 1. Translate fundamental programming concepts such as loops, conditionals, etc., into Python code.
20 | 2. Understand the key data structures in Python.
21 | 3. Understand how to write functions in Python and assess if they are correct via unit testing.
22 | 4. Know when and how to abstract code (e.g., into functions, or classes) to make it more modular and robust.
23 | 5. Produce human-readable code that incorporates best practices of programming, documentation, and coding style.
24 | 6. Use NumPy to perform common data wrangling and computational tasks in Python.
25 | 7. Use Pandas to create and manipulate data structures like Series and DataFrames.
26 | 8. Wrangle different types of data in Pandas including numeric data, strings, and datetimes.
27 | 
28 | ## Getting Started
29 | 
30 | The material on this site is written in Jupyter notebooks and rendered using [Jupyter Book](https://jupyterbook.org/intro.html) to make it easily accessible. However, if you wish to run these notebooks on your local machine, you can do the following:
31 | 
32 | 1. Clone the GitHub repository:
33 |    ```sh
34 |    git clone https://github.com/TomasBeuzen/python-programming-for-data-science.git
35 |    ```
36 | 2. Install the conda environment by typing the following in your terminal:
37 |    ```sh
38 |    conda env create -f py4ds.yaml
39 |    ```
40 | 3. Open the course in JupyterLab by typing the following in your terminal:
41 |    ```sh
42 |    cd python-programming-for-data-science
43 |    jupyter lab
44 |    ```
45 | 
46 | >If you're not comfortable with `git`, `GitHub` or `conda`, feel free to just read through the material on this website - you're not missing out on anything! 
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Creative Commons Legal Code
  2 | 
  3 | CC0 1.0 Universal
  4 | 
  5 |     CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
  6 |     LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
  7 |     ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
  8 |     INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
  9 |     REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
 10 |     PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
 11 |     THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
 12 |     HEREUNDER.
 13 | 
 14 | Statement of Purpose
 15 | 
 16 | The laws of most jurisdictions throughout the world automatically confer
 17 | exclusive Copyright and Related Rights (defined below) upon the creator
 18 | and subsequent owner(s) (each and all, an "owner") of an original work of
 19 | authorship and/or a database (each, a "Work").
 20 | 
 21 | Certain owners wish to permanently relinquish those rights to a Work for
 22 | the purpose of contributing to a commons of creative, cultural and
 23 | scientific works ("Commons") that the public can reliably and without fear
 24 | of later claims of infringement build upon, modify, incorporate in other
 25 | works, reuse and redistribute as freely as possible in any form whatsoever
 26 | and for any purposes, including without limitation commercial purposes.
 27 | These owners may contribute to the Commons to promote the ideal of a free
 28 | culture and the further production of creative, cultural and scientific
 29 | works, or to gain reputation or greater distribution for their Work in
 30 | part through the use and efforts of others.
 31 | 
 32 | For these and/or other purposes and motivations, and without any
 33 | expectation of additional consideration or compensation, the person
 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
 35 | is an owner of Copyright and Related Rights in the Work, voluntarily
 36 | elects to apply CC0 to the Work and publicly distribute the Work under its
 37 | terms, with knowledge of his or her Copyright and Related Rights in the
 38 | Work and the meaning and intended legal effect of CC0 on those rights.
 39 | 
 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 41 | protected by copyright and related or neighboring rights ("Copyright and
 42 | Related Rights"). Copyright and Related Rights include, but are not
 43 | limited to, the following:
 44 | 
 45 |   i. the right to reproduce, adapt, distribute, perform, display,
 46 |      communicate, and translate a Work;
 47 |  ii. moral rights retained by the original author(s) and/or performer(s);
 48 | iii. publicity and privacy rights pertaining to a person's image or
 49 |      likeness depicted in a Work;
 50 |  iv. rights protecting against unfair competition in regards to a Work,
 51 |      subject to the limitations in paragraph 4(a), below;
 52 |   v. rights protecting the extraction, dissemination, use and reuse of data
 53 |      in a Work;
 54 |  vi. database rights (such as those arising under Directive 96/9/EC of the
 55 |      European Parliament and of the Council of 11 March 1996 on the legal
 56 |      protection of databases, and under any national implementation
 57 |      thereof, including any amended or successor version of such
 58 |      directive); and
 59 | vii. other similar, equivalent or corresponding rights throughout the
 60 |      world based on applicable law or treaty, and any national
 61 |      implementations thereof.
 62 | 
 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
 64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
 65 | irrevocably and unconditionally waives, abandons, and surrenders all of
 66 | Affirmer's Copyright and Related Rights and associated claims and causes
 67 | of action, whether now known or unknown (including existing as well as
 68 | future claims and causes of action), in the Work (i) in all territories
 69 | worldwide, (ii) for the maximum duration provided by applicable law or
 70 | treaty (including future time extensions), (iii) in any current or future
 71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
 72 | including without limitation commercial, advertising or promotional
 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
 74 | member of the public at large and to the detriment of Affirmer's heirs and
 75 | successors, fully intending that such Waiver shall not be subject to
 76 | revocation, rescission, cancellation, termination, or any other legal or
 77 | equitable action to disrupt the quiet enjoyment of the Work by the public
 78 | as contemplated by Affirmer's express Statement of Purpose.
 79 | 
 80 | 3. Public License Fallback. Should any part of the Waiver for any reason
 81 | be judged legally invalid or ineffective under applicable law, then the
 82 | Waiver shall be preserved to the maximum extent permitted taking into
 83 | account Affirmer's express Statement of Purpose. In addition, to the
 84 | extent the Waiver is so judged Affirmer hereby grants to each affected
 85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
 88 | maximum duration provided by applicable law or treaty (including future
 89 | time extensions), (iii) in any current or future medium and for any number
 90 | of copies, and (iv) for any purpose whatsoever, including without
 91 | limitation commercial, advertising or promotional purposes (the
 92 | "License"). The License shall be deemed effective as of the date CC0 was
 93 | applied by Affirmer to the Work. Should any part of the License for any
 94 | reason be judged legally invalid or ineffective under applicable law, such
 95 | partial invalidity or ineffectiveness shall not invalidate the remainder
 96 | of the License, and in such case Affirmer hereby affirms that he or she
 97 | will not (i) exercise any of his or her remaining Copyright and Related
 98 | Rights in the Work or (ii) assert any associated claims and causes of
 99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 | 
102 | 4. Limitations and Disclaimers.
103 | 
104 |  a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 |     surrendered, licensed or otherwise affected by this document.
106 |  b. Affirmer offers the Work as-is and makes no representations or
107 |     warranties of any kind concerning the Work, express, implied,
108 |     statutory or otherwise, including without limitation warranties of
109 |     title, merchantability, fitness for a particular purpose, non
110 |     infringement, or the absence of latent or other defects, accuracy, or
111 |     the present or absence of errors, whether or not discoverable, all to
112 |     the greatest extent permissible under applicable law.
113 |  c. Affirmer disclaims responsibility for clearing rights of other persons
114 |     that may apply to the Work or any use thereof, including without
115 |     limitation any person's Copyright and Related Rights in the Work.
116 |     Further, Affirmer disclaims responsibility for obtaining any necessary
117 |     consents, permissions or other rights required for any use of the
118 |     Work.
119 |  d. Affirmer understands and acknowledges that Creative Commons is not a
120 |     party to this document and has no duty or obligation with respect to
121 |     this CC0 or use of the Work.
122 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter2-loops-functions-practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "![](../docs/banner.png)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Loops & Functions\n",
 15 |     "\n",
 16 |     "**Tomas Beuzen, September 2020**"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "These exercises complement [Chapter 2](../chapters/chapter2-loops-functions.ipynb)."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "## Exercises"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "### 1."
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "Create a function `website()` that grabs the website domain from a url string. For example, if your function is passed `\"www.google.com\"`, it should return `\"google\"`."
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 1,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "def website(url):\n",
 54 |     "    pass  # Remove this line and add your answer here."
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "### 2."
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "Create a function `divisible(a, b)` that accepts two integers (`a` and `b`) and returns `True` if `a` is divisible by `b` without a remainder. For example, `divisible(10, 3)` should return `False`, while `divisible(6, 3)` should return `True`."
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 2,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "def divisible(a, b):\n",
 78 |     "    pass  # Remove this line and add your answer here."
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "### 3."
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "Use list comprehension to square every number in the following list of numbers. "
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 3,
 98 |    "metadata": {},
 99 |    "outputs": [],
100 |    "source": [
101 |     "l = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
102 |     "\n",
103 |     "# Your answer here."
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "### 4."
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "markdown",
115 |    "metadata": {},
116 |    "source": [
117 |     "For the following list of names, write a list comprehension that creates a list of *only* words that start with a capital letter (hint: `str.isupper()`)."
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 4,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "names = ['Steve Irwin', 'koala', 'kangaroo', 'Australia', 'Sydney', 'desert']\n",
127 |     "\n",
128 |     "# Your answer here."
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "### 5."
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "For the following list of `keys` and `vals` use dictionary comprehension to create a dictionary of the form `{'key-0': 0, 'key-1': 1, etc}` (hint: `zip()` can help you combine two lists into one object to be used for comprehension/looping)."
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 5,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "keys = [f\"key-{k}\" for k in range(10)]\n",
152 |     "vals = range(10)\n",
153 |     "\n",
154 |     "# Your answer here."
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "### 6."
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "metadata": {},
167 |    "source": [
168 |     "This question is a little harder. Create a generator function called `listgen(n)` that yields numbers from 0 to n, in batches of lists of maximum 10 numbers at a time. For example, your function should behave as follows:\n",
169 |     "\n",
170 |     "```python\n",
171 |     "g = listgen(100)\n",
172 |     "next(g)\n",
173 |     "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
174 |     "next(g)\n",
175 |     "[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n",
176 |     "next(g)\n",
177 |     "[20, 21, 22, 23, 24, 25, 26, 27, 28, 29]\n",
178 |     "etc.\n",
179 |     "\n",
180 |     "g = listgen(5)\n",
181 |     "next(g)\n",
182 |     "```"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 6,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "def listgen(n):\n",
192 |     "    pass  # Remove this line and add your answer here."
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "### 7."
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "Write a `try`/`except` to catch the error generated from the following code and print \"I caught you!\". Make sure you catch the specific error being caused; this is typically better practice than just catching all errors!"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 7,
212 |    "metadata": {
213 |     "tags": [
214 |      "raises-exception"
215 |     ]
216 |    },
217 |    "outputs": [
218 |     {
219 |      "ename": "ZeroDivisionError",
220 |      "evalue": "division by zero",
221 |      "output_type": "error",
222 |      "traceback": [
223 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
224 |       "\u001b[0;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
225 |       "\u001b[0;32m<ipython-input-7-9e2b7a365344>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;36m5\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;31m# Your answer here.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
226 |       "\u001b[0;31mZeroDivisionError\u001b[0m: division by zero"
227 |      ]
228 |     }
229 |    ],
230 |    "source": [
231 |     "5 / 0\n",
232 |     "\n",
233 |     "# Your answer here."
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "### 8."
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "markdown",
245 |    "metadata": {},
246 |    "source": [
247 |     "Create a function `lucky_sum()` that takes all the integers a user enters and returns their sum. *However*, if one of the values is 13 then it does not count towards the sum, nor do any values to its right.\n",
248 |     "\n",
249 |     "For example, your function should behave as follows:\n",
250 |     "\n",
251 |     "```python\n",
252 |     "lucky_sum(1, 2, 3, 4)\n",
253 |     "10\n",
254 |     "\n",
255 |     "lucky_sum(1, 13, 3, 4)\n",
256 |     "1\n",
257 |     "\n",
258 |     "lucky_sum(13)\n",
259 |     "0\n",
260 |     "```\n",
261 |     "\n",
262 |     "*This example is inspired by the related [codingbat challenge](https://codingbat.com/prob/p130788).*"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": 8,
268 |    "metadata": {},
269 |    "outputs": [],
270 |    "source": [
271 |     "def lucky_sum(*args):\n",
272 |     "    pass  # Remove this line and add your answer here."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "<hr>\n",
280 |     "<hr>\n",
281 |     "<hr>"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "## Solutions"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "markdown",
293 |    "metadata": {},
294 |    "source": [
295 |     "### 1."
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "Create a function `website()` that grabs the website domain from a url string. For example, if your function is passed `\"www.google.com\"`, it should return `\"google\"`."
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": 9,
308 |    "metadata": {},
309 |    "outputs": [
310 |     {
311 |      "data": {
312 |       "text/plain": [
313 |        "'google'"
314 |       ]
315 |      },
316 |      "execution_count": 9,
317 |      "metadata": {},
318 |      "output_type": "execute_result"
319 |     }
320 |    ],
321 |    "source": [
322 |     "def website(url):\n",
323 |     "    return url.split(\".\")[1]\n",
324 |     "website(\"www.google.com\")"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "markdown",
329 |    "metadata": {},
330 |    "source": [
331 |     "### 2."
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "markdown",
336 |    "metadata": {},
337 |    "source": [
338 |     "Create a function `divisible(a, b)` that accepts two integers (`a` and `b`) and returns `True` if `a` is divisible by `b` without a remainder. For example, `divisible(10, 3)` should return `False`, while `divisible(6, 3)` should return `True`."
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 10,
344 |    "metadata": {},
345 |    "outputs": [
346 |     {
347 |      "name": "stdout",
348 |      "output_type": "stream",
349 |      "text": [
350 |       "False\n",
351 |       "True\n"
352 |      ]
353 |     }
354 |    ],
355 |    "source": [
356 |     "def divisible(a, b):\n",
357 |     "    return True if a % b == 0 else False\n",
358 |     "print(divisible(10, 3))\n",
359 |     "print(divisible(6, 3))"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "markdown",
364 |    "metadata": {},
365 |    "source": [
366 |     "### 3."
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "markdown",
371 |    "metadata": {},
372 |    "source": [
373 |     "Use list comprehension to square every number in the following list of numbers. "
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "code",
378 |    "execution_count": 11,
379 |    "metadata": {},
380 |    "outputs": [
381 |     {
382 |      "data": {
383 |       "text/plain": [
384 |        "[1, 4, 9, 16, 25, 36, 49, 64, 81]"
385 |       ]
386 |      },
387 |      "execution_count": 11,
388 |      "metadata": {},
389 |      "output_type": "execute_result"
390 |     }
391 |    ],
392 |    "source": [
393 |     "l = [1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
394 |     "[_ ** 2 for _ in l]"
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "markdown",
399 |    "metadata": {},
400 |    "source": [
401 |     "### 4."
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "For the following list of names, write a list comprehension that creates a list of *only* words that start with a capital letter (hint: `str.isupper()`)."
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "code",
413 |    "execution_count": 12,
414 |    "metadata": {},
415 |    "outputs": [
416 |     {
417 |      "data": {
418 |       "text/plain": [
419 |        "['Steve Irwin', 'Australia', 'Sydney']"
420 |       ]
421 |      },
422 |      "execution_count": 12,
423 |      "metadata": {},
424 |      "output_type": "execute_result"
425 |     }
426 |    ],
427 |    "source": [
428 |     "names = ['Steve Irwin', 'koala', 'kangaroo', 'Australia', 'Sydney', 'desert']\n",
429 |     "[_ for _ in names if _[0].isupper()]"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "### 5."
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "metadata": {},
442 |    "source": [
443 |     "For the following list of `keys` and `vals` use dictionary comprehension to create a dictionary of the form `{'key-0': 0, 'key-1': 1, etc}` (hint: `zip()` can help you combine two lists into one object to be used for comprehension/looping)."
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": 13,
449 |    "metadata": {},
450 |    "outputs": [
451 |     {
452 |      "data": {
453 |       "text/plain": [
454 |        "{'key-0': 0,\n",
455 |        " 'key-1': 1,\n",
456 |        " 'key-2': 2,\n",
457 |        " 'key-3': 3,\n",
458 |        " 'key-4': 4,\n",
459 |        " 'key-5': 5,\n",
460 |        " 'key-6': 6,\n",
461 |        " 'key-7': 7,\n",
462 |        " 'key-8': 8,\n",
463 |        " 'key-9': 9}"
464 |       ]
465 |      },
466 |      "execution_count": 13,
467 |      "metadata": {},
468 |      "output_type": "execute_result"
469 |     }
470 |    ],
471 |    "source": [
472 |     "keys = [f\"key-{k}\" for k in range(10)]\n",
473 |     "vals = range(10)\n",
474 |     "{k:v for k, v in zip(keys, vals)}"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {},
480 |    "source": [
481 |     "### 6."
482 |    ]
483 |   },
484 |   {
485 |    "cell_type": "markdown",
486 |    "metadata": {},
487 |    "source": [
488 |     "This question is a little harder. Create a generator function called `listgen(n)` that yields numbers from 0 to n, in batches of lists of maximum 10 numbers at a time. For example, your function should behave as follows:\n",
489 |     "\n",
490 |     "```python\n",
491 |     "g = listgen(100)\n",
492 |     "next(g)\n",
493 |     "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
494 |     "next(g)\n",
495 |     "[10, 11, 12, 13, 14, 15, 16, 17, 18, 19]\n",
496 |     "next(g)\n",
497 |     "[20, 21, 22, 23, 24, 25, 26, 27, 28, 29]\n",
498 |     "etc.\n",
499 |     "\n",
500 |     "g = listgen(5)\n",
501 |     "next(g)\n",
502 |     "```"
503 |    ]
504 |   },
505 |   {
506 |    "cell_type": "code",
507 |    "execution_count": 14,
508 |    "metadata": {},
509 |    "outputs": [],
510 |    "source": [
511 |     "def listgen(n):\n",
512 |     "    counter = 0\n",
513 |     "    numbers = list(range(n))\n",
514 |     "    while counter <= n // 10:\n",
515 |     "        yield numbers[10 * counter:10*(counter+1)]\n",
516 |     "        counter += 1"
517 |    ]
518 |   },
519 |   {
520 |    "cell_type": "markdown",
521 |    "metadata": {},
522 |    "source": [
523 |     "### 7."
524 |    ]
525 |   },
526 |   {
527 |    "cell_type": "markdown",
528 |    "metadata": {},
529 |    "source": [
530 |     "Write a `try`/`except` to catch the error generated from the following code and print \"I caught you!\". Make sure you catch the specific error being caused; this is typically better practice than just catching all errors!"
531 |    ]
532 |   },
533 |   {
534 |    "cell_type": "code",
535 |    "execution_count": 15,
536 |    "metadata": {},
537 |    "outputs": [
538 |     {
539 |      "name": "stdout",
540 |      "output_type": "stream",
541 |      "text": [
542 |       "I caught you!\n"
543 |      ]
544 |     }
545 |    ],
546 |    "source": [
547 |     "try:\n",
548 |     "    5 / 0\n",
549 |     "except ZeroDivisionError:\n",
550 |     "    print(\"I caught you!\")"
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "markdown",
555 |    "metadata": {},
556 |    "source": [
557 |     "### 8."
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "markdown",
562 |    "metadata": {},
563 |    "source": [
564 |     "Create a function `lucky_sum()` that takes all the integers a user enters and returns their sum. *However*, if one of the values is 13 then it does not count towards the sum, nor do any values to its right.\n",
565 |     "\n",
566 |     "For example, your function should behave as follows:\n",
567 |     "\n",
568 |     "```python\n",
569 |     "lucky_sum(1, 2, 3, 4)\n",
570 |     "10\n",
571 |     "\n",
572 |     "lucky_sum(1, 13, 3, 4)\n",
573 |     "1\n",
574 |     "\n",
575 |     "lucky_sum(13)\n",
576 |     "0\n",
577 |     "```\n",
578 |     "\n",
579 |     "*This example is inspired by the related [codingbat challenge](https://codingbat.com/prob/p130788).*"
580 |    ]
581 |   },
582 |   {
583 |    "cell_type": "code",
584 |    "execution_count": 16,
585 |    "metadata": {},
586 |    "outputs": [],
587 |    "source": [
588 |     "def lucky_sum(*args):\n",
589 |     "    if 13 in args:\n",
590 |     "        return sum(args[:args.index(13)])\n",
591 |     "    return sum(args)"
592 |    ]
593 |   }
594 |  ],
595 |  "metadata": {
596 |   "kernelspec": {
597 |    "display_name": "Python 3",
598 |    "language": "python",
599 |    "name": "python3"
600 |   },
601 |   "language_info": {
602 |    "codemirror_mode": {
603 |     "name": "ipython",
604 |     "version": 3
605 |    },
606 |    "file_extension": ".py",
607 |    "mimetype": "text/x-python",
608 |    "name": "python",
609 |    "nbconvert_exporter": "python",
610 |    "pygments_lexer": "ipython3",
611 |    "version": "3.7.8"
612 |   }
613 |  },
614 |  "nbformat": 4,
615 |  "nbformat_minor": 4
616 | }
617 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter1-basics-practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "![](../docs/banner.png)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Python Basics\n",
 15 |     "\n",
 16 |     "**Tomas Beuzen, September 2020**"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "These exercises complement [Chapter 1](../chapters/chapter1-basics.ipynb)."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "## Exercises"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "### 1."
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "What is 5 to the power of 5?"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 1,
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "# Your answer here."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "### 2."
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "What is the remainder from dividing 73 by 6?"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 2,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Your answer here."
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "### 3."
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "How many times does the whole number 3 go into 123? What is the remainder of dividing 123 by 3?"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 3,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "# Your answer here."
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "### 4."
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "Split the following string into a list by splitting on the space character:"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 4,
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": [
122 |     "s = \"MDS is going virtual!\"\n",
123 |     "\n",
124 |     "# Your answer here."
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "### 5."
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "Given the following variables:\n",
139 |     "\n",
140 |     "```\n",
141 |     "thing = \"light\"\n",
142 |     "speed = 299792458  # m/s\n",
143 |     "```\n",
144 |     "\n",
145 |     "Use f-strings to print:\n",
146 |     "\n",
147 |     "```\n",
148 |     "The speed of light is 2.997925e+08 m/s.\n",
149 |     "```"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 5,
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": [
158 |     "# Your answer here."
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "metadata": {},
164 |    "source": [
165 |     "### 6."
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "metadata": {},
171 |    "source": [
172 |     "Given this nested list, use indexing to grab the word \"MDS\":"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 6,
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "l = [10, [3, 4], [5, [100, 200, [\"MDS\"]], 23, 11], 1, 7]\n",
182 |     "\n",
183 |     "# Your answer here."
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "### 7."
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "Given this nested dictionary, grab the word \"MDS\":"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 7,
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": [
206 |     "d = {\n",
207 |     "    \"outer\": [\n",
208 |     "        1,\n",
209 |     "        2,\n",
210 |     "        3,\n",
211 |     "        {\"inner\": [\"this\", \"is\", \"inception\", {\"inner_inner\": [1, 2, 3, \"MDS\"]}]},\n",
212 |     "    ]\n",
213 |     "}\n",
214 |     "\n",
215 |     "# Your answer here."
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "markdown",
220 |    "metadata": {},
221 |    "source": [
222 |     "### 8."
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "markdown",
227 |    "metadata": {},
228 |    "source": [
229 |     "Why does the following cell return an error?"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": 8,
235 |    "metadata": {
236 |     "tags": [
237 |      "raises-exception"
238 |     ]
239 |    },
240 |    "outputs": [
241 |     {
242 |      "ename": "TypeError",
243 |      "evalue": "'tuple' object does not support item assignment",
244 |      "output_type": "error",
245 |      "traceback": [
246 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
247 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
248 |       "\u001b[0;32m<ipython-input-8-d668dd5b8bda>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
249 |       "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment"
250 |      ]
251 |     }
252 |    ],
253 |    "source": [
254 |     "t = (1, 2, 3, 4, 5)\n",
255 |     "t[-1] = 6"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {},
261 |    "source": [
262 |     "### 9."
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {},
268 |    "source": [
269 |     "Use string methods to extract the website domain from an email, e.g., from the string `\"tomas.beuzen@fakemail.com\"`, you should extract `\"fakemail\"`."
270 |    ]
271 |   },
272 |   {
273 |    "cell_type": "code",
274 |    "execution_count": 9,
275 |    "metadata": {},
276 |    "outputs": [],
277 |    "source": [
278 |     "email = \"tomas.beuzen@fakemail.com\"\n",
279 |     "\n",
280 |     "# Your answer here."
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {},
286 |    "source": [
287 |     "### 10."
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "markdown",
292 |    "metadata": {},
293 |    "source": [
294 |     "Given the variable `language`, which contains a string, use `if/elif/else` to write a program that:\n",
295 |     "- returns \"I love snakes!\" if `language` is `\"python\"` (any kind of capitalization)\n",
296 |     "- returns \"Are you a pirate?\" if `language` is `\"R\"` (any kind of capitalization)\n",
297 |     "- returns \"What is `language`?\" if `language` is anything else."
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "code",
302 |    "execution_count": 10,
303 |    "metadata": {},
304 |    "outputs": [],
305 |    "source": [
306 |     "language = \"python\"\n",
307 |     "\n",
308 |     "# Your answer here."
309 |    ]
310 |   },
311 |   {
312 |    "cell_type": "markdown",
313 |    "metadata": {},
314 |    "source": [
315 |     "<hr>\n",
316 |     "<hr>\n",
317 |     "<hr>"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "markdown",
322 |    "metadata": {},
323 |    "source": [
324 |     "## Solutions"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "markdown",
329 |    "metadata": {},
330 |    "source": [
331 |     "### 1."
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "markdown",
336 |    "metadata": {},
337 |    "source": [
338 |     "What is 5 to the power of 5?"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 11,
344 |    "metadata": {},
345 |    "outputs": [
346 |     {
347 |      "data": {
348 |       "text/plain": [
349 |        "3125"
350 |       ]
351 |      },
352 |      "execution_count": 11,
353 |      "metadata": {},
354 |      "output_type": "execute_result"
355 |     }
356 |    ],
357 |    "source": [
358 |     "5 ** 5"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "metadata": {},
364 |    "source": [
365 |     "### 2."
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "markdown",
370 |    "metadata": {},
371 |    "source": [
372 |     "What is the remainder from dividing 73 by 6?"
373 |    ]
374 |   },
375 |   {
376 |    "cell_type": "code",
377 |    "execution_count": 12,
378 |    "metadata": {},
379 |    "outputs": [
380 |     {
381 |      "data": {
382 |       "text/plain": [
383 |        "1"
384 |       ]
385 |      },
386 |      "execution_count": 12,
387 |      "metadata": {},
388 |      "output_type": "execute_result"
389 |     }
390 |    ],
391 |    "source": [
392 |     "73 % 6"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "markdown",
397 |    "metadata": {},
398 |    "source": [
399 |     "### 3."
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "markdown",
404 |    "metadata": {},
405 |    "source": [
406 |     "How many times does the whole number 3 go into 123? What is the remainder of dividing 123 by 3?"
407 |    ]
408 |   },
409 |   {
410 |    "cell_type": "code",
411 |    "execution_count": 13,
412 |    "metadata": {},
413 |    "outputs": [
414 |     {
415 |      "name": "stdout",
416 |      "output_type": "stream",
417 |      "text": [
418 |       "41\n",
419 |       "0\n"
420 |      ]
421 |     }
422 |    ],
423 |    "source": [
424 |     "print(123 // 3)\n",
425 |     "print(123 % 3)"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "markdown",
430 |    "metadata": {},
431 |    "source": [
432 |     "### 4."
433 |    ]
434 |   },
435 |   {
436 |    "cell_type": "markdown",
437 |    "metadata": {},
438 |    "source": [
439 |     "Split this string on the space character into a list:\n",
440 |     "\n",
441 |     "```\n",
442 |     "s = \"MDS is going virtual!\"\n",
443 |     "```"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": 14,
449 |    "metadata": {},
450 |    "outputs": [
451 |     {
452 |      "data": {
453 |       "text/plain": [
454 |        "['MDS', 'is', 'going', 'virtual!']"
455 |       ]
456 |      },
457 |      "execution_count": 14,
458 |      "metadata": {},
459 |      "output_type": "execute_result"
460 |     }
461 |    ],
462 |    "source": [
463 |     "s = \"MDS is going virtual!\"\n",
464 |     "s.split()"
465 |    ]
466 |   },
467 |   {
468 |    "cell_type": "markdown",
469 |    "metadata": {},
470 |    "source": [
471 |     "### 5."
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "markdown",
476 |    "metadata": {},
477 |    "source": [
478 |     "Given the following variables:\n",
479 |     "\n",
480 |     "```\n",
481 |     "thing = \"light\"\n",
482 |     "speed = 299792458  # m/s\n",
483 |     "```\n",
484 |     "\n",
485 |     "Use f-strings to print:\n",
486 |     "\n",
487 |     "```\n",
488 |     "The speed of light is 2.997925e+08 m/s.\n",
489 |     "```"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": 15,
495 |    "metadata": {},
496 |    "outputs": [
497 |     {
498 |      "name": "stdout",
499 |      "output_type": "stream",
500 |      "text": [
501 |       "The speed of light is 2.997925e+08 m/s.\n"
502 |      ]
503 |     }
504 |    ],
505 |    "source": [
506 |     "thing = \"light\"\n",
507 |     "speed = 299792458  # m/s\n",
508 |     "print(f\"The speed of {thing} is {speed:2e} m/s.\")"
509 |    ]
510 |   },
511 |   {
512 |    "cell_type": "markdown",
513 |    "metadata": {},
514 |    "source": [
515 |     "### 6."
516 |    ]
517 |   },
518 |   {
519 |    "cell_type": "markdown",
520 |    "metadata": {},
521 |    "source": [
522 |     "Given this nested list, use indexing to grab the word \"MDS\":"
523 |    ]
524 |   },
525 |   {
526 |    "cell_type": "code",
527 |    "execution_count": 16,
528 |    "metadata": {},
529 |    "outputs": [],
530 |    "source": [
531 |     "l = [10,[3,4],[5,[100,200,['MDS']],23,11],1,7]"
532 |    ]
533 |   },
534 |   {
535 |    "cell_type": "code",
536 |    "execution_count": 17,
537 |    "metadata": {},
538 |    "outputs": [
539 |     {
540 |      "data": {
541 |       "text/plain": [
542 |        "'MDS'"
543 |       ]
544 |      },
545 |      "execution_count": 17,
546 |      "metadata": {},
547 |      "output_type": "execute_result"
548 |     }
549 |    ],
550 |    "source": [
551 |     "l[2][1][2][0]"
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "markdown",
556 |    "metadata": {},
557 |    "source": [
558 |     "### 7."
559 |    ]
560 |   },
561 |   {
562 |    "cell_type": "markdown",
563 |    "metadata": {},
564 |    "source": [
565 |     "Given this nested dictionary, grab the word \"MDS\":"
566 |    ]
567 |   },
568 |   {
569 |    "cell_type": "code",
570 |    "execution_count": 18,
571 |    "metadata": {},
572 |    "outputs": [],
573 |    "source": [
574 |     "d = {\n",
575 |     "    \"outer\": [\n",
576 |     "        1,\n",
577 |     "        2,\n",
578 |     "        3,\n",
579 |     "        {\"inner\": [\"this\", \"is\", \"inception\", {\"inner_inner\": [1, 2, 3, \"MDS\"]}]},\n",
580 |     "    ]\n",
581 |     "}"
582 |    ]
583 |   },
584 |   {
585 |    "cell_type": "code",
586 |    "execution_count": 19,
587 |    "metadata": {},
588 |    "outputs": [
589 |     {
590 |      "data": {
591 |       "text/plain": [
592 |        "'MDS'"
593 |       ]
594 |      },
595 |      "execution_count": 19,
596 |      "metadata": {},
597 |      "output_type": "execute_result"
598 |     }
599 |    ],
600 |    "source": [
601 |     "d['outer'][3]['inner'][3]['inner_inner'][3]"
602 |    ]
603 |   },
604 |   {
605 |    "cell_type": "markdown",
606 |    "metadata": {},
607 |    "source": [
608 |     "### 8."
609 |    ]
610 |   },
611 |   {
612 |    "cell_type": "markdown",
613 |    "metadata": {},
614 |    "source": [
615 |     "Why does the following cell return an error?"
616 |    ]
617 |   },
618 |   {
619 |    "cell_type": "code",
620 |    "execution_count": 20,
621 |    "metadata": {
622 |     "tags": [
623 |      "raises-exception"
624 |     ]
625 |    },
626 |    "outputs": [
627 |     {
628 |      "ename": "TypeError",
629 |      "evalue": "'tuple' object does not support item assignment",
630 |      "output_type": "error",
631 |      "traceback": [
632 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
633 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
634 |       "\u001b[0;32m<ipython-input-20-d668dd5b8bda>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mt\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m6\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
635 |       "\u001b[0;31mTypeError\u001b[0m: 'tuple' object does not support item assignment"
636 |      ]
637 |     }
638 |    ],
639 |    "source": [
640 |     "t = (1, 2, 3, 4, 5)\n",
641 |     "t[-1] = 6"
642 |    ]
643 |   },
644 |   {
645 |    "cell_type": "markdown",
646 |    "metadata": {},
647 |    "source": [
648 |     "Because tuples are immutable!"
649 |    ]
650 |   },
651 |   {
652 |    "cell_type": "markdown",
653 |    "metadata": {},
654 |    "source": [
655 |     "### 9."
656 |    ]
657 |   },
658 |   {
659 |    "cell_type": "markdown",
660 |    "metadata": {},
661 |    "source": [
662 |     "Use string methods to extract the website domain from an email, e.g., from the string `\"tomas.beuzen@fakemail.com\"`, you should extract `\"fakemail\"`."
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": 21,
668 |    "metadata": {},
669 |    "outputs": [
670 |     {
671 |      "data": {
672 |       "text/plain": [
673 |        "'fakemail'"
674 |       ]
675 |      },
676 |      "execution_count": 21,
677 |      "metadata": {},
678 |      "output_type": "execute_result"
679 |     }
680 |    ],
681 |    "source": [
682 |     "email = \"tomas.beuzen@fakemail.com\"\n",
683 |     "email.split(\"@\")[-1].split(\".com\")[0]"
684 |    ]
685 |   },
686 |   {
687 |    "cell_type": "markdown",
688 |    "metadata": {},
689 |    "source": [
690 |     "### 10."
691 |    ]
692 |   },
693 |   {
694 |    "cell_type": "markdown",
695 |    "metadata": {},
696 |    "source": [
697 |     "Given the variable `language` which contains a string, use `if/elif/else` to write a program that:\n",
698 |     "- prints \"I love snakes!\" if `language` is `\"python\"` (any kind of capitalization)\n",
699 |     "- prints \"Are you a pirate?\" if `language` is `\"R\"` (any kind of capitalization)\n",
700 |     "- prints \"What is `language`?\" if `language` is anything else."
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "code",
705 |    "execution_count": 22,
706 |    "metadata": {},
707 |    "outputs": [
708 |     {
709 |      "name": "stdout",
710 |      "output_type": "stream",
711 |      "text": [
712 |       "I love snakes!\n"
713 |      ]
714 |     }
715 |    ],
716 |    "source": [
717 |     "language = \"python\"\n",
718 |     "if language.lower() == \"python\":\n",
719 |     "    print(\"I love snakes!\")\n",
720 |     "elif language.lower() == \"r\":\n",
721 |     "    print(\"Are you a pirate?\")\n",
722 |     "else:\n",
723 |     "    print(f\"What is {language}?\")"
724 |    ]
725 |   }
726 |  ],
727 |  "metadata": {
728 |   "kernelspec": {
729 |    "display_name": "Python 3",
730 |    "language": "python",
731 |    "name": "python3"
732 |   },
733 |   "language_info": {
734 |    "codemirror_mode": {
735 |     "name": "ipython",
736 |     "version": 3
737 |    },
738 |    "file_extension": ".py",
739 |    "mimetype": "text/x-python",
740 |    "name": "python",
741 |    "nbconvert_exporter": "python",
742 |    "pygments_lexer": "ipython3",
743 |    "version": "3.7.8"
744 |   }
745 |  },
746 |  "nbformat": 4,
747 |  "nbformat_minor": 4
748 | }
749 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter5-numpy-practice.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "![](../docs/banner.png)"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "markdown",
  12 |    "metadata": {},
  13 |    "source": [
  14 |     "# NumPy\n",
  15 |     "\n",
  16 |     "**Tomas Beuzen, September 2020**"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "markdown",
  21 |    "metadata": {},
  22 |    "source": [
  23 |     "These exercises complement [Chapter 5](../chapters/chapter5-numpy.ipynb) and [Chapter 6](../chapters/chapter6-numpy-addendum.ipynb)."
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "markdown",
  28 |    "metadata": {},
  29 |    "source": [
  30 |     "## Exercises"
  31 |    ]
  32 |   },
  33 |   {
  34 |    "cell_type": "markdown",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "### 1."
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "markdown",
  42 |    "metadata": {},
  43 |    "source": [
  44 |     "Import numpy under the alias `np`."
  45 |    ]
  46 |   },
  47 |   {
  48 |    "cell_type": "code",
  49 |    "execution_count": 1,
  50 |    "metadata": {},
  51 |    "outputs": [],
  52 |    "source": [
  53 |     "# Your answer here."
  54 |    ]
  55 |   },
  56 |   {
  57 |    "cell_type": "markdown",
  58 |    "metadata": {},
  59 |    "source": [
  60 |     "### 2."
  61 |    ]
  62 |   },
  63 |   {
  64 |    "cell_type": "markdown",
  65 |    "metadata": {},
  66 |    "source": [
  67 |     "Create the following arrays:\n",
  68 |     "\n",
  69 |     "1. Create an array of 5 zeros.\n",
  70 |     "2. Create an array of 10 ones.\n",
  71 |     "3. Create an array of 5 3.141s.\n",
  72 |     "4. Create an array of the integers 1 to 20.\n",
  73 |     "5. Create a 5 x 5 matrix of ones with a dtype `int`."
  74 |    ]
  75 |   },
  76 |   {
  77 |    "cell_type": "code",
  78 |    "execution_count": 2,
  79 |    "metadata": {},
  80 |    "outputs": [],
  81 |    "source": [
  82 |     "# Your answer here."
  83 |    ]
  84 |   },
  85 |   {
  86 |    "cell_type": "markdown",
  87 |    "metadata": {},
  88 |    "source": [
  89 |     "### 3."
  90 |    ]
  91 |   },
  92 |   {
  93 |    "cell_type": "markdown",
  94 |    "metadata": {},
  95 |    "source": [
  96 |     "Use numpy to:\n",
  97 |     "1. Create a 3D array of shape 3 x 3 x 3 full of random numbers drawn from a standard normal distribution (hint: `np.random.randn()`)\n",
  98 |     "2. Reshape the above array into shape (27,)"
  99 |    ]
 100 |   },
 101 |   {
 102 |    "cell_type": "code",
 103 |    "execution_count": 3,
 104 |    "metadata": {},
 105 |    "outputs": [],
 106 |    "source": [
 107 |     "# Your answer here."
 108 |    ]
 109 |   },
 110 |   {
 111 |    "cell_type": "markdown",
 112 |    "metadata": {},
 113 |    "source": [
 114 |     "### 4."
 115 |    ]
 116 |   },
 117 |   {
 118 |    "cell_type": "markdown",
 119 |    "metadata": {},
 120 |    "source": [
 121 |     "Create an array of 20 linearly spaced numbers between 1 and 10."
 122 |    ]
 123 |   },
 124 |   {
 125 |    "cell_type": "code",
 126 |    "execution_count": 4,
 127 |    "metadata": {},
 128 |    "outputs": [],
 129 |    "source": [
 130 |     "# Your answer here."
 131 |    ]
 132 |   },
 133 |   {
 134 |    "cell_type": "markdown",
 135 |    "metadata": {},
 136 |    "source": [
 137 |     "### 5."
 138 |    ]
 139 |   },
 140 |   {
 141 |    "cell_type": "markdown",
 142 |    "metadata": {},
 143 |    "source": [
 144 |     "Run the following code to create an array of shape 5 x 5 and then use indexing to produce the outputs shown below."
 145 |    ]
 146 |   },
 147 |   {
 148 |    "cell_type": "code",
 149 |    "execution_count": 5,
 150 |    "metadata": {},
 151 |    "outputs": [],
 152 |    "source": [
 153 |     "import numpy as np\n",
 154 |     "a = np.arange(1, 26).reshape(5, -1)"
 155 |    ]
 156 |   },
 157 |   {
 158 |    "cell_type": "markdown",
 159 |    "metadata": {},
 160 |    "source": [
 161 |     "```python\n",
 162 |     "20\n",
 163 |     "```"
 164 |    ]
 165 |   },
 166 |   {
 167 |    "cell_type": "code",
 168 |    "execution_count": 6,
 169 |    "metadata": {},
 170 |    "outputs": [],
 171 |    "source": [
 172 |     "# Your answer here."
 173 |    ]
 174 |   },
 175 |   {
 176 |    "cell_type": "markdown",
 177 |    "metadata": {},
 178 |    "source": [
 179 |     "```python\n",
 180 |     "array([[ 9, 10],\n",
 181 |     "       [14, 15],\n",
 182 |     "       [19, 20],\n",
 183 |     "       [24, 25]])\n",
 184 |     "```"
 185 |    ]
 186 |   },
 187 |   {
 188 |    "cell_type": "code",
 189 |    "execution_count": 7,
 190 |    "metadata": {},
 191 |    "outputs": [],
 192 |    "source": [
 193 |     "# Your answer here."
 194 |    ]
 195 |   },
 196 |   {
 197 |    "cell_type": "markdown",
 198 |    "metadata": {},
 199 |    "source": [
 200 |     "```python\n",
 201 |     "array([ 6,  7,  8,  9, 10])\n",
 202 |     "```"
 203 |    ]
 204 |   },
 205 |   {
 206 |    "cell_type": "code",
 207 |    "execution_count": 8,
 208 |    "metadata": {},
 209 |    "outputs": [],
 210 |    "source": [
 211 |     "# Your answer here."
 212 |    ]
 213 |   },
 214 |   {
 215 |    "cell_type": "markdown",
 216 |    "metadata": {},
 217 |    "source": [
 218 |     "```python\n",
 219 |     "array([[11, 12, 13, 14, 15],\n",
 220 |     "       [16, 17, 18, 19, 20]])\n",
 221 |     "```"
 222 |    ]
 223 |   },
 224 |   {
 225 |    "cell_type": "code",
 226 |    "execution_count": 9,
 227 |    "metadata": {},
 228 |    "outputs": [],
 229 |    "source": [
 230 |     "# Your answer here."
 231 |    ]
 232 |   },
 233 |   {
 234 |    "cell_type": "markdown",
 235 |    "metadata": {},
 236 |    "source": [
 237 |     "```python\n",
 238 |     "array([[ 8,  9],\n",
 239 |     "       [13, 14]])\n",
 240 |     "```"
 241 |    ]
 242 |   },
 243 |   {
 244 |    "cell_type": "code",
 245 |    "execution_count": 10,
 246 |    "metadata": {},
 247 |    "outputs": [],
 248 |    "source": [
 249 |     "# Your answer here."
 250 |    ]
 251 |   },
 252 |   {
 253 |    "cell_type": "markdown",
 254 |    "metadata": {},
 255 |    "source": [
 256 |     "### 6."
 257 |    ]
 258 |   },
 259 |   {
 260 |    "cell_type": "markdown",
 261 |    "metadata": {},
 262 |    "source": [
 263 |     "Calculate the sum of all the numbers in `a`."
 264 |    ]
 265 |   },
 266 |   {
 267 |    "cell_type": "code",
 268 |    "execution_count": 11,
 269 |    "metadata": {},
 270 |    "outputs": [],
 271 |    "source": [
 272 |     "# Your answer here."
 273 |    ]
 274 |   },
 275 |   {
 276 |    "cell_type": "markdown",
 277 |    "metadata": {},
 278 |    "source": [
 279 |     "### 7."
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "markdown",
 284 |    "metadata": {},
 285 |    "source": [
 286 |     "Calculate the sum of each row in `a`."
 287 |    ]
 288 |   },
 289 |   {
 290 |    "cell_type": "code",
 291 |    "execution_count": 12,
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "# Your answer here."
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "markdown",
 300 |    "metadata": {},
 301 |    "source": [
 302 |     "### 8."
 303 |    ]
 304 |   },
 305 |   {
 306 |    "cell_type": "markdown",
 307 |    "metadata": {},
 308 |    "source": [
 309 |     "Extract all values of `a` greater than the mean of `a` (hint: use a boolean mask)."
 310 |    ]
 311 |   },
 312 |   {
 313 |    "cell_type": "code",
 314 |    "execution_count": 13,
 315 |    "metadata": {},
 316 |    "outputs": [],
 317 |    "source": [
 318 |     "# Your answer here."
 319 |    ]
 320 |   },
 321 |   {
 322 |    "cell_type": "markdown",
 323 |    "metadata": {},
 324 |    "source": [
 325 |     "### 9."
 326 |    ]
 327 |   },
 328 |   {
 329 |    "cell_type": "markdown",
 330 |    "metadata": {},
 331 |    "source": [
 332 |     "Find the location of the minimum value in the following array `b`:"
 333 |    ]
 334 |   },
 335 |   {
 336 |    "cell_type": "code",
 337 |    "execution_count": 14,
 338 |    "metadata": {},
 339 |    "outputs": [
 340 |     {
 341 |      "data": {
 342 |       "text/plain": [
 343 |        "array([-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471, -0.57860025,\n",
 344 |        "        1.65143654, -2.42667924, -0.42891263,  1.26593626, -0.8667404 ])"
 345 |       ]
 346 |      },
 347 |      "execution_count": 14,
 348 |      "metadata": {},
 349 |      "output_type": "execute_result"
 350 |     }
 351 |    ],
 352 |    "source": [
 353 |     "np.random.seed(123)\n",
 354 |     "b = np.random.randn(10)\n",
 355 |     "b"
 356 |    ]
 357 |   },
 358 |   {
 359 |    "cell_type": "code",
 360 |    "execution_count": 15,
 361 |    "metadata": {},
 362 |    "outputs": [],
 363 |    "source": [
 364 |     "# Your answer here."
 365 |    ]
 366 |   },
 367 |   {
 368 |    "cell_type": "markdown",
 369 |    "metadata": {},
 370 |    "source": [
 371 |     "### 10."
 372 |    ]
 373 |   },
 374 |   {
 375 |    "cell_type": "markdown",
 376 |    "metadata": {},
 377 |    "source": [
 378 |     "Find the location of the maximum value in the following 2D array `c` (hint: there are many ways to do this question, but a quick search on stackoverflow.com will typically help you find the optimum solution for a problem, for example see [post](https://stackoverflow.com/questions/3584243/get-the-position-of-the-biggest-item-in-a-multi-dimensional-numpy-array)):"
 379 |    ]
 380 |   },
 381 |   {
 382 |    "cell_type": "code",
 383 |    "execution_count": 16,
 384 |    "metadata": {},
 385 |    "outputs": [
 386 |     {
 387 |      "data": {
 388 |       "text/plain": [
 389 |        "array([[-1.0856306 ,  0.99734545],\n",
 390 |        "       [ 0.2829785 , -1.50629471],\n",
 391 |        "       [-0.57860025,  1.65143654]])"
 392 |       ]
 393 |      },
 394 |      "execution_count": 16,
 395 |      "metadata": {},
 396 |      "output_type": "execute_result"
 397 |     }
 398 |    ],
 399 |    "source": [
 400 |     "np.random.seed(123)\n",
 401 |     "c = np.random.randn(3, 2)\n",
 402 |     "c"
 403 |    ]
 404 |   },
 405 |   {
 406 |    "cell_type": "code",
 407 |    "execution_count": 17,
 408 |    "metadata": {},
 409 |    "outputs": [],
 410 |    "source": [
 411 |     "# Your answer here."
 412 |    ]
 413 |   },
 414 |   {
 415 |    "cell_type": "markdown",
 416 |    "metadata": {},
 417 |    "source": [
 418 |     "<hr>\n",
 419 |     "<hr>\n",
 420 |     "<hr>"
 421 |    ]
 422 |   },
 423 |   {
 424 |    "cell_type": "markdown",
 425 |    "metadata": {},
 426 |    "source": [
 427 |     "## Solutions"
 428 |    ]
 429 |   },
 430 |   {
 431 |    "cell_type": "markdown",
 432 |    "metadata": {},
 433 |    "source": [
 434 |     "### 1."
 435 |    ]
 436 |   },
 437 |   {
 438 |    "cell_type": "markdown",
 439 |    "metadata": {},
 440 |    "source": [
 441 |     "Import numpy under the alias `np`."
 442 |    ]
 443 |   },
 444 |   {
 445 |    "cell_type": "code",
 446 |    "execution_count": 18,
 447 |    "metadata": {},
 448 |    "outputs": [],
 449 |    "source": [
 450 |     "import numpy as np"
 451 |    ]
 452 |   },
 453 |   {
 454 |    "cell_type": "markdown",
 455 |    "metadata": {},
 456 |    "source": [
 457 |     "### 2."
 458 |    ]
 459 |   },
 460 |   {
 461 |    "cell_type": "markdown",
 462 |    "metadata": {},
 463 |    "source": [
 464 |     "Create the following arrays:\n",
 465 |     "\n",
 466 |     "1. Create an array of 5 zeros.\n",
 467 |     "2. Create an array of 10 ones.\n",
 468 |     "3. Create an array of 5 3.141s.\n",
 469 |     "4. Create an array of the integers 1 to 20.\n",
 470 |     "5. Create a 5 x 5 matrix of ones with a dtype `int`."
 471 |    ]
 472 |   },
 473 |   {
 474 |    "cell_type": "code",
 475 |    "execution_count": 19,
 476 |    "metadata": {},
 477 |    "outputs": [
 478 |     {
 479 |      "name": "stdout",
 480 |      "output_type": "stream",
 481 |      "text": [
 482 |       "[0. 0. 0. 0. 0.]\n",
 483 |       "[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
 484 |       "[3.141 3.141 3.141 3.141 3.141]\n",
 485 |       "[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]\n",
 486 |       "[[1 1 1 1 1]\n",
 487 |       " [1 1 1 1 1]\n",
 488 |       " [1 1 1 1 1]\n",
 489 |       " [1 1 1 1 1]\n",
 490 |       " [1 1 1 1 1]]\n"
 491 |      ]
 492 |     }
 493 |    ],
 494 |    "source": [
 495 |     "print(np.zeros(5))\n",
 496 |     "print(np.ones(10))\n",
 497 |     "print(np.full(5, 3.141))\n",
 498 |     "print(np.arange(1, 21))\n",
 499 |     "print(np.ones((5, 5), dtype=int))"
 500 |    ]
 501 |   },
 502 |   {
 503 |    "cell_type": "markdown",
 504 |    "metadata": {},
 505 |    "source": [
 506 |     "### 3."
 507 |    ]
 508 |   },
 509 |   {
 510 |    "cell_type": "markdown",
 511 |    "metadata": {},
 512 |    "source": [
 513 |     "Use numpy to:\n",
 514 |     "1. Create a 3D array of shape 3 x 3 x 3 full of random numbers drawn from a standard normal distribution (hint: `np.random.randn()`)\n",
 515 |     "2. Reshape the above array into shape (27,)"
 516 |    ]
 517 |   },
 518 |   {
 519 |    "cell_type": "code",
 520 |    "execution_count": 20,
 521 |    "metadata": {},
 522 |    "outputs": [
 523 |     {
 524 |      "data": {
 525 |       "text/plain": [
 526 |        "array([[[-2.42667924, -0.42891263,  1.26593626],\n",
 527 |        "        [-0.8667404 , -0.67888615, -0.09470897],\n",
 528 |        "        [ 1.49138963, -0.638902  , -0.44398196]],\n",
 529 |        "\n",
 530 |        "       [[-0.43435128,  2.20593008,  2.18678609],\n",
 531 |        "        [ 1.0040539 ,  0.3861864 ,  0.73736858],\n",
 532 |        "        [ 1.49073203, -0.93583387,  1.17582904]],\n",
 533 |        "\n",
 534 |        "       [[-1.25388067, -0.6377515 ,  0.9071052 ],\n",
 535 |        "        [-1.4286807 , -0.14006872, -0.8617549 ],\n",
 536 |        "        [-0.25561937, -2.79858911, -1.7715331 ]]])"
 537 |       ]
 538 |      },
 539 |      "execution_count": 20,
 540 |      "metadata": {},
 541 |      "output_type": "execute_result"
 542 |     }
 543 |    ],
 544 |    "source": [
 545 |     "x = np.random.randn(3, 3, 3)\n",
 546 |     "x"
 547 |    ]
 548 |   },
 549 |   {
 550 |    "cell_type": "code",
 551 |    "execution_count": 21,
 552 |    "metadata": {},
 553 |    "outputs": [
 554 |     {
 555 |      "data": {
 556 |       "text/plain": [
 557 |        "array([-2.42667924, -0.42891263,  1.26593626, -0.8667404 , -0.67888615,\n",
 558 |        "       -0.09470897,  1.49138963, -0.638902  , -0.44398196, -0.43435128,\n",
 559 |        "        2.20593008,  2.18678609,  1.0040539 ,  0.3861864 ,  0.73736858,\n",
 560 |        "        1.49073203, -0.93583387,  1.17582904, -1.25388067, -0.6377515 ,\n",
 561 |        "        0.9071052 , -1.4286807 , -0.14006872, -0.8617549 , -0.25561937,\n",
 562 |        "       -2.79858911, -1.7715331 ])"
 563 |       ]
 564 |      },
 565 |      "execution_count": 21,
 566 |      "metadata": {},
 567 |      "output_type": "execute_result"
 568 |     }
 569 |    ],
 570 |    "source": [
 571 |     "x.reshape(-1) # or x.reshape(27)"
 572 |    ]
 573 |   },
 574 |   {
 575 |    "cell_type": "markdown",
 576 |    "metadata": {},
 577 |    "source": [
 578 |     "### 4."
 579 |    ]
 580 |   },
 581 |   {
 582 |    "cell_type": "markdown",
 583 |    "metadata": {},
 584 |    "source": [
 585 |     "Create an array of 20 linearly spaced numbers between 1 and 10."
 586 |    ]
 587 |   },
 588 |   {
 589 |    "cell_type": "code",
 590 |    "execution_count": 22,
 591 |    "metadata": {},
 592 |    "outputs": [
 593 |     {
 594 |      "data": {
 595 |       "text/plain": [
 596 |        "array([ 1.        ,  1.47368421,  1.94736842,  2.42105263,  2.89473684,\n",
 597 |        "        3.36842105,  3.84210526,  4.31578947,  4.78947368,  5.26315789,\n",
 598 |        "        5.73684211,  6.21052632,  6.68421053,  7.15789474,  7.63157895,\n",
 599 |        "        8.10526316,  8.57894737,  9.05263158,  9.52631579, 10.        ])"
 600 |       ]
 601 |      },
 602 |      "execution_count": 22,
 603 |      "metadata": {},
 604 |      "output_type": "execute_result"
 605 |     }
 606 |    ],
 607 |    "source": [
 608 |     "np.linspace(1, 10, 20)"
 609 |    ]
 610 |   },
 611 |   {
 612 |    "cell_type": "markdown",
 613 |    "metadata": {},
 614 |    "source": [
 615 |     "### 5."
 616 |    ]
 617 |   },
 618 |   {
 619 |    "cell_type": "markdown",
 620 |    "metadata": {},
 621 |    "source": [
 622 |     "Below I've defined an array of shape 5 x 5. Use indexing to produce the given outputs."
 623 |    ]
 624 |   },
 625 |   {
 626 |    "cell_type": "code",
 627 |    "execution_count": 23,
 628 |    "metadata": {},
 629 |    "outputs": [
 630 |     {
 631 |      "data": {
 632 |       "text/plain": [
 633 |        "array([[ 1,  2,  3,  4,  5],\n",
 634 |        "       [ 6,  7,  8,  9, 10],\n",
 635 |        "       [11, 12, 13, 14, 15],\n",
 636 |        "       [16, 17, 18, 19, 20],\n",
 637 |        "       [21, 22, 23, 24, 25]])"
 638 |       ]
 639 |      },
 640 |      "execution_count": 23,
 641 |      "metadata": {},
 642 |      "output_type": "execute_result"
 643 |     }
 644 |    ],
 645 |    "source": [
 646 |     "a = np.arange(1, 26).reshape(5, -1)\n",
 647 |     "a"
 648 |    ]
 649 |   },
 650 |   {
 651 |    "cell_type": "markdown",
 652 |    "metadata": {},
 653 |    "source": [
 654 |     "```python\n",
 655 |     "20\n",
 656 |     "```"
 657 |    ]
 658 |   },
 659 |   {
 660 |    "cell_type": "code",
 661 |    "execution_count": 24,
 662 |    "metadata": {},
 663 |    "outputs": [
 664 |     {
 665 |      "data": {
 666 |       "text/plain": [
 667 |        "20"
 668 |       ]
 669 |      },
 670 |      "execution_count": 24,
 671 |      "metadata": {},
 672 |      "output_type": "execute_result"
 673 |     }
 674 |    ],
 675 |    "source": [
 676 |     "a[3,4]"
 677 |    ]
 678 |   },
 679 |   {
 680 |    "cell_type": "markdown",
 681 |    "metadata": {},
 682 |    "source": [
 683 |     "```python\n",
 684 |     "array([[ 9, 10],\n",
 685 |     "       [14, 15],\n",
 686 |     "       [19, 20],\n",
 687 |     "       [24, 25]])\n",
 688 |     "```"
 689 |    ]
 690 |   },
 691 |   {
 692 |    "cell_type": "code",
 693 |    "execution_count": 25,
 694 |    "metadata": {},
 695 |    "outputs": [
 696 |     {
 697 |      "data": {
 698 |       "text/plain": [
 699 |        "array([[ 9, 10],\n",
 700 |        "       [14, 15],\n",
 701 |        "       [19, 20],\n",
 702 |        "       [24, 25]])"
 703 |       ]
 704 |      },
 705 |      "execution_count": 25,
 706 |      "metadata": {},
 707 |      "output_type": "execute_result"
 708 |     }
 709 |    ],
 710 |    "source": [
 711 |     "a[1:,3:]"
 712 |    ]
 713 |   },
 714 |   {
 715 |    "cell_type": "markdown",
 716 |    "metadata": {},
 717 |    "source": [
 718 |     "```python\n",
 719 |     "array([ 6,  7,  8,  9, 10])\n",
 720 |     "```"
 721 |    ]
 722 |   },
 723 |   {
 724 |    "cell_type": "code",
 725 |    "execution_count": 26,
 726 |    "metadata": {},
 727 |    "outputs": [
 728 |     {
 729 |      "data": {
 730 |       "text/plain": [
 731 |        "array([ 6,  7,  8,  9, 10])"
 732 |       ]
 733 |      },
 734 |      "execution_count": 26,
 735 |      "metadata": {},
 736 |      "output_type": "execute_result"
 737 |     }
 738 |    ],
 739 |    "source": [
 740 |     "a[1,:]"
 741 |    ]
 742 |   },
 743 |   {
 744 |    "cell_type": "markdown",
 745 |    "metadata": {},
 746 |    "source": [
 747 |     "```python\n",
 748 |     "array([[11, 12, 13, 14, 15],\n",
 749 |     "       [16, 17, 18, 19, 20]])\n",
 750 |     "```"
 751 |    ]
 752 |   },
 753 |   {
 754 |    "cell_type": "markdown",
 755 |    "metadata": {},
 756 |    "source": [
 757 |     "```python\n",
 758 |     "array([[ 8,  9],\n",
 759 |     "       [13, 14]])\n",
 760 |     "```"
 761 |    ]
 762 |   },
 763 |   {
 764 |    "cell_type": "code",
 765 |    "execution_count": 27,
 766 |    "metadata": {},
 767 |    "outputs": [
 768 |     {
 769 |      "data": {
 770 |       "text/plain": [
 771 |        "array([[ 8,  9],\n",
 772 |        "       [13, 14]])"
 773 |       ]
 774 |      },
 775 |      "execution_count": 27,
 776 |      "metadata": {},
 777 |      "output_type": "execute_result"
 778 |     }
 779 |    ],
 780 |    "source": [
 781 |     "a[1:3,2:4]"
 782 |    ]
 783 |   },
 784 |   {
 785 |    "cell_type": "markdown",
 786 |    "metadata": {},
 787 |    "source": [
 788 |     "### 6."
 789 |    ]
 790 |   },
 791 |   {
 792 |    "cell_type": "markdown",
 793 |    "metadata": {},
 794 |    "source": [
 795 |     "Calculate the sum of all the numbers in `a`."
 796 |    ]
 797 |   },
 798 |   {
 799 |    "cell_type": "code",
 800 |    "execution_count": 28,
 801 |    "metadata": {},
 802 |    "outputs": [
 803 |     {
 804 |      "data": {
 805 |       "text/plain": [
 806 |        "325"
 807 |       ]
 808 |      },
 809 |      "execution_count": 28,
 810 |      "metadata": {},
 811 |      "output_type": "execute_result"
 812 |     }
 813 |    ],
 814 |    "source": [
 815 |     "a.sum()"
 816 |    ]
 817 |   },
 818 |   {
 819 |    "cell_type": "markdown",
 820 |    "metadata": {},
 821 |    "source": [
 822 |     "### 7."
 823 |    ]
 824 |   },
 825 |   {
 826 |    "cell_type": "markdown",
 827 |    "metadata": {},
 828 |    "source": [
 829 |     "Calculate the sum of each row in `a`."
 830 |    ]
 831 |   },
 832 |   {
 833 |    "cell_type": "code",
 834 |    "execution_count": 29,
 835 |    "metadata": {},
 836 |    "outputs": [
 837 |     {
 838 |      "data": {
 839 |       "text/plain": [
 840 |        "array([ 15,  40,  65,  90, 115])"
 841 |       ]
 842 |      },
 843 |      "execution_count": 29,
 844 |      "metadata": {},
 845 |      "output_type": "execute_result"
 846 |     }
 847 |    ],
 848 |    "source": [
 849 |     "a.sum(axis=1)"
 850 |    ]
 851 |   },
 852 |   {
 853 |    "cell_type": "markdown",
 854 |    "metadata": {},
 855 |    "source": [
 856 |     "### 8."
 857 |    ]
 858 |   },
 859 |   {
 860 |    "cell_type": "markdown",
 861 |    "metadata": {},
 862 |    "source": [
 863 |     "Extract all values of `a` greater than the mean of `a` (hint: use a boolean mask)."
 864 |    ]
 865 |   },
 866 |   {
 867 |    "cell_type": "code",
 868 |    "execution_count": 30,
 869 |    "metadata": {},
 870 |    "outputs": [
 871 |     {
 872 |      "data": {
 873 |       "text/plain": [
 874 |        "array([14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25])"
 875 |       ]
 876 |      },
 877 |      "execution_count": 30,
 878 |      "metadata": {},
 879 |      "output_type": "execute_result"
 880 |     }
 881 |    ],
 882 |    "source": [
 883 |     "a[a > a.mean()]"
 884 |    ]
 885 |   },
 886 |   {
 887 |    "cell_type": "markdown",
 888 |    "metadata": {},
 889 |    "source": [
 890 |     "### 9."
 891 |    ]
 892 |   },
 893 |   {
 894 |    "cell_type": "markdown",
 895 |    "metadata": {},
 896 |    "source": [
 897 |     "Find the location of the minimum value in the following array `b`:"
 898 |    ]
 899 |   },
 900 |   {
 901 |    "cell_type": "code",
 902 |    "execution_count": 31,
 903 |    "metadata": {},
 904 |    "outputs": [
 905 |     {
 906 |      "data": {
 907 |       "text/plain": [
 908 |        "array([-1.0856306 ,  0.99734545,  0.2829785 , -1.50629471, -0.57860025,\n",
 909 |        "        1.65143654, -2.42667924, -0.42891263,  1.26593626, -0.8667404 ])"
 910 |       ]
 911 |      },
 912 |      "execution_count": 31,
 913 |      "metadata": {},
 914 |      "output_type": "execute_result"
 915 |     }
 916 |    ],
 917 |    "source": [
 918 |     "np.random.seed(123)\n",
 919 |     "b = np.random.randn(10)\n",
 920 |     "b"
 921 |    ]
 922 |   },
 923 |   {
 924 |    "cell_type": "code",
 925 |    "execution_count": 32,
 926 |    "metadata": {},
 927 |    "outputs": [
 928 |     {
 929 |      "data": {
 930 |       "text/plain": [
 931 |        "6"
 932 |       ]
 933 |      },
 934 |      "execution_count": 32,
 935 |      "metadata": {},
 936 |      "output_type": "execute_result"
 937 |     }
 938 |    ],
 939 |    "source": [
 940 |     "b.argmin()"
 941 |    ]
 942 |   },
 943 |   {
 944 |    "cell_type": "markdown",
 945 |    "metadata": {},
 946 |    "source": [
 947 |     "### 10."
 948 |    ]
 949 |   },
 950 |   {
 951 |    "cell_type": "markdown",
 952 |    "metadata": {},
 953 |    "source": [
 954 |     "Find the location of the maximum value in the following 2D array `c` (hint: there are many ways to answer this question, but a quick search on stackoverflow.com will typically help you find the optimum solution for a problem; for example, see [this post](https://stackoverflow.com/questions/3584243/get-the-position-of-the-biggest-item-in-a-multi-dimensional-numpy-array)):"
 955 |    ]
 956 |   },
 957 |   {
 958 |    "cell_type": "code",
 959 |    "execution_count": 33,
 960 |    "metadata": {},
 961 |    "outputs": [
 962 |     {
 963 |      "data": {
 964 |       "text/plain": [
 965 |        "array([[-1.0856306 ,  0.99734545],\n",
 966 |        "       [ 0.2829785 , -1.50629471],\n",
 967 |        "       [-0.57860025,  1.65143654]])"
 968 |       ]
 969 |      },
 970 |      "execution_count": 33,
 971 |      "metadata": {},
 972 |      "output_type": "execute_result"
 973 |     }
 974 |    ],
 975 |    "source": [
 976 |     "np.random.seed(123)\n",
 977 |     "c = np.random.randn(3, 2)\n",
 978 |     "c"
 979 |    ]
 980 |   },
 981 |   {
 982 |    "cell_type": "code",
 983 |    "execution_count": 34,
 984 |    "metadata": {},
 985 |    "outputs": [
 986 |     {
 987 |      "name": "stdout",
 988 |      "output_type": "stream",
 989 |      "text": [
 990 |       "Location of maximum: (2, 1)\n",
 991 |       "   Value of maximum: 1.65\n"
 992 |      ]
 993 |     }
 994 |    ],
 995 |    "source": [
 996 |     "print(f\"Location of maximum: {np.unravel_index(c.argmax(), c.shape)}\")\n",
 997 |     "print(f\"   Value of maximum: {c.max():.2f}\")"
 998 |    ]
 999 |   }
1000 |  ],
1001 |  "metadata": {
1002 |   "kernelspec": {
1003 |    "display_name": "Python 3",
1004 |    "language": "python",
1005 |    "name": "python3"
1006 |   },
1007 |   "language_info": {
1008 |    "codemirror_mode": {
1009 |     "name": "ipython",
1010 |     "version": 3
1011 |    },
1012 |    "file_extension": ".py",
1013 |    "mimetype": "text/x-python",
1014 |    "name": "python",
1015 |    "nbconvert_exporter": "python",
1016 |    "pygments_lexer": "ipython3",
1017 |    "version": "3.7.8"
1018 |   }
1019 |  },
1020 |  "nbformat": 4,
1021 |  "nbformat_minor": 4
1022 | }
1023 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter7-pandas-practice.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "![](../docs/banner.png)"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "markdown",
  12 |    "metadata": {},
  13 |    "source": [
  14 |     "# Pandas\n",
  15 |     "\n",
  16 |     "**Tomas Beuzen, September 2020**"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "markdown",
  21 |    "metadata": {},
  22 |    "source": [
  23 |     "These exercises complement [Chapter 7](../chapters/chapter7-pandas.ipynb)."
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "markdown",
  28 |    "metadata": {},
  29 |    "source": [
  30 |     "## Exercises"
  31 |    ]
  32 |   },
  33 |   {
  34 |    "cell_type": "markdown",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "### 1."
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "markdown",
  42 |    "metadata": {},
  43 |    "source": [
  44 |     "In this set of practice exercises we'll be investigating the carbon footprint of different foods. We'll be leveraging a dataset compiled by [Kasia Kulma](https://r-tastic.co.uk/post/from-messy-to-tidy/) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday).\n",
  45 |     "\n",
  46 |     "Start by importing pandas with the alias `pd`."
  47 |    ]
  48 |   },
  49 |   {
  50 |    "cell_type": "code",
  51 |    "execution_count": 2,
  52 |    "metadata": {},
  53 |    "outputs": [],
  54 |    "source": [
  55 |     "# Your answer here."
  56 |    ]
  57 |   },
  58 |   {
  59 |    "cell_type": "markdown",
  60 |    "metadata": {},
  61 |    "source": [
  62 |     "### 2."
  63 |    ]
  64 |   },
  65 |   {
  66 |    "cell_type": "markdown",
  67 |    "metadata": {},
  68 |    "source": [
  69 |     "The dataset we'll be working with has the following columns:\n",
  70 |     "\n",
  71 |     "|column      |description |\n",
  72 |     "|:-------------|:-----------|\n",
  73 |     "|country       | Country Name |\n",
  74 |     "|food_category | Food Category |\n",
  75 |     "|consumption   | Consumption (kg/person/year) |\n",
  76 |     "|co2_emmission | CO2 Emission (Kg CO2/person/year) |\n",
  77 |     "\n",
  78 |     "\n",
  79 |     "Import the dataset as a dataframe named `df` from this url: <https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv>"
  80 |    ]
  81 |   },
  82 |   {
  83 |    "cell_type": "code",
  84 |    "execution_count": 4,
  85 |    "metadata": {},
  86 |    "outputs": [],
  87 |    "source": [
  88 |     "# Your answer here."
  89 |    ]
  90 |   },
  91 |   {
  92 |    "cell_type": "markdown",
  93 |    "metadata": {},
  94 |    "source": [
  95 |     "### 3."
  96 |    ]
  97 |   },
  98 |   {
  99 |    "cell_type": "markdown",
 100 |    "metadata": {},
 101 |    "source": [
 102 |     "How many rows and columns are there in the dataframe?"
 103 |    ]
 104 |   },
 105 |   {
 106 |    "cell_type": "code",
 107 |    "execution_count": 5,
 108 |    "metadata": {},
 109 |    "outputs": [],
 110 |    "source": [
 111 |     "# Your answer here."
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "markdown",
 116 |    "metadata": {},
 117 |    "source": [
 118 |     "### 4."
 119 |    ]
 120 |   },
 121 |   {
 122 |    "cell_type": "markdown",
 123 |    "metadata": {},
 124 |    "source": [
 125 |     "What is the type of data in each column of `df`?"
 126 |    ]
 127 |   },
 128 |   {
 129 |    "cell_type": "code",
 130 |    "execution_count": 6,
 131 |    "metadata": {},
 132 |    "outputs": [],
 133 |    "source": [
 134 |     "# Your answer here."
 135 |    ]
 136 |   },
 137 |   {
 138 |    "cell_type": "markdown",
 139 |    "metadata": {},
 140 |    "source": [
 141 |     "### 5."
 142 |    ]
 143 |   },
 144 |   {
 145 |    "cell_type": "markdown",
 146 |    "metadata": {},
 147 |    "source": [
 148 |     "What is the mean `co2_emmission` of the whole dataset?"
 149 |    ]
 150 |   },
 151 |   {
 152 |    "cell_type": "code",
 153 |    "execution_count": 6,
 154 |    "metadata": {},
 155 |    "outputs": [],
 156 |    "source": [
 157 |     "# Your answer here."
 158 |    ]
 159 |   },
 160 |   {
 161 |    "cell_type": "markdown",
 162 |    "metadata": {},
 163 |    "source": [
 164 |     "### 6."
 165 |    ]
 166 |   },
 167 |   {
 168 |    "cell_type": "markdown",
 169 |    "metadata": {},
 170 |    "source": [
 171 |     "How many different kinds of foods are there in the dataset? How many countries are in the dataset?"
 172 |    ]
 173 |   },
 174 |   {
 175 |    "cell_type": "code",
 176 |    "execution_count": 8,
 177 |    "metadata": {},
 178 |    "outputs": [],
 179 |    "source": [
 180 |     "# Your answer here."
 181 |    ]
 182 |   },
 183 |   {
 184 |    "cell_type": "markdown",
 185 |    "metadata": {},
 186 |    "source": [
 187 |     "### 7."
 188 |    ]
 189 |   },
 190 |   {
 191 |    "cell_type": "markdown",
 192 |    "metadata": {},
 193 |    "source": [
 194 |     "What is the maximum `co2_emmission` in the dataset and which food type and country does it belong to?"
 195 |    ]
 196 |   },
 197 |   {
 198 |    "cell_type": "code",
 199 |    "execution_count": 8,
 200 |    "metadata": {},
 201 |    "outputs": [],
 202 |    "source": [
 203 |     "# Your answer here."
 204 |    ]
 205 |   },
 206 |   {
 207 |    "cell_type": "markdown",
 208 |    "metadata": {},
 209 |    "source": [
 210 |     "### 8."
 211 |    ]
 212 |   },
 213 |   {
 214 |    "cell_type": "markdown",
 215 |    "metadata": {},
 216 |    "source": [
 217 |     "How many countries produce more than 1000 Kg CO2/person/year for at least one food type?"
 218 |    ]
 219 |   },
 220 |   {
 221 |    "cell_type": "code",
 222 |    "execution_count": 8,
 223 |    "metadata": {},
 224 |    "outputs": [],
 225 |    "source": [
 226 |     "# Your answer here."
 227 |    ]
 228 |   },
 229 |   {
 230 |    "cell_type": "markdown",
 231 |    "metadata": {},
 232 |    "source": [
 233 |     "### 9."
 234 |    ]
 235 |   },
 236 |   {
 237 |    "cell_type": "markdown",
 238 |    "metadata": {},
 239 |    "source": [
 240 |     "Which country consumes the least amount of beef per person per year?"
 241 |    ]
 242 |   },
 243 |   {
 244 |    "cell_type": "code",
 245 |    "execution_count": 8,
 246 |    "metadata": {},
 247 |    "outputs": [],
 248 |    "source": [
 249 |     "# Your answer here."
 250 |    ]
 251 |   },
 252 |   {
 253 |    "cell_type": "markdown",
 254 |    "metadata": {},
 255 |    "source": [
 256 |     "### 10."
 257 |    ]
 258 |   },
 259 |   {
 260 |    "cell_type": "markdown",
 261 |    "metadata": {},
 262 |    "source": [
 263 |     "Which country consumes the largest amount of soybeans per person per year?"
 264 |    ]
 265 |   },
 266 |   {
 267 |    "cell_type": "code",
 268 |    "execution_count": 8,
 269 |    "metadata": {},
 270 |    "outputs": [],
 271 |    "source": [
 272 |     "# Your answer here."
 273 |    ]
 274 |   },
 275 |   {
 276 |    "cell_type": "markdown",
 277 |    "metadata": {},
 278 |    "source": [
 279 |     "### 11."
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "markdown",
 284 |    "metadata": {},
 285 |    "source": [
 286 |     "What are the total emissions of all the meat products (Pork, Poultry, Fish, Lamb & Goat, Beef) in the dataset combined?"
 287 |    ]
 288 |   },
 289 |   {
 290 |    "cell_type": "code",
 291 |    "execution_count": 8,
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "# Your answer here."
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "markdown",
 300 |    "metadata": {},
 301 |    "source": [
 302 |     "### 12."
 303 |    ]
 304 |   },
 305 |   {
 306 |    "cell_type": "markdown",
 307 |    "metadata": {},
 308 |    "source": [
 309 |     "What are the total emissions of all other (non-meat) products in the dataset combined?"
 310 |    ]
 311 |   },
 312 |   {
 313 |    "cell_type": "code",
 314 |    "execution_count": 8,
 315 |    "metadata": {},
 316 |    "outputs": [],
 317 |    "source": [
 318 |     "# Your answer here."
 319 |    ]
 320 |   },
 321 |   {
 322 |    "cell_type": "markdown",
 323 |    "metadata": {},
 324 |    "source": [
 325 |     "<hr>\n",
 326 |     "<hr>\n",
 327 |     "<hr>"
 328 |    ]
 329 |   },
 330 |   {
 331 |    "cell_type": "markdown",
 332 |    "metadata": {},
 333 |    "source": [
 334 |     "## Solutions"
 335 |    ]
 336 |   },
 337 |   {
 338 |    "cell_type": "markdown",
 339 |    "metadata": {},
 340 |    "source": [
 341 |     "### 1."
 342 |    ]
 343 |   },
 344 |   {
 345 |    "cell_type": "markdown",
 346 |    "metadata": {},
 347 |    "source": [
 348 |     "In this set of practice exercises we'll be investigating the carbon footprint of different foods. We'll be leveraging a dataset compiled by [Kasia Kulma](https://r-tastic.co.uk/post/from-messy-to-tidy/) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday).\n",
 349 |     "\n",
 350 |     "Start by importing pandas with the alias `pd`."
 351 |    ]
 352 |   },
 353 |   {
 354 |    "cell_type": "code",
 355 |    "execution_count": 1,
 356 |    "metadata": {},
 357 |    "outputs": [],
 358 |    "source": [
 359 |     "import pandas as pd"
 360 |    ]
 361 |   },
 362 |   {
 363 |    "cell_type": "markdown",
 364 |    "metadata": {},
 365 |    "source": [
 366 |     "### 2."
 367 |    ]
 368 |   },
 369 |   {
 370 |    "cell_type": "markdown",
 371 |    "metadata": {},
 372 |    "source": [
 373 |     "The dataset we'll be working with has the following columns:\n",
 374 |     "\n",
 375 |     "|column      |description |\n",
 376 |     "|:-------------|:-----------|\n",
 377 |     "|country       | Country Name |\n",
 378 |     "|food_category | Food Category |\n",
 379 |     "|consumption   | Consumption (kg/person/year) |\n",
 380 |     "|co2_emmission | CO2 Emission (Kg CO2/person/year) |\n",
 381 |     "\n",
 382 |     "\n",
 383 |     "Import the dataset as a dataframe named `df` from this url: <https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv>"
 384 |    ]
 385 |   },
 386 |   {
 387 |    "cell_type": "code",
 388 |    "execution_count": 3,
 389 |    "metadata": {},
 390 |    "outputs": [
 391 |     {
 392 |      "data": {
 393 |       "text/html": [
 394 |        "<div>\n",
 395 |        "<style scoped>\n",
 396 |        "    .dataframe tbody tr th:only-of-type {\n",
 397 |        "        vertical-align: middle;\n",
 398 |        "    }\n",
 399 |        "\n",
 400 |        "    .dataframe tbody tr th {\n",
 401 |        "        vertical-align: top;\n",
 402 |        "    }\n",
 403 |        "\n",
 404 |        "    .dataframe thead th {\n",
 405 |        "        text-align: right;\n",
 406 |        "    }\n",
 407 |        "</style>\n",
 408 |        "<table border=\"1\" class=\"dataframe\">\n",
 409 |        "  <thead>\n",
 410 |        "    <tr style=\"text-align: right;\">\n",
 411 |        "      <th></th>\n",
 412 |        "      <th>country</th>\n",
 413 |        "      <th>food_category</th>\n",
 414 |        "      <th>consumption</th>\n",
 415 |        "      <th>co2_emmission</th>\n",
 416 |        "    </tr>\n",
 417 |        "  </thead>\n",
 418 |        "  <tbody>\n",
 419 |        "    <tr>\n",
 420 |        "      <th>0</th>\n",
 421 |        "      <td>Argentina</td>\n",
 422 |        "      <td>Pork</td>\n",
 423 |        "      <td>10.51</td>\n",
 424 |        "      <td>37.20</td>\n",
 425 |        "    </tr>\n",
 426 |        "    <tr>\n",
 427 |        "      <th>1</th>\n",
 428 |        "      <td>Argentina</td>\n",
 429 |        "      <td>Poultry</td>\n",
 430 |        "      <td>38.66</td>\n",
 431 |        "      <td>41.53</td>\n",
 432 |        "    </tr>\n",
 433 |        "    <tr>\n",
 434 |        "      <th>2</th>\n",
 435 |        "      <td>Argentina</td>\n",
 436 |        "      <td>Beef</td>\n",
 437 |        "      <td>55.48</td>\n",
 438 |        "      <td>1712.00</td>\n",
 439 |        "    </tr>\n",
 440 |        "    <tr>\n",
 441 |        "      <th>3</th>\n",
 442 |        "      <td>Argentina</td>\n",
 443 |        "      <td>Lamb &amp; Goat</td>\n",
 444 |        "      <td>1.56</td>\n",
 445 |        "      <td>54.63</td>\n",
 446 |        "    </tr>\n",
 447 |        "    <tr>\n",
 448 |        "      <th>4</th>\n",
 449 |        "      <td>Argentina</td>\n",
 450 |        "      <td>Fish</td>\n",
 451 |        "      <td>4.36</td>\n",
 452 |        "      <td>6.96</td>\n",
 453 |        "    </tr>\n",
 454 |        "    <tr>\n",
 455 |        "      <th>...</th>\n",
 456 |        "      <td>...</td>\n",
 457 |        "      <td>...</td>\n",
 458 |        "      <td>...</td>\n",
 459 |        "      <td>...</td>\n",
 460 |        "    </tr>\n",
 461 |        "    <tr>\n",
 462 |        "      <th>1425</th>\n",
 463 |        "      <td>Bangladesh</td>\n",
 464 |        "      <td>Milk - inc. cheese</td>\n",
 465 |        "      <td>21.91</td>\n",
 466 |        "      <td>31.21</td>\n",
 467 |        "    </tr>\n",
 468 |        "    <tr>\n",
 469 |        "      <th>1426</th>\n",
 470 |        "      <td>Bangladesh</td>\n",
 471 |        "      <td>Wheat and Wheat Products</td>\n",
 472 |        "      <td>17.47</td>\n",
 473 |        "      <td>3.33</td>\n",
 474 |        "    </tr>\n",
 475 |        "    <tr>\n",
 476 |        "      <th>1427</th>\n",
 477 |        "      <td>Bangladesh</td>\n",
 478 |        "      <td>Rice</td>\n",
 479 |        "      <td>171.73</td>\n",
 480 |        "      <td>219.76</td>\n",
 481 |        "    </tr>\n",
 482 |        "    <tr>\n",
 483 |        "      <th>1428</th>\n",
 484 |        "      <td>Bangladesh</td>\n",
 485 |        "      <td>Soybeans</td>\n",
 486 |        "      <td>0.61</td>\n",
 487 |        "      <td>0.27</td>\n",
 488 |        "    </tr>\n",
 489 |        "    <tr>\n",
 490 |        "      <th>1429</th>\n",
 491 |        "      <td>Bangladesh</td>\n",
 492 |        "      <td>Nuts inc. Peanut Butter</td>\n",
 493 |        "      <td>0.72</td>\n",
 494 |        "      <td>1.27</td>\n",
 495 |        "    </tr>\n",
 496 |        "  </tbody>\n",
 497 |        "</table>\n",
 498 |        "<p>1430 rows × 4 columns</p>\n",
 499 |        "</div>"
 500 |       ],
 501 |       "text/plain": [
 502 |        "         country             food_category  consumption  co2_emmission\n",
 503 |        "0      Argentina                      Pork        10.51          37.20\n",
 504 |        "1      Argentina                   Poultry        38.66          41.53\n",
 505 |        "2      Argentina                      Beef        55.48        1712.00\n",
 506 |        "3      Argentina               Lamb & Goat         1.56          54.63\n",
 507 |        "4      Argentina                      Fish         4.36           6.96\n",
 508 |        "...          ...                       ...          ...            ...\n",
 509 |        "1425  Bangladesh        Milk - inc. cheese        21.91          31.21\n",
 510 |        "1426  Bangladesh  Wheat and Wheat Products        17.47           3.33\n",
 511 |        "1427  Bangladesh                      Rice       171.73         219.76\n",
 512 |        "1428  Bangladesh                  Soybeans         0.61           0.27\n",
 513 |        "1429  Bangladesh   Nuts inc. Peanut Butter         0.72           1.27\n",
 514 |        "\n",
 515 |        "[1430 rows x 4 columns]"
 516 |       ]
 517 |      },
 518 |      "execution_count": 3,
 519 |      "metadata": {},
 520 |      "output_type": "execute_result"
 521 |     }
 522 |    ],
 523 |    "source": [
 524 |     "url = \"https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv\"\n",
 525 |     "df = pd.read_csv(url)\n",
 526 |     "df"
 527 |    ]
 528 |   },
 529 |   {
 530 |    "cell_type": "markdown",
 531 |    "metadata": {},
 532 |    "source": [
 533 |     "### 3."
 534 |    ]
 535 |   },
 536 |   {
 537 |    "cell_type": "markdown",
 538 |    "metadata": {},
 539 |    "source": [
 540 |     "How many rows and columns are there in the dataframe?"
 541 |    ]
 542 |   },
 543 |   {
 544 |    "cell_type": "code",
 545 |    "execution_count": 4,
 546 |    "metadata": {},
 547 |    "outputs": [
 548 |     {
 549 |      "data": {
 550 |       "text/plain": [
 551 |        "(1430, 4)"
 552 |       ]
 553 |      },
 554 |      "execution_count": 4,
 555 |      "metadata": {},
 556 |      "output_type": "execute_result"
 557 |     }
 558 |    ],
 559 |    "source": [
 560 |     "df.shape"
 561 |    ]
 562 |   },
 563 |   {
 564 |    "cell_type": "markdown",
 565 |    "metadata": {},
 566 |    "source": [
 567 |     "### 4."
 568 |    ]
 569 |   },
 570 |   {
 571 |    "cell_type": "markdown",
 572 |    "metadata": {},
 573 |    "source": [
 574 |     "What is the type of data in each column of `df`?"
 575 |    ]
 576 |   },
 577 |   {
 578 |    "cell_type": "code",
 579 |    "execution_count": 5,
 580 |    "metadata": {},
 581 |    "outputs": [
 582 |     {
 583 |      "name": "stdout",
 584 |      "output_type": "stream",
 585 |      "text": [
 586 |       "<class 'pandas.core.frame.DataFrame'>\n",
 587 |       "RangeIndex: 1430 entries, 0 to 1429\n",
 588 |       "Data columns (total 4 columns):\n",
 589 |       " #   Column         Non-Null Count  Dtype  \n",
 590 |       "---  ------         --------------  -----  \n",
 591 |       " 0   country        1430 non-null   object \n",
 592 |       " 1   food_category  1430 non-null   object \n",
 593 |       " 2   consumption    1430 non-null   float64\n",
 594 |       " 3   co2_emmission  1430 non-null   float64\n",
 595 |       "dtypes: float64(2), object(2)\n",
 596 |       "memory usage: 44.8+ KB\n"
 597 |      ]
 598 |     }
 599 |    ],
 600 |    "source": [
 601 |     "df.info()"
 602 |    ]
 603 |   },
 604 |   {
 605 |    "cell_type": "markdown",
 606 |    "metadata": {},
 607 |    "source": [
 608 |     "### 5."
 609 |    ]
 610 |   },
 611 |   {
 612 |    "cell_type": "markdown",
 613 |    "metadata": {},
 614 |    "source": [
 615 |     "What is the mean `co2_emmission` of the whole dataset?"
 616 |    ]
 617 |   },
 618 |   {
 619 |    "cell_type": "code",
 620 |    "execution_count": 7,
 621 |    "metadata": {
 622 |     "tags": [
 623 |      "raises-exception"
 624 |     ]
 625 |    },
 626 |    "outputs": [
 627 |     {
 628 |      "data": {
 629 |       "text/plain": [
 630 |        "74.383993006993"
 631 |       ]
 632 |      },
 633 |      "execution_count": 7,
 634 |      "metadata": {},
 635 |      "output_type": "execute_result"
 636 |     }
 637 |    ],
 638 |    "source": [
 639 |     "df[\"co2_emmission\"].mean()"
 640 |    ]
 641 |   },
 642 |   {
 643 |    "cell_type": "markdown",
 644 |    "metadata": {},
 645 |    "source": [
 646 |     "### 6."
 647 |    ]
 648 |   },
 649 |   {
 650 |    "cell_type": "markdown",
 651 |    "metadata": {},
 652 |    "source": [
 653 |     "How many different kinds of foods are there in the dataset? How many countries are in the dataset?"
 654 |    ]
 655 |   },
 656 |   {
 657 |    "cell_type": "code",
 658 |    "execution_count": 35,
 659 |    "metadata": {},
 660 |    "outputs": [
 661 |     {
 662 |      "name": "stdout",
 663 |      "output_type": "stream",
 664 |      "text": [
 665 |       "There are 11 foods.\n",
 666 |       "There are 130 countries.\n"
 667 |      ]
 668 |     }
 669 |    ],
 670 |    "source": [
 671 |     "print(f\"There are {df['food_category'].nunique()} foods.\")\n",
 672 |     "print(f\"There are {df['country'].nunique()} countries.\")"
 673 |    ]
 674 |   },
 675 |   {
 676 |    "cell_type": "markdown",
 677 |    "metadata": {},
 678 |    "source": [
 679 |     "### 7."
 680 |    ]
 681 |   },
 682 |   {
 683 |    "cell_type": "markdown",
 684 |    "metadata": {},
 685 |    "source": [
 686 |     "What is the maximum `co2_emmission` in the dataset and which food type and country does it belong to?"
 687 |    ]
 688 |   },
 689 |   {
 690 |    "cell_type": "code",
 691 |    "execution_count": 46,
 692 |    "metadata": {},
 693 |    "outputs": [
 694 |     {
 695 |      "data": {
 696 |       "text/plain": [
 697 |        "country          Argentina\n",
 698 |        "food_category         Beef\n",
 699 |        "consumption          55.48\n",
 700 |        "co2_emmission         1712\n",
 701 |        "Name: 2, dtype: object"
 702 |       ]
 703 |      },
 704 |      "execution_count": 46,
 705 |      "metadata": {},
 706 |      "output_type": "execute_result"
 707 |     }
 708 |    ],
 709 |    "source": [
 710 |     "df.iloc[df['co2_emmission'].idxmax()]"
 711 |    ]
 712 |   },
 713 |   {
 714 |    "cell_type": "markdown",
 715 |    "metadata": {},
 716 |    "source": [
 717 |     "### 8."
 718 |    ]
 719 |   },
 720 |   {
 721 |    "cell_type": "markdown",
 722 |    "metadata": {},
 723 |    "source": [
 724 |     "How many countries produce more than 1000 Kg CO2/person/year for at least one food type?"
 725 |    ]
 726 |   },
 727 |   {
 728 |    "cell_type": "code",
 729 |    "execution_count": 52,
 730 |    "metadata": {},
 731 |    "outputs": [
 732 |     {
 733 |      "data": {
 734 |       "text/html": [
 735 |        "<div>\n",
 736 |        "<style scoped>\n",
 737 |        "    .dataframe tbody tr th:only-of-type {\n",
 738 |        "        vertical-align: middle;\n",
 739 |        "    }\n",
 740 |        "\n",
 741 |        "    .dataframe tbody tr th {\n",
 742 |        "        vertical-align: top;\n",
 743 |        "    }\n",
 744 |        "\n",
 745 |        "    .dataframe thead th {\n",
 746 |        "        text-align: right;\n",
 747 |        "    }\n",
 748 |        "</style>\n",
 749 |        "<table border=\"1\" class=\"dataframe\">\n",
 750 |        "  <thead>\n",
 751 |        "    <tr style=\"text-align: right;\">\n",
 752 |        "      <th></th>\n",
 753 |        "      <th>country</th>\n",
 754 |        "      <th>food_category</th>\n",
 755 |        "      <th>consumption</th>\n",
 756 |        "      <th>co2_emmission</th>\n",
 757 |        "    </tr>\n",
 758 |        "  </thead>\n",
 759 |        "  <tbody>\n",
 760 |        "    <tr>\n",
 761 |        "      <th>2</th>\n",
 762 |        "      <td>Argentina</td>\n",
 763 |        "      <td>Beef</td>\n",
 764 |        "      <td>55.48</td>\n",
 765 |        "      <td>1712.00</td>\n",
 766 |        "    </tr>\n",
 767 |        "    <tr>\n",
 768 |        "      <th>13</th>\n",
 769 |        "      <td>Australia</td>\n",
 770 |        "      <td>Beef</td>\n",
 771 |        "      <td>33.86</td>\n",
 772 |        "      <td>1044.85</td>\n",
 773 |        "    </tr>\n",
 774 |        "    <tr>\n",
 775 |        "      <th>57</th>\n",
 776 |        "      <td>USA</td>\n",
 777 |        "      <td>Beef</td>\n",
 778 |        "      <td>36.24</td>\n",
 779 |        "      <td>1118.29</td>\n",
 780 |        "    </tr>\n",
 781 |        "    <tr>\n",
 782 |        "      <th>90</th>\n",
 783 |        "      <td>Brazil</td>\n",
 784 |        "      <td>Beef</td>\n",
 785 |        "      <td>39.25</td>\n",
 786 |        "      <td>1211.17</td>\n",
 787 |        "    </tr>\n",
 788 |        "    <tr>\n",
 789 |        "      <th>123</th>\n",
 790 |        "      <td>Bermuda</td>\n",
 791 |        "      <td>Beef</td>\n",
 792 |        "      <td>33.15</td>\n",
 793 |        "      <td>1022.94</td>\n",
 794 |        "    </tr>\n",
 795 |        "  </tbody>\n",
 796 |        "</table>\n",
 797 |        "</div>"
 798 |       ],
 799 |       "text/plain": [
 800 |        "       country food_category  consumption  co2_emmission\n",
 801 |        "2    Argentina          Beef        55.48        1712.00\n",
 802 |        "13   Australia          Beef        33.86        1044.85\n",
 803 |        "57         USA          Beef        36.24        1118.29\n",
 804 |        "90      Brazil          Beef        39.25        1211.17\n",
 805 |        "123    Bermuda          Beef        33.15        1022.94"
 806 |       ]
 807 |      },
 808 |      "execution_count": 52,
 809 |      "metadata": {},
 810 |      "output_type": "execute_result"
 811 |     }
 812 |    ],
 813 |    "source": [
 814 |     "df.query(\"co2_emmission > 1000\")"
 815 |    ]
 816 |   },
 817 |   {
 818 |    "cell_type": "markdown",
 819 |    "metadata": {},
 820 |    "source": [
 821 |     "### 9."
 822 |    ]
 823 |   },
 824 |   {
 825 |    "cell_type": "markdown",
 826 |    "metadata": {},
 827 |    "source": [
 828 |     "Which country consumes the least amount of beef per person per year?"
 829 |    ]
 830 |   },
 831 |   {
 832 |    "cell_type": "code",
 833 |    "execution_count": 66,
 834 |    "metadata": {},
 835 |    "outputs": [
 836 |     {
 837 |      "data": {
 838 |       "text/html": [
 839 |        "<div>\n",
 840 |        "<style scoped>\n",
 841 |        "    .dataframe tbody tr th:only-of-type {\n",
 842 |        "        vertical-align: middle;\n",
 843 |        "    }\n",
 844 |        "\n",
 845 |        "    .dataframe tbody tr th {\n",
 846 |        "        vertical-align: top;\n",
 847 |        "    }\n",
 848 |        "\n",
 849 |        "    .dataframe thead th {\n",
 850 |        "        text-align: right;\n",
 851 |        "    }\n",
 852 |        "</style>\n",
 853 |        "<table border=\"1\" class=\"dataframe\">\n",
 854 |        "  <thead>\n",
 855 |        "    <tr style=\"text-align: right;\">\n",
 856 |        "      <th></th>\n",
 857 |        "      <th>country</th>\n",
 858 |        "      <th>food_category</th>\n",
 859 |        "      <th>consumption</th>\n",
 860 |        "      <th>co2_emmission</th>\n",
 861 |        "    </tr>\n",
 862 |        "  </thead>\n",
 863 |        "  <tbody>\n",
 864 |        "    <tr>\n",
 865 |        "      <th>1410</th>\n",
 866 |        "      <td>Liberia</td>\n",
 867 |        "      <td>Beef</td>\n",
 868 |        "      <td>0.78</td>\n",
 869 |        "      <td>24.07</td>\n",
 870 |        "    </tr>\n",
 871 |        "  </tbody>\n",
 872 |        "</table>\n",
 873 |        "</div>"
 874 |       ],
 875 |       "text/plain": [
 876 |        "      country food_category  consumption  co2_emmission\n",
 877 |        "1410  Liberia          Beef         0.78          24.07"
 878 |       ]
 879 |      },
 880 |      "execution_count": 66,
 881 |      "metadata": {},
 882 |      "output_type": "execute_result"
 883 |     }
 884 |    ],
 885 |    "source": [
 886 |     "(df.query(\"food_category == 'Beef'\")\n",
 887 |     "   .sort_values(by=\"consumption\")\n",
 888 |     "   .head(1))"
 889 |    ]
 890 |   },
 891 |   {
 892 |    "cell_type": "markdown",
 893 |    "metadata": {},
 894 |    "source": [
 895 |     "### 10."
 896 |    ]
 897 |   },
 898 |   {
 899 |    "cell_type": "markdown",
 900 |    "metadata": {},
 901 |    "source": [
 902 |     "Which country consumes the greatest amount of soybeans per person per year?"
 903 |    ]
 904 |   },
 905 |   {
 906 |    "cell_type": "code",
 907 |    "execution_count": 68,
 908 |    "metadata": {},
 909 |    "outputs": [
 910 |     {
 911 |      "data": {
 912 |       "text/html": [
 913 |        "<div>\n",
 914 |        "<style scoped>\n",
 915 |        "    .dataframe tbody tr th:only-of-type {\n",
 916 |        "        vertical-align: middle;\n",
 917 |        "    }\n",
 918 |        "\n",
 919 |        "    .dataframe tbody tr th {\n",
 920 |        "        vertical-align: top;\n",
 921 |        "    }\n",
 922 |        "\n",
 923 |        "    .dataframe thead th {\n",
 924 |        "        text-align: right;\n",
 925 |        "    }\n",
 926 |        "</style>\n",
 927 |        "<table border=\"1\" class=\"dataframe\">\n",
 928 |        "  <thead>\n",
 929 |        "    <tr style=\"text-align: right;\">\n",
 930 |        "      <th></th>\n",
 931 |        "      <th>country</th>\n",
 932 |        "      <th>food_category</th>\n",
 933 |        "      <th>consumption</th>\n",
 934 |        "      <th>co2_emmission</th>\n",
 935 |        "    </tr>\n",
 936 |        "  </thead>\n",
 937 |        "  <tbody>\n",
 938 |        "    <tr>\n",
 939 |        "      <th>1010</th>\n",
 940 |        "      <td>Taiwan. ROC</td>\n",
 941 |        "      <td>Soybeans</td>\n",
 942 |        "      <td>16.95</td>\n",
 943 |        "      <td>7.63</td>\n",
 944 |        "    </tr>\n",
 945 |        "  </tbody>\n",
 946 |        "</table>\n",
 947 |        "</div>"
 948 |       ],
 949 |       "text/plain": [
 950 |        "          country food_category  consumption  co2_emmission\n",
 951 |        "1010  Taiwan. ROC      Soybeans        16.95           7.63"
 952 |       ]
 953 |      },
 954 |      "execution_count": 68,
 955 |      "metadata": {},
 956 |      "output_type": "execute_result"
 957 |     }
 958 |    ],
 959 |    "source": [
 960 |     "(df.query(\"food_category == 'Soybeans'\")\n",
 961 |     "   .sort_values(by=\"consumption\", ascending=False)\n",
 962 |     "   .head(1))"
 963 |    ]
 964 |   },
 965 |   {
 966 |    "cell_type": "markdown",
 967 |    "metadata": {},
 968 |    "source": [
 969 |     "### 11."
 970 |    ]
 971 |   },
 972 |   {
 973 |    "cell_type": "markdown",
 974 |    "metadata": {},
 975 |    "source": [
 976 |     "What are the total emissions of all the meat products (Pork, Poultry, Fish, Lamb & Goat, Beef) in the dataset combined?"
 977 |    ]
 978 |   },
 979 |   {
 980 |    "cell_type": "code",
 981 |    "execution_count": 78,
 982 |    "metadata": {},
 983 |    "outputs": [
 984 |     {
 985 |      "data": {
 986 |       "text/plain": [
 987 |        "74441.13"
 988 |       ]
 989 |      },
 990 |      "execution_count": 78,
 991 |      "metadata": {},
 992 |      "output_type": "execute_result"
 993 |     }
 994 |    ],
 995 |    "source": [
 996 |     "meat = ['Poultry', 'Pork', 'Fish', 'Lamb & Goat', 'Beef']\n",
 997 |     "df[\"co2_emmission\"][df['food_category'].isin(meat)].sum()"
 998 |    ]
 999 |   },
1000 |   {
1001 |    "cell_type": "markdown",
1002 |    "metadata": {},
1003 |    "source": [
1004 |     "### 12."
1005 |    ]
1006 |   },
1007 |   {
1008 |    "cell_type": "markdown",
1009 |    "metadata": {},
1010 |    "source": [
1011 |     "What are the total emissions of all other (non-meat) products in the dataset combined?"
1012 |    ]
1013 |   },
1014 |   {
1015 |    "cell_type": "code",
1016 |    "execution_count": 83,
1017 |    "metadata": {},
1018 |    "outputs": [
1019 |     {
1020 |      "data": {
1021 |       "text/plain": [
1022 |        "31927.98"
1023 |       ]
1024 |      },
1025 |      "execution_count": 83,
1026 |      "metadata": {},
1027 |      "output_type": "execute_result"
1028 |     }
1029 |    ],
1030 |    "source": [
1031 |     "meat = ['Poultry', 'Pork', 'Fish', 'Lamb & Goat', 'Beef']\n",
1032 |     "df[\"co2_emmission\"][~df['food_category'].isin(meat)].sum()"
1033 |    ]
1034 |   }
1035 |  ],
1036 |  "metadata": {
1037 |   "kernelspec": {
1038 |    "display_name": "Python 3",
1039 |    "language": "python",
1040 |    "name": "python3"
1041 |   },
1042 |   "language_info": {
1043 |    "codemirror_mode": {
1044 |     "name": "ipython",
1045 |     "version": 3
1046 |    },
1047 |    "file_extension": ".py",
1048 |    "mimetype": "text/x-python",
1049 |    "name": "python",
1050 |    "nbconvert_exporter": "python",
1051 |    "pygments_lexer": "ipython3",
1052 |    "version": "3.7.8"
1053 |   }
1054 |  },
1055 |  "nbformat": 4,
1056 |  "nbformat_minor": 4
1057 | }
1058 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter3-tests-classes-practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "![](../docs/banner.png)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# Unit Tests & Classes\n",
 15 |     "\n",
 16 |     "**Tomas Beuzen, September 2020**"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "These exercises complement [Chapter 3](../chapters/chapter3-tests-classes.ipynb)."
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 1,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import math"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "## Exercises"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "### 1."
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "The function `area()` accepts the argument `radius` and calculates the area of a circle. Write three tests using `assert` statements for the following conditions:\n",
 54 |     "1. Assert that `area(1)` returns a `float`;\n",
 55 |     "2. Assert that `area(0)` returns a value of 0;\n",
 56 |     "3. Assert that `area(5)` is approximately equal to 78.5 (hint: `math.isclose(..., abs_tol=0.1)`)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 2,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "def area(radius):\n",
 66 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
 67 |     "    return math.pi * radius ** 2"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 3,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "# Your answer here."
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "markdown",
 81 |    "metadata": {},
 82 |    "source": [
 83 |     "### 2."
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
  90 |     "In the spirit of the EAFP (easier to ask for forgiveness than permission) philosophy, modify the code of the function `area()` and add a `try`/`except` statement to catch the type error raised by passing a string to `area()` as shown below:"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 4,
 96 |    "metadata": {
 97 |     "tags": [
 98 |      "raises-exception"
 99 |     ]
100 |    },
101 |    "outputs": [
102 |     {
103 |      "ename": "TypeError",
104 |      "evalue": "unsupported operand type(s) for ** or pow(): 'str' and 'int'",
105 |      "output_type": "error",
106 |      "traceback": [
107 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
108 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
109 |       "\u001b[0;32m<ipython-input-4-28e1bc493b84>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'10'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
110 |       "\u001b[0;32m<ipython-input-2-13e66cca8177>\u001b[0m in \u001b[0;36marea\u001b[0;34m(radius)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0;34m\"\"\"Calculate the area of a circle based on the given radius.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mradius\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
111 |       "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for ** or pow(): 'str' and 'int'"
112 |      ]
113 |     }
114 |    ],
115 |    "source": [
116 |     "area('10')"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 5,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "def area(radius):\n",
126 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
127 |     "    pass # Remove this line and add your answer here."
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "### 3."
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
 141 |     "In the spirit of the LBYL (look before you leap) philosophy, modify the code of the function `area()` and add a conditional `if`/`else` statement to make sure that a user has passed a number (`int` or `float`) to the `area()` function. If they pass something else, raise a `TypeError`."
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 6,
147 |    "metadata": {},
148 |    "outputs": [],
149 |    "source": [
150 |     "def area(radius):\n",
151 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
152 |     "    pass # Remove this line and add your answer here."
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "metadata": {},
158 |    "source": [
159 |     "### 4."
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "For this exercise I want you to create a class called `Circle`. It should have the following characteristics:\n",
 167 |     "1. It should be initialized with the argument `radius` and store this as an instance attribute.\n",
168 |     "2. Have a method `area()` which calculates the area of the circle.\n",
169 |     "3. Have a method `circumference()` which calculates the circumference of the circle.\n",
170 |     "4. Have the method `__str__()` which is a special method in Python and controls what is output to the screen when you `print()` an instance of your class (learn more [here](https://realpython.com/lessons/how-and-when-use-__str__/)). The `print()` statement should print the string `f\"A Circle with radius {self.radius}\"`.\n",
171 |     "\n",
172 |     "I've provided some tests for you to check your class."
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 7,
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "class Circle:\n",
182 |     "    \"\"\"A circle with a radius r.\"\"\"\n",
183 |     "\n",
184 |     "    pass # Remove this line and add your answer here."
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 8,
190 |    "metadata": {
191 |     "tags": [
192 |      "raises-exception"
193 |     ]
194 |    },
195 |    "outputs": [
196 |     {
197 |      "ename": "TypeError",
198 |      "evalue": "Circle() takes no arguments",
199 |      "output_type": "error",
200 |      "traceback": [
201 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
202 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
203 |       "\u001b[0;32m<ipython-input-8-5dbdbcaa4346>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m28.3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcircumference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m18.8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mCircle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Circle with radius 
3\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
204 |       "\u001b[0;31mTypeError\u001b[0m: Circle() takes no arguments"
205 |      ]
206 |     }
207 |    ],
208 |    "source": [
209 |     "assert Circle(3).radius == 3, \"Test 1 failed.\"\n",
210 |     "assert math.isclose(Circle(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n",
211 |     "assert math.isclose(Circle(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n",
212 |     "assert Circle(3).__str__() == \"A Circle with radius 3\", \"Test 4 failed.\""
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "markdown",
217 |    "metadata": {},
218 |    "source": [
219 |     "### 5."
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {},
225 |    "source": [
 226 |     "Now, let's create a new class `Sphere` that inherits from the `Circle` class we created above. It should have the following characteristics:\n",
 227 |     "\n",
 228 |     "1. It should be initialized exactly the same as `Circle` was, with the single argument `radius` which is stored as an instance attribute.\n",
229 |     "2. Have a method `volume()` which calculates the volume of the sphere ($\\frac{4}{3}{\\pi}{r^3}$).\n",
230 |     "3. Outputs the string `f\"A Sphere with volume 4.19\"` when you call `print(Sphere(1))` (hint: recall the `__str__()` method from the previous question).\n",
231 |     "\n",
232 |     "I've provided some tests for you to check your class."
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": 9,
238 |    "metadata": {},
239 |    "outputs": [],
240 |    "source": [
241 |     "# Your answer here."
242 |    ]
243 |   },
244 |   {
245 |    "cell_type": "code",
246 |    "execution_count": 10,
247 |    "metadata": {
248 |     "tags": [
249 |      "raises-exception"
250 |     ]
251 |    },
252 |    "outputs": [
253 |     {
254 |      "ename": "NameError",
255 |      "evalue": "name 'Sphere' is not defined",
256 |      "output_type": "error",
257 |      "traceback": [
258 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
259 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
260 |       "\u001b[0;32m<ipython-input-10-605d8a1c6bb6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m28.3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcircumference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m18.8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32massert\u001b[0m 
\u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvolume\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m113.1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Sphere with volume 4.19\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
261 |       "\u001b[0;31mNameError\u001b[0m: name 'Sphere' is not defined"
262 |      ]
263 |     }
264 |    ],
265 |    "source": [
266 |     "assert Sphere(3).radius == 3, \"Test 1 failed.\"\n",
267 |     "assert math.isclose(Sphere(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n",
268 |     "assert math.isclose(Sphere(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n",
269 |     "assert math.isclose(Sphere(3).volume(), 113.1, abs_tol=0.1), \"Test 3 failed.\"\n",
270 |     "assert Sphere(1).__str__() == \"A Sphere with volume 4.19\", \"Test 4 failed.\""
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "### 6."
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "Imagine that users of our `Sphere` class often want to instantiate our class with a `circumference` instead of a `radius`. Add a [class method](https://pages.github.ubc.ca/MDS-2020-21/DSCI_511_py-prog_students/lectures/lecture3-tests-classes.html#methods-class-methods-static-methods) called `from_circ()` to the `Sphere` class that allows users to do this. The method should calculate the `radius` from the passed `circumference`, and then use that `radius` to make an instance of `Sphere`.\n",
285 |     "\n",
286 |     "I've provided some tests for you to check your modified class."
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 11,
292 |    "metadata": {},
293 |    "outputs": [],
294 |    "source": [
295 |     "# Your answer here."
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "code",
300 |    "execution_count": 12,
301 |    "metadata": {
302 |     "tags": [
303 |      "raises-exception"
304 |     ]
305 |    },
306 |    "outputs": [
307 |     {
308 |      "ename": "NameError",
309 |      "evalue": "name 'Sphere' is not defined",
310 |      "output_type": "error",
311 |      "traceback": [
312 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
313 |       "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
314 |       "\u001b[0;32m<ipython-input-12-c64f2af0ae75>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 1 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 2 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0.95\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 3 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32massert\u001b[0m 
\u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvolume\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3.65\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mabs_tol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 4 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mSphere\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_circ\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__str__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"A Sphere with volume 3.65\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Test 5 failed.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
315 |       "\u001b[0;31mNameError\u001b[0m: name 'Sphere' is not defined"
316 |      ]
317 |     }
318 |    ],
319 |    "source": [
320 |     "assert Sphere.from_circ(0).radius == 0, \"Test 1 failed.\"\n",
321 |     "assert Sphere.from_circ(3 * math.pi).radius == 1.5, \"Test 2 failed.\" \n",
322 |     "assert math.isclose(Sphere.from_circ(6).radius, 0.95, abs_tol=0.1), \"Test 3 failed.\"\n",
323 |     "assert math.isclose(Sphere.from_circ(6).volume(), 3.65, abs_tol=0.1), \"Test 4 failed.\"\n",
324 |     "assert Sphere.from_circ(6).__str__() == \"A Sphere with volume 3.65\", \"Test 5 failed.\""
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "markdown",
329 |    "metadata": {},
330 |    "source": [
331 |     "<hr>\n",
332 |     "<hr>\n",
333 |     "<hr>"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "## Solutions"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "markdown",
345 |    "metadata": {},
346 |    "source": [
347 |     "### 1."
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "markdown",
352 |    "metadata": {},
353 |    "source": [
354 |     "The function `area()` accepts the argument `radius` and calculates the area of a circle. Write three tests using `assert` statements for the following conditions:\n",
355 |     "1. Assert that `area(1)` returns a `float`;\n",
356 |     "2. Assert that `area(0)` returns a value of 0;\n",
357 |     "3. Assert that `area(5)` is approximately equal to 78.5 (hint: `math.isclose(..., abs_tol=0.1)`)"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 13,
363 |    "metadata": {},
364 |    "outputs": [],
365 |    "source": [
366 |     "def area(radius):\n",
367 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
368 |     "    return math.pi * radius ** 2"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 14,
374 |    "metadata": {},
375 |    "outputs": [],
376 |    "source": [
377 |     "assert isinstance(area(1), float), 'Test 1 failed!'\n",
378 |     "assert area(0) == 0, 'Test 2 failed!'\n",
379 |     "assert math.isclose(area(5), 78.5, abs_tol=0.1)"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "markdown",
384 |    "metadata": {},
385 |    "source": [
386 |     "### 2."
387 |    ]
388 |   },
389 |   {
390 |    "cell_type": "markdown",
391 |    "metadata": {},
392 |    "source": [
 393 |     "In the spirit of the EAFP (easier to ask for forgiveness than permission) philosophy, modify the code of the function `area()` and add a `try`/`except` statement to catch the type error raised by passing a string to `area()` as shown below:"
394 |    ]
395 |   },
396 |   {
397 |    "cell_type": "code",
398 |    "execution_count": 15,
399 |    "metadata": {
400 |     "tags": [
401 |      "raises-exception"
402 |     ]
403 |    },
404 |    "outputs": [
405 |     {
406 |      "ename": "TypeError",
407 |      "evalue": "unsupported operand type(s) for ** or pow(): 'str' and 'int'",
408 |      "output_type": "error",
409 |      "traceback": [
410 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
411 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
412 |       "\u001b[0;32m<ipython-input-15-28e1bc493b84>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'10'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
413 |       "\u001b[0;32m<ipython-input-13-13e66cca8177>\u001b[0m in \u001b[0;36marea\u001b[0;34m(radius)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0marea\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mradius\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m     \u001b[0;34m\"\"\"Calculate the area of a circle based on the given radius.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mmath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpi\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mradius\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
414 |       "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for ** or pow(): 'str' and 'int'"
415 |      ]
416 |     }
417 |    ],
418 |    "source": [
419 |     "area('10')"
420 |    ]
421 |   },
422 |   {
423 |    "cell_type": "code",
424 |    "execution_count": 16,
425 |    "metadata": {},
426 |    "outputs": [],
427 |    "source": [
428 |     "def area(radius):\n",
429 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
430 |     "    try:\n",
431 |     "        return math.pi * radius ** 2\n",
432 |     "    except TypeError:\n",
433 |     "        print(f\"radius should be a number but you entered a {type(radius)}\")\n",
434 |     "    except:\n",
435 |     "        print(\"Some other error occurred!\")"
436 |    ]
437 |   },
438 |   {
439 |    "cell_type": "markdown",
440 |    "metadata": {},
441 |    "source": [
442 |     "### 3."
443 |    ]
444 |   },
445 |   {
446 |    "cell_type": "markdown",
447 |    "metadata": {},
448 |    "source": [
 449 |     "In the spirit of the LBYL (look before you leap) philosophy, modify the code of the function `area()` and add a conditional `if`/`else` statement to make sure that a user has passed a number (`int` or `float`) to the `area()` function. If they pass something else, raise a `TypeError`."
450 |    ]
451 |   },
452 |   {
453 |    "cell_type": "code",
454 |    "execution_count": 17,
455 |    "metadata": {},
456 |    "outputs": [],
457 |    "source": [
458 |     "def area(radius):\n",
459 |     "    \"\"\"Calculate the area of a circle based on the given radius.\"\"\"\n",
460 |     "    if isinstance(radius, (int, float)):\n",
461 |     "        return math.pi * radius ** 2\n",
462 |     "    else:\n",
463 |     "        raise TypeError(f\"radius should be a number but you entered a {type(radius)}\")"
464 |    ]
465 |   },
466 |   {
467 |    "cell_type": "markdown",
468 |    "metadata": {},
469 |    "source": [
470 |     "### 4."
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "markdown",
475 |    "metadata": {},
476 |    "source": [
477 |     "For this exercise I want you to create a class called `Circle`. It should have the following characteristics:\n",
478 |     "1. It should be initialized with the argument `radius` and store this as an instance attribute.\n",
479 |     "2. Have a method `area()` which calculates the area of the circle.\n",
480 |     "3. Have a method `circumference()` which calculates the circumference of the circle.\n",
481 |     "4. Have the method `__str__()` which is a special method in Python and controls what is output to the screen when you `print()` an instance of your class (learn more [here](https://realpython.com/lessons/how-and-when-use-__str__/)). The `print()` statement should print the string `f\"A Circle with radius {self.radius}\"`.\n",
482 |     "\n",
483 |     "I've provided some tests for you to check your class."
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "code",
488 |    "execution_count": 18,
489 |    "metadata": {},
490 |    "outputs": [],
491 |    "source": [
492 |     "class Circle:\n",
493 |     "    \"\"\"A circle with a radius r.\"\"\"\n",
494 |     "\n",
495 |     "    def __init__(self, radius):\n",
496 |     "        self.radius = radius\n",
497 |     "\n",
498 |     "    def area(self):\n",
499 |     "        \"\"\"Calculate the area of the circle.\"\"\"\n",
500 |     "        return math.pi * self.radius ** 2\n",
501 |     "\n",
502 |     "    def circumference(self):\n",
503 |     "        \"\"\"Calculate the circumference of the circle.\"\"\"\n",
504 |     "        return 2.0 * math.pi * self.radius\n",
505 |     "\n",
506 |     "    def __str__(self):\n",
507 |     "        return f\"A Circle with radius {self.radius}\""
508 |    ]
509 |   },
510 |   {
511 |    "cell_type": "code",
512 |    "execution_count": 19,
513 |    "metadata": {},
514 |    "outputs": [],
515 |    "source": [
516 |     "assert Circle(3).radius == 3, \"Test 1 failed.\"\n",
517 |     "assert math.isclose(Circle(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n",
518 |     "assert math.isclose(Circle(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n",
519 |     "assert Circle(3).__str__() == \"A Circle with radius 3\", \"Test 4 failed.\""
520 |    ]
521 |   },
522 |   {
523 |    "cell_type": "markdown",
524 |    "metadata": {},
525 |    "source": [
526 |     "### 5."
527 |    ]
528 |   },
529 |   {
530 |    "cell_type": "markdown",
531 |    "metadata": {},
532 |    "source": [
533 |     "Now, let's create a new class `Sphere` that inherits from the `Circle` class we created above. It should have the following characteristics:\n",
534 |     "\n",
535 |     "1. It should be initialized exactly the same as `Circle` was, with the single argument `radius` which is stored as an instance attribute.\n",
536 |     "2. Have a method `volume()` which calculates the volume of the sphere ($\\frac{4}{3}{\\pi}{r^3}$).\n",
537 |     "3. Outputs the string `f\"A Sphere with volume 4.19\"` when you call `print(Sphere(1))` (hint: recall the `__str__()` method from the previous question).\n",
538 |     "\n",
539 |     "I've provided some tests for you to check your class."
540 |    ]
541 |   },
542 |   {
543 |    "cell_type": "code",
544 |    "execution_count": 20,
545 |    "metadata": {},
546 |    "outputs": [],
547 |    "source": [
548 |     "class Sphere(Circle):\n",
549 |     "    \"\"\"A sphere with a radius r.\"\"\"\n",
550 |     "    \n",
551 |     "    def volume(self):\n",
552 |     "        \"\"\"Calculate the volume of the sphere.\"\"\"\n",
553 |     "        return 4 / 3 * math.pi * self.radius ** 3\n",
554 |     "\n",
555 |     "    def __str__(self):\n",
556 |     "        return f\"A Sphere with volume {self.volume():.2f}\""
557 |    ]
558 |   },
559 |   {
560 |    "cell_type": "code",
561 |    "execution_count": 21,
562 |    "metadata": {},
563 |    "outputs": [],
564 |    "source": [
565 |     "assert Sphere(3).radius == 3, \"Test 1 failed.\"\n",
566 |     "assert math.isclose(Sphere(3).area(), 28.3, abs_tol=0.1), \"Test 2 failed.\"\n",
567 |     "assert math.isclose(Sphere(3).circumference(), 18.8, abs_tol=0.1), \"Test 3 failed.\"\n",
568 |     "assert math.isclose(Sphere(3).volume(), 113.1, abs_tol=0.1), \"Test 3 failed.\"\n",
569 |     "assert Sphere(1).__str__() == \"A Sphere with volume 4.19\", \"Test 4 failed.\""
570 |    ]
571 |   },
572 |   {
573 |    "cell_type": "markdown",
574 |    "metadata": {},
575 |    "source": [
576 |     "### 6."
577 |    ]
578 |   },
579 |   {
580 |    "cell_type": "markdown",
581 |    "metadata": {},
582 |    "source": [
583 |     "Imagine that users of our `Sphere` class often want to instantiate our class with a `circumference` instead of a `radius`. Add a [class method](https://pages.github.ubc.ca/MDS-2020-21/DSCI_511_py-prog_students/lectures/lecture3-tests-classes.html#methods-class-methods-static-methods) called `from_circ()` to the `Sphere` class that allows users to do this. The method should calculate the `radius` from the passed `circumference`, and then use that `radius` to make an instance of `Sphere`.\n",
584 |     "\n",
585 |     "I've provided some tests for you to check your modified class."
586 |    ]
587 |   },
588 |   {
589 |    "cell_type": "code",
590 |    "execution_count": 22,
591 |    "metadata": {},
592 |    "outputs": [],
593 |    "source": [
594 |     "class Sphere(Circle):\n",
595 |     "    \"\"\"A sphere with a radius r.\"\"\"\n",
596 |     "    \n",
597 |     "    def volume(self):\n",
598 |     "        \"\"\"Calculate the volume of the sphere.\"\"\"\n",
599 |     "        return 4 / 3 * math.pi * self.radius ** 3\n",
600 |     "    \n",
601 |     "    @classmethod\n",
602 |     "    def from_circ(cls, circumference):\n",
603 |     "        \"\"\"Make an instance of Sphere from a circumference.\"\"\"\n",
604 |     "        radius = circumference / (2 * math.pi)\n",
605 |     "        return cls(radius)\n",
606 |     "\n",
607 |     "    def __str__(self):\n",
608 |     "        return f\"A Sphere with volume {self.volume():.2f}\""
609 |    ]
610 |   },
611 |   {
612 |    "cell_type": "code",
613 |    "execution_count": 23,
614 |    "metadata": {},
615 |    "outputs": [],
616 |    "source": [
617 |     "assert Sphere.from_circ(0).radius == 0, \"Test 1 failed.\"\n",
618 |     "assert Sphere.from_circ(3 * math.pi).radius == 1.5, \"Test 2 failed.\" \n",
619 |     "assert math.isclose(Sphere.from_circ(6).radius, 0.95, abs_tol=0.1), \"Test 3 failed.\"\n",
620 |     "assert math.isclose(Sphere.from_circ(6).volume(), 3.65, abs_tol=0.1), \"Test 4 failed.\"\n",
621 |     "assert Sphere.from_circ(6).__str__() == \"A Sphere with volume 3.65\", \"Test 5 failed.\""
622 |    ]
623 |   }
624 |  ],
625 |  "metadata": {
626 |   "kernelspec": {
627 |    "display_name": "Python 3",
628 |    "language": "python",
629 |    "name": "python3"
630 |   },
631 |   "language_info": {
632 |    "codemirror_mode": {
633 |     "name": "ipython",
634 |     "version": 3
635 |    },
636 |    "file_extension": ".py",
637 |    "mimetype": "text/x-python",
638 |    "name": "python",
639 |    "nbconvert_exporter": "python",
640 |    "pygments_lexer": "ipython3",
641 |    "version": "3.7.8"
642 |   }
643 |  },
644 |  "nbformat": 4,
645 |  "nbformat_minor": 4
646 | }
647 | 


--------------------------------------------------------------------------------
/practice-exercises/chapter9-wrangling-advanced-practice.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "![](../docs/banner.png)"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "markdown",
  12 |    "metadata": {},
  13 |    "source": [
  14 |     "# Advanced Wrangling With Pandas\n",
  15 |     "\n",
  16 |     "**Tomas Beuzen, September 2020**"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "markdown",
  21 |    "metadata": {},
  22 |    "source": [
  23 |     "These exercises complement [Chapter 9](../chapters/chapter9-wrangling-advanced.ipynb)."
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "markdown",
  28 |    "metadata": {},
  29 |    "source": [
  30 |     "## Exercises"
  31 |    ]
  32 |   },
  33 |   {
  34 |    "cell_type": "markdown",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "### 1."
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "markdown",
  42 |    "metadata": {},
  43 |    "source": [
  44 |     "In this set of practice exercises we'll be looking at a cool dataset of real passwords (made available from actual data breaches) sourced and compiled from [Information is Beautiful](https://informationisbeautiful.net/visualizations/top-500-passwords-visualized/?utm_content=buffer994fa&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday). These passwords are common (\"bad\") passwords that you should avoid using! But we're going to use this dataset to practice some regex skills.\n",
  45 |     "\n",
  46 |     "Let's start by importing pandas with the alias `pd`."
  47 |    ]
  48 |   },
  49 |   {
  50 |    "cell_type": "code",
  51 |    "execution_count": 1,
  52 |    "metadata": {},
  53 |    "outputs": [],
  54 |    "source": [
  55 |     "# Your answer here."
  56 |    ]
  57 |   },
  58 |   {
  59 |    "cell_type": "markdown",
  60 |    "metadata": {},
  61 |    "source": [
  62 |     "### 2."
  63 |    ]
  64 |   },
  65 |   {
  66 |    "cell_type": "markdown",
  67 |    "metadata": {},
  68 |    "source": [
  69 |     "The dataset has the following columns:\n",
  70 |     "\n",
  71 |     "|variable          |class     |description |\n",
  72 |     "|:-----------------|:---------|:-----------|\n",
  73 |     "|rank              |int    | popularity in their database of released passwords |\n",
  74 |     "|password          |str | Actual text of the password |\n",
  75 |     "|category          |str | What category does the password fall in to?|\n",
  76 |     "|value             |float    | Time to crack by online guessing |\n",
  77 |     "|time_unit         |str | Time unit to match with value |\n",
  78 |     "|offline_crack_sec |float    | Time to crack offline in seconds |\n",
  79 |     "|rank_alt          |int    | Rank 2 |\n",
  80 |     "|strength          |int    | Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords |\n",
  81 |     "|font_size         |int    | Used to create the graphic for KIB |\n",
  82 |     "\n",
  83 |     "\n",
  84 |     "In these exercises, we're only interested in the `password`, `value` and `time_unit` columns, so import only these three columns as a dataframe named `df` from this url: <https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv>"
  85 |    ]
  86 |   },
  87 |   {
  88 |    "cell_type": "code",
  89 |    "execution_count": 2,
  90 |    "metadata": {},
  91 |    "outputs": [],
  92 |    "source": [
  93 |     "# Your answer here."
  94 |    ]
  95 |   },
  96 |   {
  97 |    "cell_type": "markdown",
  98 |    "metadata": {},
  99 |    "source": [
 100 |     "### 3."
 101 |    ]
 102 |   },
 103 |   {
 104 |    "cell_type": "markdown",
 105 |    "metadata": {},
 106 |    "source": [
 107 |     "An online password attack is when someone tries to hack your account by simply trying a very large number of username/password combinations to access your account. For each `password` in our dataset, the `value` column shows the amount of time it is estimated to take an \"online password attack\" to hack your account. The column `time_unit` shows the units of that time value (e.g., hours, days, years, etc.)\n",
 108 |     "\n",
 109 |     "It would be much nicer if our `value`s were of the same units so we can more easily compare the \"online password guessing time\" for each password. So your first task is to convert all of the values to units of hours (assume the conversion units I've provided below, e.g., 1 day is 24 hours, 1 week is 168 hours, etc.)."
 110 |    ]
 111 |   },
 112 |   {
 113 |    "cell_type": "code",
 114 |    "execution_count": 3,
 115 |    "metadata": {},
 116 |    "outputs": [],
 117 |    "source": [
 118 |     "units = {\n",
 119 |     "    \"seconds\": 1 / 3600,\n",
 120 |     "    \"minutes\": 1 / 60,\n",
 121 |     "    \"days\": 24,\n",
 122 |     "    \"weeks\": 168,\n",
 123 |     "    \"months\": 720,\n",
 124 |     "    \"years\": 8760,\n",
 125 |     "}"
 126 |    ]
 127 |   },
 128 |   {
 129 |    "cell_type": "code",
 130 |    "execution_count": 4,
 131 |    "metadata": {},
 132 |    "outputs": [],
 133 |    "source": [
 134 |     "# Your answer here."
 135 |    ]
 136 |   },
 137 |   {
 138 |    "cell_type": "markdown",
 139 |    "metadata": {},
 140 |    "source": [
 141 |     "### 4."
 142 |    ]
 143 |   },
 144 |   {
 145 |    "cell_type": "markdown",
 146 |    "metadata": {},
 147 |    "source": [
 148 |     "How many passwords begin with the sequence `123`?"
 149 |    ]
 150 |   },
 151 |   {
 152 |    "cell_type": "code",
 153 |    "execution_count": 5,
 154 |    "metadata": {},
 155 |    "outputs": [],
 156 |    "source": [
 157 |     "# Your answer here."
 158 |    ]
 159 |   },
 160 |   {
 161 |    "cell_type": "markdown",
 162 |    "metadata": {},
 163 |    "source": [
 164 |     "### 5."
 165 |    ]
 166 |   },
 167 |   {
 168 |    "cell_type": "markdown",
 169 |    "metadata": {},
 170 |    "source": [
 171 |     "What is the average time in hours needed to crack these passwords that begin with `123`? How does this compare to the average of all passwords in the dataset?"
 172 |    ]
 173 |   },
 174 |   {
 175 |    "cell_type": "code",
 176 |    "execution_count": 6,
 177 |    "metadata": {},
 178 |    "outputs": [],
 179 |    "source": [
 180 |     "# Your answer here."
 181 |    ]
 182 |   },
 183 |   {
 184 |    "cell_type": "markdown",
 185 |    "metadata": {},
 186 |    "source": [
 187 |     "### 6."
 188 |    ]
 189 |   },
 190 |   {
 191 |    "cell_type": "markdown",
 192 |    "metadata": {},
 193 |    "source": [
 194 |     "How many passwords do not contain a number?"
 195 |    ]
 196 |   },
 197 |   {
 198 |    "cell_type": "code",
 199 |    "execution_count": 7,
 200 |    "metadata": {},
 201 |    "outputs": [],
 202 |    "source": [
 203 |     "# Your answer here."
 204 |    ]
 205 |   },
 206 |   {
 207 |    "cell_type": "markdown",
 208 |    "metadata": {},
 209 |    "source": [
 210 |     "### 7."
 211 |    ]
 212 |   },
 213 |   {
 214 |    "cell_type": "markdown",
 215 |    "metadata": {},
 216 |    "source": [
 217 |     "How many passwords contain at least one number?"
 218 |    ]
 219 |   },
 220 |   {
 221 |    "cell_type": "code",
 222 |    "execution_count": 8,
 223 |    "metadata": {},
 224 |    "outputs": [],
 225 |    "source": [
 226 |     "# Your answer here."
 227 |    ]
 228 |   },
 229 |   {
 230 |    "cell_type": "markdown",
 231 |    "metadata": {},
 232 |    "source": [
 233 |     "### 8."
 234 |    ]
 235 |   },
 236 |   {
 237 |    "cell_type": "markdown",
 238 |    "metadata": {},
 239 |    "source": [
 240 |     "Is there an obvious difference in online cracking time between passwords that don't contain a number vs passwords that contain at least one number?"
 241 |    ]
 242 |   },
 243 |   {
 244 |    "cell_type": "code",
 245 |    "execution_count": 9,
 246 |    "metadata": {},
 247 |    "outputs": [],
 248 |    "source": [
 249 |     "# Your answer here."
 250 |    ]
 251 |   },
 252 |   {
 253 |    "cell_type": "markdown",
 254 |    "metadata": {},
 255 |    "source": [
 256 |     "### 9."
 257 |    ]
 258 |   },
 259 |   {
 260 |    "cell_type": "markdown",
 261 |    "metadata": {},
 262 |    "source": [
 263 |     "How many passwords contain at least one of the following punctuation characters: `[.!?\\\\-]` (hint: remember this dataset contains *weak* passwords...)?"
 264 |    ]
 265 |   },
 266 |   {
 267 |    "cell_type": "code",
 268 |    "execution_count": 10,
 269 |    "metadata": {},
 270 |    "outputs": [],
 271 |    "source": [
 272 |     "# Your answer here."
 273 |    ]
 274 |   },
 275 |   {
 276 |    "cell_type": "markdown",
 277 |    "metadata": {},
 278 |    "source": [
 279 |     "### 10."
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "markdown",
 284 |    "metadata": {},
 285 |    "source": [
 286 |     "Which password(s) in the dataset took the shortest time to crack by online guessing? Which took the longest?"
 287 |    ]
 288 |   },
 289 |   {
 290 |    "cell_type": "code",
 291 |    "execution_count": 11,
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "# Your answer here."
 296 |    ]
 297 |   },
 298 |   {
 299 |    "cell_type": "markdown",
 300 |    "metadata": {},
 301 |    "source": [
 302 |     "<hr>\n",
 303 |     "<hr>\n",
 304 |     "<hr>"
 305 |    ]
 306 |   },
 307 |   {
 308 |    "cell_type": "markdown",
 309 |    "metadata": {},
 310 |    "source": [
 311 |     "## Solutions"
 312 |    ]
 313 |   },
 314 |   {
 315 |    "cell_type": "markdown",
 316 |    "metadata": {},
 317 |    "source": [
 318 |     "### 1."
 319 |    ]
 320 |   },
 321 |   {
 322 |    "cell_type": "markdown",
 323 |    "metadata": {},
 324 |    "source": [
 325 |     "In this set of practice exercises we'll be looking at a cool dataset of real passwords (made available from actual data breaches) sourced and compiled from [Information is Beautiful](https://informationisbeautiful.net/visualizations/top-500-passwords-visualized/?utm_content=buffer994fa&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer) and contributed to [R's Tidy Tuesday project](https://github.com/rfordatascience/tidytuesday). These passwords are common (\"bad\") passwords that you should avoid using! But we're going to use this dataset to practice some regex skills.\n",
 326 |     "\n",
 327 |     "Let's start by importing pandas with the alias `pd`."
 328 |    ]
 329 |   },
 330 |   {
 331 |    "cell_type": "code",
 332 |    "execution_count": 12,
 333 |    "metadata": {},
 334 |    "outputs": [],
 335 |    "source": [
 336 |     "import pandas as pd"
 337 |    ]
 338 |   },
 339 |   {
 340 |    "cell_type": "markdown",
 341 |    "metadata": {},
 342 |    "source": [
 343 |     "### 2."
 344 |    ]
 345 |   },
 346 |   {
 347 |    "cell_type": "markdown",
 348 |    "metadata": {},
 349 |    "source": [
 350 |     "The dataset has the following columns:\n",
 351 |     "\n",
 352 |     "|variable          |class     |description |\n",
 353 |     "|:-----------------|:---------|:-----------|\n",
 354 |     "|rank              |int    | popularity in their database of released passwords |\n",
 355 |     "|password          |str | Actual text of the password |\n",
 356 |     "|category          |str | What category does the password fall in to?|\n",
 357 |     "|value             |float    | Time to crack by online guessing |\n",
 358 |     "|time_unit         |str | Time unit to match with value |\n",
 359 |     "|offline_crack_sec |float    | Time to crack offline in seconds |\n",
 360 |     "|rank_alt          |int    | Rank 2 |\n",
 361 |     "|strength          |int    | Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords |\n",
 362 |     "|font_size         |int    | Used to create the graphic for KIB |\n",
 363 |     "\n",
 364 |     "\n",
 365 |     "In these exercises, we're only interested in the `password`, `value` and `time_unit` columns, so import only these three columns as a dataframe named `df` from this url: <https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv>"
 366 |    ]
 367 |   },
 368 |   {
 369 |    "cell_type": "code",
 370 |    "execution_count": 13,
 371 |    "metadata": {},
 372 |    "outputs": [
 373 |     {
 374 |      "data": {
 375 |       "text/html": [
 376 |        "<div>\n",
 377 |        "<style scoped>\n",
 378 |        "    .dataframe tbody tr th:only-of-type {\n",
 379 |        "        vertical-align: middle;\n",
 380 |        "    }\n",
 381 |        "\n",
 382 |        "    .dataframe tbody tr th {\n",
 383 |        "        vertical-align: top;\n",
 384 |        "    }\n",
 385 |        "\n",
 386 |        "    .dataframe thead th {\n",
 387 |        "        text-align: right;\n",
 388 |        "    }\n",
 389 |        "</style>\n",
 390 |        "<table border=\"1\" class=\"dataframe\">\n",
 391 |        "  <thead>\n",
 392 |        "    <tr style=\"text-align: right;\">\n",
 393 |        "      <th></th>\n",
 394 |        "      <th>password</th>\n",
 395 |        "      <th>value</th>\n",
 396 |        "      <th>time_unit</th>\n",
 397 |        "    </tr>\n",
 398 |        "  </thead>\n",
 399 |        "  <tbody>\n",
 400 |        "    <tr>\n",
 401 |        "      <th>0</th>\n",
 402 |        "      <td>password</td>\n",
 403 |        "      <td>6.91</td>\n",
 404 |        "      <td>years</td>\n",
 405 |        "    </tr>\n",
 406 |        "    <tr>\n",
 407 |        "      <th>1</th>\n",
 408 |        "      <td>123456</td>\n",
 409 |        "      <td>18.52</td>\n",
 410 |        "      <td>minutes</td>\n",
 411 |        "    </tr>\n",
 412 |        "    <tr>\n",
 413 |        "      <th>2</th>\n",
 414 |        "      <td>12345678</td>\n",
 415 |        "      <td>1.29</td>\n",
 416 |        "      <td>days</td>\n",
 417 |        "    </tr>\n",
 418 |        "    <tr>\n",
 419 |        "      <th>3</th>\n",
 420 |        "      <td>1234</td>\n",
 421 |        "      <td>11.11</td>\n",
 422 |        "      <td>seconds</td>\n",
 423 |        "    </tr>\n",
 424 |        "    <tr>\n",
 425 |        "      <th>4</th>\n",
 426 |        "      <td>qwerty</td>\n",
 427 |        "      <td>3.72</td>\n",
 428 |        "      <td>days</td>\n",
 429 |        "    </tr>\n",
 430 |        "  </tbody>\n",
 431 |        "</table>\n",
 432 |        "</div>"
 433 |       ],
 434 |       "text/plain": [
 435 |        "   password  value time_unit\n",
 436 |        "0  password   6.91     years\n",
 437 |        "1    123456  18.52   minutes\n",
 438 |        "2  12345678   1.29      days\n",
 439 |        "3      1234  11.11   seconds\n",
 440 |        "4    qwerty   3.72      days"
 441 |       ]
 442 |      },
 443 |      "execution_count": 13,
 444 |      "metadata": {},
 445 |      "output_type": "execute_result"
 446 |     }
 447 |    ],
 448 |    "source": [
 449 |     "df = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv',\n",
 450 |     "                 usecols=['password', 'value', 'time_unit'],\n",
 451 |     "                 skipfooter = 7,\n",
 452 |     "                 engine='python')\n",
 453 |     "df.head()"
 454 |    ]
 455 |   },
 456 |   {
 457 |    "cell_type": "markdown",
 458 |    "metadata": {},
 459 |    "source": [
 460 |     "### 3."
 461 |    ]
 462 |   },
 463 |   {
 464 |    "cell_type": "markdown",
 465 |    "metadata": {},
 466 |    "source": [
 467 |     "An online password attack is when someone tries to hack your account by simply trying a very large number of username/password combinations to access your account. For each `password` in our dataset, the `value` column shows the amount of time it is estimated to take an \"online password attack\" to hack your account. The column `time_unit` shows the units of that time value (e.g., hours, days, years, etc.)\n",
 468 |     "\n",
 469 |     "It would be much nicer if our `value`s were of the same units so we can more easily compare the \"online password guessing time\" for each password. So your first task is to convert all of the values to units of hours (assume the conversion units I've provided below, e.g., 1 day is 24 hours, 1 week is 168 hours, etc.)."
 470 |    ]
 471 |   },
 472 |   {
 473 |    "cell_type": "code",
 474 |    "execution_count": 14,
 475 |    "metadata": {},
 476 |    "outputs": [
 477 |     {
 478 |      "data": {
 479 |       "text/html": [
 480 |        "<div>\n",
 481 |        "<style scoped>\n",
 482 |        "    .dataframe tbody tr th:only-of-type {\n",
 483 |        "        vertical-align: middle;\n",
 484 |        "    }\n",
 485 |        "\n",
 486 |        "    .dataframe tbody tr th {\n",
 487 |        "        vertical-align: top;\n",
 488 |        "    }\n",
 489 |        "\n",
 490 |        "    .dataframe thead th {\n",
 491 |        "        text-align: right;\n",
 492 |        "    }\n",
 493 |        "</style>\n",
 494 |        "<table border=\"1\" class=\"dataframe\">\n",
 495 |        "  <thead>\n",
 496 |        "    <tr style=\"text-align: right;\">\n",
 497 |        "      <th></th>\n",
 498 |        "      <th>password</th>\n",
 499 |        "      <th>value</th>\n",
 500 |        "      <th>time_unit</th>\n",
 501 |        "    </tr>\n",
 502 |        "  </thead>\n",
 503 |        "  <tbody>\n",
 504 |        "    <tr>\n",
 505 |        "      <th>0</th>\n",
 506 |        "      <td>password</td>\n",
 507 |        "      <td>60531.600000</td>\n",
 508 |        "      <td>hours</td>\n",
 509 |        "    </tr>\n",
 510 |        "    <tr>\n",
 511 |        "      <th>1</th>\n",
 512 |        "      <td>123456</td>\n",
 513 |        "      <td>0.308667</td>\n",
 514 |        "      <td>hours</td>\n",
 515 |        "    </tr>\n",
 516 |        "    <tr>\n",
 517 |        "      <th>2</th>\n",
 518 |        "      <td>12345678</td>\n",
 519 |        "      <td>30.960000</td>\n",
 520 |        "      <td>hours</td>\n",
 521 |        "    </tr>\n",
 522 |        "    <tr>\n",
 523 |        "      <th>3</th>\n",
 524 |        "      <td>1234</td>\n",
 525 |        "      <td>0.003086</td>\n",
 526 |        "      <td>hours</td>\n",
 527 |        "    </tr>\n",
 528 |        "    <tr>\n",
 529 |        "      <th>4</th>\n",
 530 |        "      <td>qwerty</td>\n",
 531 |        "      <td>89.280000</td>\n",
 532 |        "      <td>hours</td>\n",
 533 |        "    </tr>\n",
 534 |        "  </tbody>\n",
 535 |        "</table>\n",
 536 |        "</div>"
 537 |       ],
 538 |       "text/plain": [
 539 |        "   password         value time_unit\n",
 540 |        "0  password  60531.600000     hours\n",
 541 |        "1    123456      0.308667     hours\n",
 542 |        "2  12345678     30.960000     hours\n",
 543 |        "3      1234      0.003086     hours\n",
 544 |        "4    qwerty     89.280000     hours"
 545 |       ]
 546 |      },
 547 |      "execution_count": 14,
 548 |      "metadata": {},
 549 |      "output_type": "execute_result"
 550 |     }
 551 |    ],
 552 |    "source": [
 553 |     "units = {\n",
 554 |     "    \"seconds\": 1 / 3600,\n",
 555 |     "    \"minutes\": 1 / 60,\n",
 556 |     "    \"days\": 24,\n",
 557 |     "    \"weeks\": 168,\n",
 558 |     "    \"months\": 720,\n",
 559 |     "    \"years\": 8760,\n",
 560 |     "}\n",
 561 |     "\n",
 562 |     "for key, val in units.items():\n",
 563 |     "    df.loc[df['time_unit'] == key, 'value'] *= val \n",
 564 |     "\n",
 565 |     "df['time_unit'] = 'hours'\n",
 566 |     "df.head()"
 567 |    ]
 568 |   },
 569 |   {
 570 |    "cell_type": "markdown",
 571 |    "metadata": {},
 572 |    "source": [
 573 |     "### 4."
 574 |    ]
 575 |   },
 576 |   {
 577 |    "cell_type": "markdown",
 578 |    "metadata": {},
 579 |    "source": [
 580 |     "How many passwords begin with the sequence `123`?"
 581 |    ]
 582 |   },
 583 |   {
 584 |    "cell_type": "code",
 585 |    "execution_count": 15,
 586 |    "metadata": {},
 587 |    "outputs": [
 588 |     {
 589 |      "data": {
 590 |       "text/plain": [
 591 |        "9"
 592 |       ]
 593 |      },
 594 |      "execution_count": 15,
 595 |      "metadata": {},
 596 |      "output_type": "execute_result"
 597 |     }
 598 |    ],
 599 |    "source": [
 600 |     "df['password'].str.contains(r\"^123\").sum()"
 601 |    ]
 602 |   },
 603 |   {
 604 |    "cell_type": "markdown",
 605 |    "metadata": {},
 606 |    "source": [
 607 |     "### 5."
 608 |    ]
 609 |   },
 610 |   {
 611 |    "cell_type": "markdown",
 612 |    "metadata": {},
 613 |    "source": [
 614 |     "What is the average time in hours needed to crack these passwords that begin with `123`? How does this compare to the average of all passwords in the dataset?"
 615 |    ]
 616 |   },
 617 |   {
 618 |    "cell_type": "code",
 619 |    "execution_count": 16,
 620 |    "metadata": {},
 621 |    "outputs": [
 622 |     {
 623 |      "name": "stdout",
 624 |      "output_type": "stream",
 625 |      "text": [
 626 |       "Avg. time to crack passwords beginning with 123: 107 hrs\n",
 627 |       "Avg. time to crack for all passwords in dataset: 13918 hrs\n"
 628 |      ]
 629 |     }
 630 |    ],
 631 |    "source": [
 632 |     "print(f\"Avg. time to crack passwords beginning with 123: {df[df['password'].str.contains(r'^123')]['value'].mean():.0f} hrs\")\n",
 633 |     "print(f\"Avg. time to crack for all passwords in dataset: {df['value'].mean():.0f} hrs\")"
 634 |    ]
 635 |   },
 636 |   {
 637 |    "cell_type": "markdown",
 638 |    "metadata": {},
 639 |    "source": [
 640 |     "### 6."
 641 |    ]
 642 |   },
 643 |   {
 644 |    "cell_type": "markdown",
 645 |    "metadata": {},
 646 |    "source": [
 647 |     "How many passwords do not contain a number?"
 648 |    ]
 649 |   },
 650 |   {
 651 |    "cell_type": "code",
 652 |    "execution_count": 17,
 653 |    "metadata": {},
 654 |    "outputs": [
 655 |     {
 656 |      "data": {
 657 |       "text/html": [
 658 |        "<div>\n",
 659 |        "<style scoped>\n",
 660 |        "    .dataframe tbody tr th:only-of-type {\n",
 661 |        "        vertical-align: middle;\n",
 662 |        "    }\n",
 663 |        "\n",
 664 |        "    .dataframe tbody tr th {\n",
 665 |        "        vertical-align: top;\n",
 666 |        "    }\n",
 667 |        "\n",
 668 |        "    .dataframe thead th {\n",
 669 |        "        text-align: right;\n",
 670 |        "    }\n",
 671 |        "</style>\n",
 672 |        "<table border=\"1\" class=\"dataframe\">\n",
 673 |        "  <thead>\n",
 674 |        "    <tr style=\"text-align: right;\">\n",
 675 |        "      <th></th>\n",
 676 |        "      <th>password</th>\n",
 677 |        "      <th>value</th>\n",
 678 |        "      <th>time_unit</th>\n",
 679 |        "    </tr>\n",
 680 |        "  </thead>\n",
 681 |        "  <tbody>\n",
 682 |        "    <tr>\n",
 683 |        "      <th>0</th>\n",
 684 |        "      <td>password</td>\n",
 685 |        "      <td>60531.60</td>\n",
 686 |        "      <td>hours</td>\n",
 687 |        "    </tr>\n",
 688 |        "    <tr>\n",
 689 |        "      <th>4</th>\n",
 690 |        "      <td>qwerty</td>\n",
 691 |        "      <td>89.28</td>\n",
 692 |        "      <td>hours</td>\n",
 693 |        "    </tr>\n",
 694 |        "    <tr>\n",
 695 |        "      <th>6</th>\n",
 696 |        "      <td>dragon</td>\n",
 697 |        "      <td>89.28</td>\n",
 698 |        "      <td>hours</td>\n",
 699 |        "    </tr>\n",
 700 |        "    <tr>\n",
 701 |        "      <th>7</th>\n",
 702 |        "      <td>baseball</td>\n",
 703 |        "      <td>60531.60</td>\n",
 704 |        "      <td>hours</td>\n",
 705 |        "    </tr>\n",
 706 |        "    <tr>\n",
 707 |        "      <th>8</th>\n",
 708 |        "      <td>football</td>\n",
 709 |        "      <td>60531.60</td>\n",
 710 |        "      <td>hours</td>\n",
 711 |        "    </tr>\n",
 712 |        "  </tbody>\n",
 713 |        "</table>\n",
 714 |        "</div>"
 715 |       ],
 716 |       "text/plain": [
 717 |        "   password     value time_unit\n",
 718 |        "0  password  60531.60     hours\n",
 719 |        "4    qwerty     89.28     hours\n",
 720 |        "6    dragon     89.28     hours\n",
 721 |        "7  baseball  60531.60     hours\n",
 722 |        "8  football  60531.60     hours"
 723 |       ]
 724 |      },
 725 |      "execution_count": 17,
 726 |      "metadata": {},
 727 |      "output_type": "execute_result"
 728 |     }
 729 |    ],
 730 |    "source": [
 731 |     "df[df['password'].str.contains(r\"^[^0-9]*$\")].head()"
 732 |    ]
 733 |   },
 734 |   {
 735 |    "cell_type": "markdown",
 736 |    "metadata": {},
 737 |    "source": [
 738 |     "### 7."
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "markdown",
 743 |    "metadata": {},
 744 |    "source": [
 745 |     "How many passwords contain at least one number?"
 746 |    ]
 747 |   },
 748 |   {
 749 |    "cell_type": "code",
 750 |    "execution_count": 18,
 751 |    "metadata": {},
 752 |    "outputs": [
 753 |     {
 754 |      "data": {
 755 |       "text/html": [
 756 |        "<div>\n",
 757 |        "<style scoped>\n",
 758 |        "    .dataframe tbody tr th:only-of-type {\n",
 759 |        "        vertical-align: middle;\n",
 760 |        "    }\n",
 761 |        "\n",
 762 |        "    .dataframe tbody tr th {\n",
 763 |        "        vertical-align: top;\n",
 764 |        "    }\n",
 765 |        "\n",
 766 |        "    .dataframe thead th {\n",
 767 |        "        text-align: right;\n",
 768 |        "    }\n",
 769 |        "</style>\n",
 770 |        "<table border=\"1\" class=\"dataframe\">\n",
 771 |        "  <thead>\n",
 772 |        "    <tr style=\"text-align: right;\">\n",
 773 |        "      <th></th>\n",
 774 |        "      <th>password</th>\n",
 775 |        "      <th>value</th>\n",
 776 |        "      <th>time_unit</th>\n",
 777 |        "    </tr>\n",
 778 |        "  </thead>\n",
 779 |        "  <tbody>\n",
 780 |        "    <tr>\n",
 781 |        "      <th>1</th>\n",
 782 |        "      <td>123456</td>\n",
 783 |        "      <td>0.308667</td>\n",
 784 |        "      <td>hours</td>\n",
 785 |        "    </tr>\n",
 786 |        "    <tr>\n",
 787 |        "      <th>2</th>\n",
 788 |        "      <td>12345678</td>\n",
 789 |        "      <td>30.960000</td>\n",
 790 |        "      <td>hours</td>\n",
 791 |        "    </tr>\n",
 792 |        "    <tr>\n",
 793 |        "      <th>3</th>\n",
 794 |        "      <td>1234</td>\n",
 795 |        "      <td>0.003086</td>\n",
 796 |        "      <td>hours</td>\n",
 797 |        "    </tr>\n",
 798 |        "    <tr>\n",
 799 |        "      <th>5</th>\n",
 800 |        "      <td>12345</td>\n",
 801 |        "      <td>0.030833</td>\n",
 802 |        "      <td>hours</td>\n",
 803 |        "    </tr>\n",
 804 |        "    <tr>\n",
 805 |        "      <th>11</th>\n",
 806 |        "      <td>696969</td>\n",
 807 |        "      <td>0.308667</td>\n",
 808 |        "      <td>hours</td>\n",
 809 |        "    </tr>\n",
 810 |        "  </tbody>\n",
 811 |        "</table>\n",
 812 |        "</div>"
 813 |       ],
 814 |       "text/plain": [
 815 |        "    password      value time_unit\n",
 816 |        "1     123456   0.308667     hours\n",
 817 |        "2   12345678  30.960000     hours\n",
 818 |        "3       1234   0.003086     hours\n",
 819 |        "5      12345   0.030833     hours\n",
 820 |        "11    696969   0.308667     hours"
 821 |       ]
 822 |      },
 823 |      "execution_count": 18,
 824 |      "metadata": {},
 825 |      "output_type": "execute_result"
 826 |     }
 827 |    ],
 828 |    "source": [
 829 |     "df[df['password'].str.contains(r\".*[0-9].*\")].head()"
 830 |    ]
 831 |   },
 832 |   {
 833 |    "cell_type": "markdown",
 834 |    "metadata": {},
 835 |    "source": [
 836 |     "### 8."
 837 |    ]
 838 |   },
 839 |   {
 840 |    "cell_type": "markdown",
 841 |    "metadata": {},
 842 |    "source": [
 843 |     "Is there an obvious difference in online cracking time between passwords that don't contain a number and passwords that contain at least one number?"
 844 |    ]
 845 |   },
 846 |   {
 847 |    "cell_type": "code",
 848 |    "execution_count": 19,
 849 |    "metadata": {},
 850 |    "outputs": [
 851 |     {
 852 |      "name": "stdout",
 853 |      "output_type": "stream",
 854 |      "text": [
 855 |       "        Avg. time to crack passwords without a number: 8095 hrs\n",
 856 |       "Avg. time to crack passwords with at least one number: 62005 hrs\n"
 857 |      ]
 858 |     }
 859 |    ],
 860 |    "source": [
 861 |     "print(f\"        Avg. time to crack passwords without a number: {df[df['password'].str.contains(r'^[^0-9]*$')]['value'].mean():.0f} hrs\")\n",
 862 |     "print(f\"Avg. time to crack passwords with at least one number: {df[df['password'].str.contains(r'.*[0-9].*')]['value'].mean():.0f} hrs\")"
 863 |    ]
 864 |   },
 865 |   {
 866 |    "cell_type": "markdown",
 867 |    "metadata": {},
 868 |    "source": [
 869 |     "### 9."
 870 |    ]
 871 |   },
 872 |   {
 873 |    "cell_type": "markdown",
 874 |    "metadata": {},
 875 |    "source": [
 876 |     "How many passwords contain at least one of the following punctuation marks: `[.!?\\\\-]` (hint: remember this dataset contains *weak* passwords...)?"
 877 |    ]
 878 |   },
 879 |   {
 880 |    "cell_type": "code",
 881 |    "execution_count": 20,
 882 |    "metadata": {},
 883 |    "outputs": [
 884 |     {
 885 |      "data": {
 886 |       "text/html": [
 887 |        "<div>\n",
 888 |        "<style scoped>\n",
 889 |        "    .dataframe tbody tr th:only-of-type {\n",
 890 |        "        vertical-align: middle;\n",
 891 |        "    }\n",
 892 |        "\n",
 893 |        "    .dataframe tbody tr th {\n",
 894 |        "        vertical-align: top;\n",
 895 |        "    }\n",
 896 |        "\n",
 897 |        "    .dataframe thead th {\n",
 898 |        "        text-align: right;\n",
 899 |        "    }\n",
 900 |        "</style>\n",
 901 |        "<table border=\"1\" class=\"dataframe\">\n",
 902 |        "  <thead>\n",
 903 |        "    <tr style=\"text-align: right;\">\n",
 904 |        "      <th></th>\n",
 905 |        "      <th>password</th>\n",
 906 |        "      <th>value</th>\n",
 907 |        "      <th>time_unit</th>\n",
 908 |        "    </tr>\n",
 909 |        "  </thead>\n",
 910 |        "  <tbody>\n",
 911 |        "  </tbody>\n",
 912 |        "</table>\n",
 913 |        "</div>"
 914 |       ],
 915 |       "text/plain": [
 916 |        "Empty DataFrame\n",
 917 |        "Columns: [password, value, time_unit]\n",
 918 |        "Index: []"
 919 |       ]
 920 |      },
 921 |      "execution_count": 20,
 922 |      "metadata": {},
 923 |      "output_type": "execute_result"
 924 |     }
 925 |    ],
 926 |    "source": [
 927 |     "df[df['password'].str.contains(r'[.!?\\\\-]')]"
 928 |    ]
 929 |   },
 930 |   {
 931 |    "cell_type": "markdown",
 932 |    "metadata": {},
 933 |    "source": [
 934 |     "### 10."
 935 |    ]
 936 |   },
 937 |   {
 938 |    "cell_type": "markdown",
 939 |    "metadata": {},
 940 |    "source": [
 941 |     "Which password(s) in the dataset took the shortest time to crack by online guessing? Which took the longest?"
 942 |    ]
 943 |   },
 944 |   {
 945 |    "cell_type": "code",
 946 |    "execution_count": 21,
 947 |    "metadata": {},
 948 |    "outputs": [
 949 |     {
 950 |      "data": {
 951 |       "text/html": [
 952 |        "<div>\n",
 953 |        "<style scoped>\n",
 954 |        "    .dataframe tbody tr th:only-of-type {\n",
 955 |        "        vertical-align: middle;\n",
 956 |        "    }\n",
 957 |        "\n",
 958 |        "    .dataframe tbody tr th {\n",
 959 |        "        vertical-align: top;\n",
 960 |        "    }\n",
 961 |        "\n",
 962 |        "    .dataframe thead th {\n",
 963 |        "        text-align: right;\n",
 964 |        "    }\n",
 965 |        "</style>\n",
 966 |        "<table border=\"1\" class=\"dataframe\">\n",
 967 |        "  <thead>\n",
 968 |        "    <tr style=\"text-align: right;\">\n",
 969 |        "      <th></th>\n",
 970 |        "      <th>password</th>\n",
 971 |        "      <th>value</th>\n",
 972 |        "      <th>time_unit</th>\n",
 973 |        "    </tr>\n",
 974 |        "  </thead>\n",
 975 |        "  <tbody>\n",
 976 |        "    <tr>\n",
 977 |        "      <th>3</th>\n",
 978 |        "      <td>1234</td>\n",
 979 |        "      <td>0.003086</td>\n",
 980 |        "      <td>hours</td>\n",
 981 |        "    </tr>\n",
 982 |        "    <tr>\n",
 983 |        "      <th>19</th>\n",
 984 |        "      <td>2000</td>\n",
 985 |        "      <td>0.003086</td>\n",
 986 |        "      <td>hours</td>\n",
 987 |        "    </tr>\n",
 988 |        "    <tr>\n",
 989 |        "      <th>44</th>\n",
 990 |        "      <td>6969</td>\n",
 991 |        "      <td>0.003086</td>\n",
 992 |        "      <td>hours</td>\n",
 993 |        "    </tr>\n",
 994 |        "    <tr>\n",
 995 |        "      <th>76</th>\n",
 996 |        "      <td>1111</td>\n",
 997 |        "      <td>0.003086</td>\n",
 998 |        "      <td>hours</td>\n",
 999 |        "    </tr>\n",
1000 |        "    <tr>\n",
1001 |        "      <th>276</th>\n",
1002 |        "      <td>5150</td>\n",
1003 |        "      <td>0.003086</td>\n",
1004 |        "      <td>hours</td>\n",
1005 |        "    </tr>\n",
1006 |        "    <tr>\n",
1007 |        "      <th>314</th>\n",
1008 |        "      <td>2112</td>\n",
1009 |        "      <td>0.003086</td>\n",
1010 |        "      <td>hours</td>\n",
1011 |        "    </tr>\n",
1012 |        "    <tr>\n",
1013 |        "      <th>315</th>\n",
1014 |        "      <td>1212</td>\n",
1015 |        "      <td>0.003086</td>\n",
1016 |        "      <td>hours</td>\n",
1017 |        "    </tr>\n",
1018 |        "    <tr>\n",
1019 |        "      <th>324</th>\n",
1020 |        "      <td>7777</td>\n",
1021 |        "      <td>0.003086</td>\n",
1022 |        "      <td>hours</td>\n",
1023 |        "    </tr>\n",
1024 |        "    <tr>\n",
1025 |        "      <th>371</th>\n",
1026 |        "      <td>2222</td>\n",
1027 |        "      <td>0.003086</td>\n",
1028 |        "      <td>hours</td>\n",
1029 |        "    </tr>\n",
1030 |        "    <tr>\n",
1031 |        "      <th>373</th>\n",
1032 |        "      <td>4444</td>\n",
1033 |        "      <td>0.003086</td>\n",
1034 |        "      <td>hours</td>\n",
1035 |        "    </tr>\n",
1036 |        "    <tr>\n",
1037 |        "      <th>429</th>\n",
1038 |        "      <td>1313</td>\n",
1039 |        "      <td>0.003086</td>\n",
1040 |        "      <td>hours</td>\n",
1041 |        "    </tr>\n",
1042 |        "  </tbody>\n",
1043 |        "</table>\n",
1044 |        "</div>"
1045 |       ],
1046 |       "text/plain": [
1047 |        "    password     value time_unit\n",
1048 |        "3       1234  0.003086     hours\n",
1049 |        "19      2000  0.003086     hours\n",
1050 |        "44      6969  0.003086     hours\n",
1051 |        "76      1111  0.003086     hours\n",
1052 |        "276     5150  0.003086     hours\n",
1053 |        "314     2112  0.003086     hours\n",
1054 |        "315     1212  0.003086     hours\n",
1055 |        "324     7777  0.003086     hours\n",
1056 |        "371     2222  0.003086     hours\n",
1057 |        "373     4444  0.003086     hours\n",
1058 |        "429     1313  0.003086     hours"
1059 |       ]
1060 |      },
1061 |      "execution_count": 21,
1062 |      "metadata": {},
1063 |      "output_type": "execute_result"
1064 |     }
1065 |    ],
1066 |    "source": [
1067 |     "df.query(\"value == value.min()\")"
1068 |    ]
1069 |   },
1070 |   {
1071 |    "cell_type": "code",
1072 |    "execution_count": 22,
1073 |    "metadata": {},
1074 |    "outputs": [
1075 |     {
1076 |      "data": {
1077 |       "text/html": [
1078 |        "<div>\n",
1079 |        "<style scoped>\n",
1080 |        "    .dataframe tbody tr th:only-of-type {\n",
1081 |        "        vertical-align: middle;\n",
1082 |        "    }\n",
1083 |        "\n",
1084 |        "    .dataframe tbody tr th {\n",
1085 |        "        vertical-align: top;\n",
1086 |        "    }\n",
1087 |        "\n",
1088 |        "    .dataframe thead th {\n",
1089 |        "        text-align: right;\n",
1090 |        "    }\n",
1091 |        "</style>\n",
1092 |        "<table border=\"1\" class=\"dataframe\">\n",
1093 |        "  <thead>\n",
1094 |        "    <tr style=\"text-align: right;\">\n",
1095 |        "      <th></th>\n",
1096 |        "      <th>password</th>\n",
1097 |        "      <th>value</th>\n",
1098 |        "      <th>time_unit</th>\n",
1099 |        "    </tr>\n",
1100 |        "  </thead>\n",
1101 |        "  <tbody>\n",
1102 |        "    <tr>\n",
1103 |        "      <th>25</th>\n",
1104 |        "      <td>trustno1</td>\n",
1105 |        "      <td>808285.2</td>\n",
1106 |        "      <td>hours</td>\n",
1107 |        "    </tr>\n",
1108 |        "    <tr>\n",
1109 |        "      <th>335</th>\n",
1110 |        "      <td>rush2112</td>\n",
1111 |        "      <td>808285.2</td>\n",
1112 |        "      <td>hours</td>\n",
1113 |        "    </tr>\n",
1114 |        "    <tr>\n",
1115 |        "      <th>405</th>\n",
1116 |        "      <td>jordan23</td>\n",
1117 |        "      <td>808285.2</td>\n",
1118 |        "      <td>hours</td>\n",
1119 |        "    </tr>\n",
1120 |        "    <tr>\n",
1121 |        "      <th>499</th>\n",
1122 |        "      <td>passw0rd</td>\n",
1123 |        "      <td>808285.2</td>\n",
1124 |        "      <td>hours</td>\n",
1125 |        "    </tr>\n",
1126 |        "  </tbody>\n",
1127 |        "</table>\n",
1128 |        "</div>"
1129 |       ],
1130 |       "text/plain": [
1131 |        "     password     value time_unit\n",
1132 |        "25   trustno1  808285.2     hours\n",
1133 |        "335  rush2112  808285.2     hours\n",
1134 |        "405  jordan23  808285.2     hours\n",
1135 |        "499  passw0rd  808285.2     hours"
1136 |       ]
1137 |      },
1138 |      "execution_count": 22,
1139 |      "metadata": {},
1140 |      "output_type": "execute_result"
1141 |     }
1142 |    ],
1143 |    "source": [
1144 |     "df.query(\"value == value.max()\")"
1145 |    ]
1146 |   }
1147 |  ],
1148 |  "metadata": {
1149 |   "kernelspec": {
1150 |    "display_name": "Python 3",
1151 |    "language": "python",
1152 |    "name": "python3"
1153 |   },
1154 |   "language_info": {
1155 |    "codemirror_mode": {
1156 |     "name": "ipython",
1157 |     "version": 3
1158 |    },
1159 |    "file_extension": ".py",
1160 |    "mimetype": "text/x-python",
1161 |    "name": "python",
1162 |    "nbconvert_exporter": "python",
1163 |    "pygments_lexer": "ipython3",
1164 |    "version": "3.7.8"
1165 |   }
1166 |  },
1167 |  "nbformat": 4,
1168 |  "nbformat_minor": 4
1169 | }
1170 | 


--------------------------------------------------------------------------------