├── EDA ├── EDA.pdf └── Notebooks │ └── eda_empty.ipynb ├── Intro ├── Intro.pdf ├── Notebooks │ ├── data │ │ └── array.txt │ ├── 100_Numpy_exercises_with_solutions.md │ └── 100_Numpy_exercises.ipynb └── NumPy.pdf ├── Statistics └── Statistics.pdf ├── requirements.txt ├── Data Manipulation ├── Data Manipulation.pdf └── Notebooks │ ├── data_cleaning_empty.ipynb │ ├── pandas_basics_empty.ipynb │ ├── pandas_basics.ipynb │ ├── data_cleaning.ipynb │ └── data │ └── titanic.csv ├── Data Visualization ├── Data Visualization pt.1.pdf ├── Data Visualization pt.2.pdf ├── Data Visualization pt.3.pdf └── Notebooks │ ├── data │ ├── salary_by_vivodership.csv │ ├── eu_gdp_per_capita_by_country_2019.csv │ ├── US_energy_sector.csv │ └── titanic.csv │ ├── data_visualization_pt3_empty.ipynb │ ├── data_visualization_pt2_empty.ipynb │ └── data_visualization_pt1_empty.ipynb ├── .gitignore ├── LICENSE └── README.md /EDA/EDA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/EDA/EDA.pdf -------------------------------------------------------------------------------- /Intro/Intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Intro/Intro.pdf -------------------------------------------------------------------------------- /Intro/Notebooks/data/array.txt: -------------------------------------------------------------------------------- 1 | 1.000000000000000000e+00 2 | 2.000000000000000000e+00 3 | 3.000000000000000000e+00 4 | -------------------------------------------------------------------------------- /Intro/NumPy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Intro/NumPy.pdf 
-------------------------------------------------------------------------------- /Statistics/Statistics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Statistics/Statistics.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn==1.5.1 2 | scipy==1.11.4 3 | numpy==1.26.4 4 | pandas==2.2.2 5 | statsmodels==0.14.4 6 | matplotlib==3.9.1 7 | seaborn==0.13.2 -------------------------------------------------------------------------------- /Data Manipulation/Data Manipulation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Data Manipulation/Data Manipulation.pdf -------------------------------------------------------------------------------- /Data Visualization/Data Visualization pt.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Data Visualization/Data Visualization pt.1.pdf -------------------------------------------------------------------------------- /Data Visualization/Data Visualization pt.2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Data Visualization/Data Visualization pt.2.pdf -------------------------------------------------------------------------------- /Data Visualization/Data Visualization pt.3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GHOST-Science-Club/Intro-To-Data-Science-Notebooks/HEAD/Data Visualization/Data Visualization 
pt.3.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ignore 2 | Data manipulation pt. 1/ignore 3 | Data manipulation pt. 2/ignore 4 | Data Visualization pt. 1/ignore 5 | Data Visualization pt. 2/ignore 6 | Data Visualization pt. 3/ignore 7 | Project -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data/salary_by_vivodership.csv: -------------------------------------------------------------------------------- 1 | voivodeship_name,salary 2 | dolnośląskie,6562.04 3 | kujawsko-pomorskie,5633.52 4 | lubelskie,5646.97 5 | lubuskie,5706.47 6 | łódzkie,5903.19 7 | małopolskie,6411.04 8 | mazowieckie,7508.56 9 | opolskie,5835.92 10 | podkarpackie,5452.12 11 | podlaskie,5742.12 12 | pomorskie,6297.51 13 | śląskie,6388.82 14 | świętokrzyskie,5505.49 15 | warmińsko-mazurskie,5427.81 16 | wielkopolskie,5789.84 17 | zachodniopomorskie,5809.68 -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data/eu_gdp_per_capita_by_country_2019.csv: -------------------------------------------------------------------------------- 1 | country,gdp_per_capita (USD) 2 | Austria,55060 3 | Belgium,46421 4 | Bulgaria,9828 5 | Croation,14936 6 | Cyprus,27858 7 | Czech Republic,23495 8 | Denmark,60170 9 | Estonia,23723 10 | Finland,48783 11 | France,40494 12 | Germany,46445 13 | Greece,19583 14 | Hungary,16732 15 | Ireland,78661 16 | Italy,33228 17 | Latvia,17829 18 | Lithuania,19602 19 | Luxembourg,114705 20 | Malta,29821 21 | Netherlands,52331 22 | Poland,13989 23 | Portugal,23252 24 | Romania,16222 25 | Slovakia,19266 26 | Slovenia,25946 27 | Spain,29600 28 | Sweden,51615 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 
MIT License 2 | 3 | Copyright (c) 2024 Jędrzej Ogrodowski & Maksymilian Norkiewicz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data_visualization_pt3_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "\n", 20 | "import geopandas as gpd\n", 21 | "import geoplot as gplt\n", 22 | "import geoplot.crs as gcrs\n", 23 | "\n", 24 | "import warnings\n", 25 | "warnings.filterwarnings('ignore')" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Map" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.12.2" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 2 78 | } 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro to Data Science 2 | 3 | This repository is dedicated to the "Intro to 
Data Science" section of the GHOST science club. The section is designed to introduce participants to key concepts and methodologies related to data analysis and data science. 4 | 5 | ![Image](https://github.com/user-attachments/assets/bfecdfe4-0ae2-4857-956b-d08802af64a3) 6 | 7 | **Scope** 8 | 9 | The section focuses on the Data Analysis Process, covering the following topics: 10 | 11 | - **Linear Algebra**: Fundamental mathematical operations used in data analysis, such as matrices, vectors, and transformations. 12 | - **Statistics**: Statistical concepts that aid in understanding and interpreting data. 13 | - **Data Cleaning**: The process of cleaning data, identifying and correcting errors in datasets. 14 | - **Data Visualization**: Tools and techniques for presenting analysis results in graphical form. 15 | - **Exploratory Data Analysis (EDA)**: A process of exploring data to discover patterns, relationships, and trends. 16 | 17 | **Repository Contents** 18 | 19 | This repository contains educational resources that support participants in gaining practical skills in data science: 20 | 21 | - **Presentations**: Slides used during the meetings, covering the theoretical foundations of the discussed topics. 22 | - **Datasets**: Data used during the meetings for analysis and hands-on tasks. 23 | - **Notebooks**: Interactive notebooks with Python code that allow participants to work independently and experiment with data analysis. 24 | 25 |
26 | 27 | # Data Analysis Competition 2025 28 |

29 | 30 | 31 |

32 | 33 | Winning projects:
34 | [First place 🥇](https://github.com/janekandrz/sp_final)
35 | [Second place 🥈](https://github.com/AntoniPoszkuta/poker_stats)
36 | [Third place 🥉](https://github.com/gruuubcioo/Alcohol-consumption-in-Europe) 37 | -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data_visualization_pt2_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sns\n", 20 | "\n", 21 | "import warnings\n", 22 | "warnings.filterwarnings(\"ignore\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "london_bikes = pd.read_csv(\"./data/london_bikes.csv\")\n", 39 | "london_bikes.head()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "titanic = pd.read_csv(\"./data/titanic.csv\")\n", 49 | "titanic.head()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "energy = pd.read_csv(\"./data/US_energy_sector.csv\")\n", 59 | "energy.head()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "# Scatter plot" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# Pie vs. 
Bar plot comparison" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "# Line/Area plot" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.12.2" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /EDA/Notebooks/eda_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sns\n", 20 | "\n", 21 | "import warnings\n", 22 | "warnings.filterwarnings(\"ignore\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "data = pd.read_csv(\"./data/wina.pl_clean.csv\")\n", 39 | "data.head(1)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Is 
*Amerena* present in the dataset?" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "---\n", 61 | "\n", 62 | "# In which country is the median price of wine the highest?" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "---\n", 77 | "\n", 78 | "# What is the correlation between alcohol content and price?" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "---\n", 93 | "\n", 94 | "# What is the correlation between vintage and price?" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "\n", 109 | "\n", 110 | "---\n", 111 | "\n", 112 | "# Regional Specialties. Which regions are associated with specific kinds of wine?" 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": "Python 3", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.12.2" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 2 144 | } 145 | -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data_visualization_pt1_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sns\n", 20 | "\n", 21 | "import warnings\n", 22 | "warnings.filterwarnings(\"ignore\")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "data = pd.read_csv(\"./data/titanic.csv\")\n", 39 | "data.head()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Histogram" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 
60 | "# KDE plot" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Stacked histograms" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "# Stacked KDE plots" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "# Overlapping histograms" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "# Overlapping KDE plots" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "# Box plot" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "# Violin plot" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "# Strip plot" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | 
"metadata": {}, 171 | "source": [ 172 | "# Q-Q plot" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.12.2" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /Data Manipulation/Notebooks/data_cleaning_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Data import\n", 22 | "\n", 23 | "source: https://www.kaggle.com/datasets/skamlo/wine-price-on-polish-market" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "# Print basic information about columns" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Remove products that are not wines\n", 52 | "\n", 53 | "To remove: `idxs = [460, 462]`\n", 54 | "\n", 55 | "Also 
pay attention to the column: `ilość win`" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "# Remove unnecessary columns\n", 70 | "\n", 71 | "Related columns: `link`, `wysyłka w ciągu`, `wino koszerne`, `ilość win`" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "# Name\n", 86 | "\n", 87 | "Related columns: `name`\n", 88 | "\n", 89 | "Units in product name: `['ml', 'Ml', 'ML', 'l', 'L', 'litrów', 'NV']`" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "# Price\n", 104 | "\n", 105 | "Related columns: `price`\n", 106 | "\n", 107 | "Watch out for \\xa0 sign!" 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "# Region\n", 122 | "\n", 123 | "Related columns: `region`, `region2`" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "# Vintage\n", 138 | "\n", 139 | "Related columns: `rocznik`, `rocznik2`, `rocznik3`" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "# Volume\n", 154 | "\n", 155 | "Related columns: `objętość`, `pojemność`" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "# Alcohol\n", 170 | "\n", 171 | "Related columns: `alkohol zawartość`, `zawartość alkoholu2`, `zawartość alkoholu`" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "# Serving temperature\n", 186 | "\n", 187 | "Related columns: `temperatura podawania`" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "# Color and kind\n", 202 | "\n", 203 | "Related columns: `rodzaj wina`, `rodzaj wina2`" 204 | ] 205 | }, 206 | { 207 | 
"cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "# Medals\n", 218 | "\n", 219 | "Related columns: `medale`, `medale2`, `medale3`" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "# Vegan and Natural\n", 234 | "\n", 235 | "Related columns: `bio`, `bio2`, `bio3`, `czy winnica bio`, `wino wegańskie`" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "# Ratings\n", 250 | "\n", 251 | "Related columns: `guia penin`, `falstaff`, `james suckling`, `oceny prasy`, `wine spectator`" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "# Grapes\n", 266 | "\n", 267 | "Related columns: `grona`, `grona[2-13]`" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "# Appellation\n", 282 | "\n", 283 | "Related columns: `apelacja`" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [] 292 | } 293 | ], 294 | "metadata": { 295 | "kernelspec": { 296 | "display_name": "Python 3", 297 | "language": "python", 298 | "name": "python3" 299 | }, 300 | "language_info": { 301 | "codemirror_mode": { 302 | 
"name": "ipython", 303 | "version": 3 304 | }, 305 | "file_extension": ".py", 306 | "mimetype": "text/x-python", 307 | "name": "python", 308 | "nbconvert_exporter": "python", 309 | "pygments_lexer": "ipython3", 310 | "version": "3.12.2" 311 | }, 312 | "orig_nbformat": 4 313 | }, 314 | "nbformat": 4, 315 | "nbformat_minor": 2 316 | } 317 | -------------------------------------------------------------------------------- /Data Manipulation/Notebooks/pandas_basics_empty.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | "

\n", 9 | " Best knowledge source 😍 -\n", 10 | " here\n", 11 | "

\n", 12 | "
\n", 13 | "\n", 14 | "---" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Library import" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# Reading files\n", 36 | "\n", 37 | "Source: Brenda N - Kaggle" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# Reading data from DataFrame" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "#### `.loc`" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "#### `.iloc`" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# Basic information about the dataset" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "#### `.info`" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### `.describe`" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "#### `.unique`" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 
127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "# Dropping and filling empty cells" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### `.dropna`" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "### `.dropna` inplace" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "### `.fillna`" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "# Dropping rows and columns" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### Drop rows" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "#### Drop columns" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "# Filtering" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "#### Filtering with multiple 
conditions" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "#### `.isnull()` and `.notnull()`" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "#### `.isin`" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "# Sorting" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "#### `.sort_values`" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "#### `.sort_index`" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "#### `.reset_index`" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "# Aggregation" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "#### `.mean`, `.count`, `.sum`, etc." 
332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "#### `.agg`" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [] 354 | } 355 | ], 356 | "metadata": { 357 | "kernelspec": { 358 | "display_name": "Python 3", 359 | "language": "python", 360 | "name": "python3" 361 | }, 362 | "language_info": { 363 | "codemirror_mode": { 364 | "name": "ipython", 365 | "version": 3 366 | }, 367 | "file_extension": ".py", 368 | "mimetype": "text/x-python", 369 | "name": "python", 370 | "nbconvert_exporter": "python", 371 | "pygments_lexer": "ipython3", 372 | "version": "3.12.2" 373 | } 374 | }, 375 | "nbformat": 4, 376 | "nbformat_minor": 2 377 | } 378 | -------------------------------------------------------------------------------- /Data Manipulation/Notebooks/pandas_basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | "

\n", 9 | " Best knowledge source 😍 -\n", 10 | " here\n", 11 | "

\n", 12 | "
\n", 13 | "\n", 14 | "---" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Library import" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "!pip install pandas" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import pandas as pd" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# Reading files\n", 47 | "\n", 48 | "Source: Brenda N - Titanic - Kaggle" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "data = pd.read_csv(\"./data/titanic.csv\")\n", 58 | "data.head() # tail or sample" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "# Reading data from DataFrame" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "data[\"Age\"]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "#### `.loc`" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "data.loc[5]\n", 91 | "# data.loc[5:10]" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "data.loc[5:10, [\"Name\", \"Age\"]]" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "#### `.iloc`" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "data.iloc[5]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": 
{}, 123 | "outputs": [], 124 | "source": [ 125 | "data.iloc[:, 3:6]" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "# Basic information about dataset" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "#### `.info`" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "data.info()" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "#### `.describe`" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "data.describe()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 23, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "30.272590361445783" 176 | ] 177 | }, 178 | "execution_count": 23, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "data['Age'].mean()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### `.unique`" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "data['Pclass'].unique()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "# Dropping and filling empty cells" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "### `.dropna`" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "data.sample(5)" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": {}, 230 | "outputs": [], 231 | "source":
[ 232 | "data_copy = data.dropna(subset=[\"Age\"])\n", 233 | "# data_copy = data.dropna(subset=[\"Age\", \"Fare\"])\n", 234 | "data_copy" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "### `.dropna` inplace" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "data.dropna(subset=[\"Age\"], inplace=True)\n", 251 | "len(data)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "### `.fillna`" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "data['Cabin'].fillna(\"not known\", inplace=True)\n", 268 | "data.head()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "# Dropping rows and columns" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "#### Drop rows" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "data.drop(0).head()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "#### Drop columns" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "data.drop(\"PassengerId\", axis=1).head()" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "metadata": {}, 313 | "source": [ 314 | "# Filtering" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "data[data['Age'] > 30].head()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 |
"source": [ 332 | "data[data[\"Sex\"] == \"female\"].head()" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "#### Filtering with multiple conditions" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "data_copy = data[(data['Age'] > 30) & (data['Sex'] == \"female\")]\n", 349 | "# data_copy = data[(data['Age'] > 30) | (data['Sex'] == \"female\")]\n", 350 | "print(len(data_copy))\n", 351 | "data_copy.head()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "#### `.isnull()` and `.notnull()`" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "data = pd.read_csv(\"./data/titanic.csv\")\n", 368 | "\n", 369 | "# data[data['Age'].notnull()].head()\n", 370 | "data[data['Age'].isnull()].head()" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "#### `.isin`" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "data[data['Pclass'].isin([1, 2])].head()" 387 | ] 388 | }, 389 | { 390 | "cell_type": "markdown", 391 | "metadata": {}, 392 | "source": [ 393 | "# Sorting" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "#### `.sort_values`" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [ 409 | "data.sort_values(\"Age\")" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "#### `.sort_index`" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 
| "data.sort_index()" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "#### `.reset_index`" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "data.sort_values(\"Age\", inplace=True)\n", 442 | "data.reset_index(drop=True, inplace=True)\n", 443 | "data.head()" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "# Aggregation" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 94, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "data = pd.read_csv(\"./data/titanic.csv\")" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "#### `.mean`, `.count`, `.sum`, etc." 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "data[[\"Sex\", \"Age\"]].groupby([\"Sex\"]).mean()" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": null, 481 | "metadata": {}, 482 | "outputs": [], 483 | "source": [ 484 | "data[[\"Sex\", \"PassengerId\"]].groupby(\"Sex\").count()" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": {}, 491 | "outputs": [], 492 | "source": [ 493 | "data[[\"Sex\", \"Fare\"]].groupby(\"Sex\").sum()" 494 | ] 495 | }, 496 | { 497 | "cell_type": "markdown", 498 | "metadata": {}, 499 | "source": [ 500 | "#### `.agg`" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "data[[\"Sex\", \"Fare\"]].groupby(\"Sex\").agg([\"sum\", \"mean\", \"count\"])" 510 | ] 511 | } 512 | ], 513 | "metadata": { 514 | "kernelspec": { 515 | "display_name": "Python 3", 516 | "language": "python", 517 | "name": "python3" 518 | }, 519 |
"language_info": { 520 | "codemirror_mode": { 521 | "name": "ipython", 522 | "version": 3 523 | }, 524 | "file_extension": ".py", 525 | "mimetype": "text/x-python", 526 | "name": "python", 527 | "nbconvert_exporter": "python", 528 | "pygments_lexer": "ipython3", 529 | "version": "3.12.2" 530 | } 531 | }, 532 | "nbformat": 4, 533 | "nbformat_minor": 2 534 | } 535 | -------------------------------------------------------------------------------- /Data Manipulation/Notebooks/data_cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Libraries" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Data import\n", 25 | "\n", 26 | "source: https://www.kaggle.com/datasets/skamlo/wine-price-on-polish-market" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "data = pd.read_csv(\"./data/wina.pl_raw.csv\")\n", 36 | "data.head(1)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "# Print basic information about columns" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "data[sorted(data.columns)].info()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "# Remove products which are not wines\n", 60 | "\n", 61 | "There are a few products that are not wines: `Chateau ISH Sparkling Rose w puszce (bezalkoholowe)`, `Chateau ISH Sparkling White w puszce (bezalkoholowe)`" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 70, 67
| "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "data = data[~(data['name'] == 'Chateau ISH Sparkling Rose w puszce (bezalkoholowe)')]\n", 71 | "data = data[~(data['name'] == 'Chateau ISH Sparkling White w puszce (bezalkoholowe)')]\n", 72 | "data = data[data['ilość win'].isnull()]\n", 73 | "data.reset_index(inplace=True, drop=True)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "# Remove unnecessary columns" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 71, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "data.drop(['ilość win', 'wino koszerne', 'producent', 'wysyłka w ciągu', 'temperatura podawania2'], axis=1, inplace=True)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "# Name\n", 97 | "\n", 98 | "Related columns: `name`\n", 99 | "\n", 100 | "Units in product name: `['ml', 'Ml', 'ML', 'l', 'L', 'litrów', 'NV']`" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 72, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "def is_number(text: str) -> bool:\n", 110 | " '''Check if text contain a number.'''\n", 111 | " for digit in [str(i) for i in range(10)]:\n", 112 | " if digit in text:\n", 113 | " return True\n", 114 | " return False\n", 115 | "\n", 116 | "def is_unit(text: str) -> bool:\n", 117 | " if text in ['ml', 'Ml', 'ML', 'l', 'L', 'litrów', 'NV']:\n", 118 | " return True\n", 119 | " return False\n", 120 | "\n", 121 | "def convert_name(name: str) -> str:\n", 122 | " words = []\n", 123 | " for word in name.split():\n", 124 | " if is_number(word):\n", 125 | " continue\n", 126 | "\n", 127 | " if is_unit(word):\n", 128 | " continue\n", 129 | "\n", 130 | " words.append(word)\n", 131 | " \n", 132 | " return \" \".join(words)\n", 133 | "\n", 134 | "data['name'] = data[\"name\"].transform(convert_name)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": 
{}, 140 | "source": [ 141 | "# Price\n", 142 | "\n", 143 | "Related columns: `price`\n", 144 | "\n", 145 | "Watch out for \\xa0 sign!" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 73, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "def convert_price(price:str) -> float:\n", 155 | " price = price.rstrip(\" zł\")\n", 156 | " price = price.replace(\",\", \".\").replace(\" \", \"\").replace(u\"\\xa0\", \"\")\n", 157 | " return float(price)\n", 158 | "\n", 159 | "data[\"price\"] = data[\"price\"].transform(convert_price)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "# Region\n", 167 | "\n", 168 | "Related columns: `region`, `region2`" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 74, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "def concat_region(row:pd.Series) -> pd.Series:\n", 178 | " if (not pd.isna(row[\"region\"])) and (not pd.isna(row[\"region2\"])):\n", 179 | " row[\"region\"] += \", \" + row[\"region2\"]\n", 180 | " return row\n", 181 | "\n", 182 | "data = data.transform(concat_region, axis=1)\n", 183 | "data.drop([\"region2\"], axis=1, inplace=True)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "# Vintage\n", 191 | "\n", 192 | "Related columns: `rocznik`, `rocznik2`, `rocznik3`" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "data[(data['rocznik2'].notnull()) | (data['rocznik3'].notnull())][[\"rocznik\", \"rocznik2\", \"rocznik3\"]]" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "data['rocznik'] = data['rocznik'].replace('NV', np.nan)\n", 211 | "\n", 212 | "def choose_largest_vintage(row:pd.Series) -> pd.Series:\n", 213 | " vintages = []\n", 
214 | " for vintage in row[[\"rocznik\", \"rocznik2\", \"rocznik3\"]].to_list():\n", 215 | " try:\n", 216 | " vintage = int(vintage)\n", 217 | " except:\n", 218 | " continue\n", 219 | " vintages.append(vintage)\n", 220 | " \n", 221 | " row[\"rocznik\"] = np.nan if vintages == [] else max(vintages)\n", 222 | "\n", 223 | " return row\n", 224 | "\n", 225 | "data = data.transform(choose_largest_vintage, axis=1)\n", 226 | "data.rename(columns={\"rocznik\": \"vintage\"}, inplace=True)\n", 227 | "data.drop(['rocznik2', 'rocznik3'], axis=1, inplace=True)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "# Volume\n", 235 | "\n", 236 | "Related columns: `objętość`, `pojemność`" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 76, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "def fill_empty_volume(row:pd.Series) -> pd.Series:\n", 246 | " if not pd.isna(row[\"objętość\"]) and pd.isna(row[\"pojemność\"]):\n", 247 | " row[\"pojemność\"] = row[\"objętość\"]\n", 248 | " return row\n", 249 | "\n", 250 | "data = data.transform(fill_empty_volume, axis=1)\n", 251 | "data.drop([\"objętość\"], axis=1, inplace=True)\n", 252 | "\n", 253 | "def convert_volume(volume:str) -> float:\n", 254 | " if not pd.isna(volume):\n", 255 | " volume, unit = volume.split()\n", 256 | " volume = volume.replace(\",\", \".\")\n", 257 | " if unit == \"ml\":\n", 258 | " volume = float(volume) / 1000\n", 259 | " elif unit == \"L\":\n", 260 | " volume = float(volume)\n", 261 | " return volume\n", 262 | "\n", 263 | "data['pojemność'] = data['pojemność'].transform(convert_volume)\n", 264 | "data.rename(columns={\"pojemność\": \"volume (liters)\"}, inplace=True)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "# Alcohol\n", 272 | "\n", 273 | "Related columns: `alkohol zawartość`, `zawartość alkoholu2`, `zawartość alkoholu`" 274 | ] 275 | }, 276 | { 277 | 
"cell_type": "code", 278 | "execution_count": 77, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "def fill_empty_alcohol(row:pd.Series) -> pd.Series:\n", 283 | " if not pd.isna(row[\"alkohol zawartość\"]) and pd.isna(row[\"zawartość alkoholu\"]):\n", 284 | " row[\"zawartość alkoholu\"] = row[\"alkohol zawartość\"]\n", 285 | " return row\n", 286 | "\n", 287 | "data = data.transform(fill_empty_alcohol, axis=1)\n", 288 | "data.drop([\"alkohol zawartość\", \"zawartość alkoholu2\"], axis=1, inplace=True)\n", 289 | "\n", 290 | "def convert_alcohol(alcohol:str) -> float:\n", 291 | " if pd.isna(alcohol):\n", 292 | " return alcohol\n", 293 | " \n", 294 | " alcohol = alcohol.rstrip(\"%\").rstrip(\" \").replace(\",\", \".\")\n", 295 | " return float(alcohol)\n", 296 | "\n", 297 | "data[\"zawartość alkoholu\"] = data[\"zawartość alkoholu\"].transform(convert_alcohol)\n", 298 | "data.rename(columns={\"zawartość alkoholu\": \"alcohol (%)\"}, inplace=True)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "# Serving temperature\n", 306 | "\n", 307 | "Related columns: `temperatura podawania`" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 78, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "def convert_temperature(temp:str) -> str:\n", 317 | " if pd.isnull(temp):\n", 318 | " return temp\n", 319 | " return temp.rstrip(\" st.C\")\n", 320 | "\n", 321 | "data[\"temperatura podawania\"] = data[\"temperatura podawania\"].transform(convert_temperature)\n", 322 | "data.rename(columns={\"temperatura podawania\": \"serving temperature (C)\"}, inplace=True)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "# Color and kind\n", 330 | "\n", 331 | "Related columns: `rodzaj wina`, `rodzaj wina2`" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 79, 337 | "metadata": {}, 338 | "outputs": [], 339 
| "source": [ 340 | "color = ['białe', 'czerwone', 'różowe', 'pomarańczowe']\n", 341 | "kind = ['szampan', 'musujące', 'sherry', 'porto']\n", 342 | "\n", 343 | "kolor = []\n", 344 | "rodzaj = []\n", 345 | "\n", 346 | "for rodzaj1, rodzaj2 in zip(data['rodzaj wina'], data['rodzaj wina2']):\n", 347 | " if rodzaj1 in color:\n", 348 | " kolor.append(rodzaj1)\n", 349 | " elif rodzaj2 in color:\n", 350 | " kolor.append(rodzaj2)\n", 351 | " else:\n", 352 | " kolor.append(np.nan)\n", 353 | "\n", 354 | " if rodzaj1 in kind:\n", 355 | " rodzaj.append(rodzaj1)\n", 356 | " elif rodzaj2 in kind:\n", 357 | " rodzaj.append(rodzaj2)\n", 358 | " else:\n", 359 | " rodzaj.append(np.nan)\n", 360 | "\n", 361 | "data[['color', 'kind']] = pd.DataFrame({\n", 362 | " 'color': kolor,\n", 363 | " 'kind': rodzaj\n", 364 | "})\n", 365 | "data.drop(['rodzaj wina', 'rodzaj wina2'], axis=1, inplace=True)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "# Medals\n", 373 | "\n", 374 | "Related columns: `medale`, `medale2`, `medale3`" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 80, 380 | "metadata": {}, 381 | "outputs": [], 382 | "source": [ 383 | "medals = []\n", 384 | "\n", 385 | "for row in range(len(data)):\n", 386 | " medal = ''\n", 387 | " for value in data.loc[row, ['medale', 'medale2', 'medale3']]:\n", 388 | " if pd.isnull(value):\n", 389 | " break\n", 390 | "\n", 391 | " medal += value + ', '\n", 392 | "\n", 393 | " if medal == '':\n", 394 | " medals.append(np.nan)\n", 395 | " else:\n", 396 | " medals.append(medal[:-2])\n", 397 | " \n", 398 | "data['medals'] = pd.DataFrame({'medals': medals})\n", 399 | "data.drop(['medale', 'medale2', 'medale3'], axis=1, inplace=True)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "# Wegan and Natural\n", 407 | "\n", 408 | "Related columns: `bio`, `bio2`, `bio3`, `czy winnica bio`, `wino wegańskie`" 409 | ] 410 | }, 
411 | { 412 | "cell_type": "code", 413 | "execution_count": 81, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "wegan = []\n", 418 | "natural = []\n", 419 | "\n", 420 | "for bio, bio2, bio3, isBio, isWegan in zip(data['bio'], data['bio2'], data['bio3'], data['czy winnica bio'], data['wino wegańskie']):\n", 421 | " if bio == 'weganskie' or \\\n", 422 | " bio2 == 'weganskie' or \\\n", 423 | " bio3 == 'weganskie' or \\\n", 424 | " isWegan == 'tak':\n", 425 | " wegan.append(True)\n", 426 | " else:\n", 427 | " wegan.append(False)\n", 428 | "\n", 429 | " if (bio in ['organiczne', 'naturalne']) or \\\n", 430 | " (bio2 in ['organiczne', 'naturalne']) or \\\n", 431 | " (bio3 in ['organiczne', 'naturalne']) or \\\n", 432 | " isBio == 'tak – uprawa ograniczna/naturalna':\n", 433 | " natural.append(True)\n", 434 | " else:\n", 435 | " natural.append(False)\n", 436 | "\n", 437 | "bio = pd.DataFrame({\n", 438 | " 'wegan': wegan,\n", 439 | " 'natural': natural\n", 440 | "})\n", 441 | "\n", 442 | "data = pd.concat([data, bio], axis=1)\n", 443 | "data.drop(['bio', 'bio2', 'bio3', 'czy winnica bio', 'wino wegańskie'], axis=1, inplace=True)" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "# Punctation\n", 451 | "\n", 452 | "Related columns: `guia penin`, `falstaff`, `james suckling`, `oceny prasy`, `wine spectator`" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 82, 458 | "metadata": {}, 459 | "outputs": [], 460 | "source": [ 461 | "punctation = []\n", 462 | "\n", 463 | "for i in range(len(data)):\n", 464 | " points = []\n", 465 | "\n", 466 | " if not pd.isnull(data.loc[i, 'guia penin']):\n", 467 | " points.append(int(data.loc[i, 'guia penin'][:2]))\n", 468 | "\n", 469 | " if not pd.isnull(data.loc[i, 'falstaff']):\n", 470 | " points.append(int(data.loc[i, 'falstaff'][-3:]))\n", 471 | "\n", 472 | " if not pd.isnull(data.loc[i, 'james suckling']):\n", 473 | " 
points.append(int(data.loc[i, 'james suckling'][:2]))\n", 474 | "\n", 475 | " if not pd.isnull(data.loc[i, 'oceny prasy']):\n", 476 | " points.append(int(data.loc[i, 'oceny prasy'][-3:]))\n", 477 | "\n", 478 | " if not pd.isnull(data.loc[i, 'wine spectator']):\n", 479 | " points.append(int(data.loc[i, 'wine spectator'][:2]))\n", 480 | "\n", 481 | " if len(points) > 0:\n", 482 | " punctation.append(np.mean(points))\n", 483 | " else:\n", 484 | " punctation.append(np.nan)\n", 485 | "\n", 486 | "data['punctation'] = pd.Series(punctation).astype('float32')\n", 487 | "data.drop(['guia penin', 'falstaff', 'james suckling', 'oceny prasy', 'wine spectator'], axis=1, inplace=True)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "# Grapes\n", 495 | "\n", 496 | "Related columns: `grona`, `grona[2-13]`" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 83, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "cols = ['grona'] + [f'grona{i}' for i in range(2, 14)]\n", 506 | "\n", 507 | "grapes = []\n", 508 | "\n", 509 | "for row in range(len(data)):\n", 510 | " grape = ''\n", 511 | " for value in data.loc[row, cols]:\n", 512 | " if pd.isnull(value):\n", 513 | " break\n", 514 | "\n", 515 | " grape += value + ', '\n", 516 | "\n", 517 | " if grape == '':\n", 518 | " grapes.append(np.nan)\n", 519 | " else:\n", 520 | " grapes.append(grape[:-2])\n", 521 | " \n", 522 | "data['grapes'] = pd.DataFrame({'grapes': grapes})\n", 523 | "data.drop(cols, axis=1, inplace=True)" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "# Appellation\n", 531 | "\n", 532 | "Related columns: `apelacja`\n", 533 | "\n", 534 | "Do something with label `- brak apelacji`" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": {}, 541 | "outputs": [], 542 | "source": [ 543 | "data['apelacja'].replace('- brak apelacji', 
np.nan, inplace=True)\n", 544 | "data.rename(columns={\"apelacja\": \"appellation\"}, inplace=True)" 545 | ] 546 | }, 547 | { 548 | "cell_type": "markdown", 549 | "metadata": {}, 550 | "source": [ 551 | "# Translate other columns\n", 552 | "\n", 553 | "Related columns: `kraj`, `winnica`, `smak`, `styl`" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 90, 559 | "metadata": {}, 560 | "outputs": [], 561 | "source": [ 562 | "data.rename(columns={\n", 563 | " \"kraj\": \"country\",\n", 564 | " \"winnica\": \"vineyard\",\n", 565 | " \"smak\": \"taste\",\n", 566 | " \"styl\": \"style\"\n", 567 | "}, inplace=True)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "# Export clean dataset" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": null, 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "data.to_csv(\"./data/wina.pl_new_clean.csv\", index=False)" 584 | ] 585 | } 586 | ], 587 | "metadata": { 588 | "kernelspec": { 589 | "display_name": "Python 3", 590 | "language": "python", 591 | "name": "python3" 592 | }, 593 | "language_info": { 594 | "codemirror_mode": { 595 | "name": "ipython", 596 | "version": 3 597 | }, 598 | "file_extension": ".py", 599 | "mimetype": "text/x-python", 600 | "name": "python", 601 | "nbconvert_exporter": "python", 602 | "pygments_lexer": "ipython3", 603 | "version": "3.12.2" 604 | }, 605 | "orig_nbformat": 4 606 | }, 607 | "nbformat": 4, 608 | "nbformat_minor": 2 609 | } 610 | -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data/US_energy_sector.csv: -------------------------------------------------------------------------------- 1 | Month,All,Coal,Natural gas,Nuclear,Hydroelectric,Wind,Solar 2 | Jan-01,332493.16,177287.111,42388.663,68707.077,18852.048,389.25, 3 | Feb-01,282940.198,149735.484,37966.927,61272.407,17472.889,431.242, 4 |
Mar-01,300706.544,155269.011,44364.414,62140.712,20477.189,532.12, 5 | Apr-01,278078.871,140670.654,45842.746,56003.026,18012.994,684.695, 6 | May-01,300491.621,151592.914,50934.205,61512.445,19175.635,635.029, 7 | Jun-01,327693.978,162615.804,57603.154,68023.098,20727.63,669.634, 8 | Jul-01,357613.7,179060.41,73030.142,69166.04,18079.119,634.992, 9 | Aug-01,370532.828,183116.073,78409.804,68389.496,18913.768,577.206, 10 | Sep-01,306928.866,154158.317,60181.14,63378.451,15256.027,490.229, 11 | Oct-01,294733.613,148930.816,56376.441,60460.967,15234.503,606.829, 12 | Nov-01,278933.942,144116.998,44490.622,62341.711,15412.929,470.448, 13 | Dec-01,305496.328,157402.35,47540.861,67430.878,19346.314,615.657, 14 | Jan-02,319941.476,164358.018,48412.832,70925.863,21794.932,811.402, 15 | Feb-02,281825.712,143048.822,44308.431,61658.274,20191.683,713.932, 16 | Mar-02,302549.011,151485.651,51214.464,63040.647,21008.813,852.102, 17 | Apr-02,289848.253,142304.595,49146.409,58437.062,24246.605,1024.417, 18 | May-02,307674.569,151406.401,50275.237,63032.192,26662.695,1077.641, 19 | Jun-02,341023.144,164667.695,65631.018,66371.896,28212.847,1126.315, 20 | Jul-02,381542.107,183194.702,83917.269,70420.827,25470.647,890.455, 21 | Aug-02,374585.819,179955.447,84476.867,70777.688,21083.924,976.691, 22 | Sep-02,331279.434,165366.309,68161.131,64480.79,17086.782,735.828, 23 | Oct-02,307059.46,159099.294,54200.82,60493.052,17171.334,734.355, 24 | Nov-02,296289.561,156053.655,45160.876,61520.324,19730.029,655.946, 25 | Dec-02,324833.705,172189.765,46100.393,68905.472,21668.54,755.194, 26 | Jan-03,341988.901,181313.153,50175.746,69211.084,20600.079,632.34, 27 | Feb-03,299248.777,156982.481,43546.573,60941.869,19779.86,745.375, 28 | Mar-03,304317.265,155001.775,46699.026,59933.261,24202.273,1036.104, 29 | Apr-03,285755.699,141959.981,45195.383,56775.591,24758.962,1092.766, 30 | May-03,307544.887,150262.977,49372.963,62201.96,29395.256,1006.383, 31 | 
Jun-03,328693.698,162284.63,54452.951,64180.727,28586.3,1047.144, 32 | Jul-03,374396.05,181852.318,76938.278,69652.754,24843.095,953.396, 33 | Aug-03,381816.319,185331.823,83249.694,69023.878,22972.03,815.429, 34 | Sep-03,323135.534,164910.002,59089.943,63583.645,18480.455,895.054, 35 | Oct-03,306740.933,159322.942,51824.169,60016.207,18428.423,897.161, 36 | Nov-03,297866.711,158223.194,45327.919,59600.02,19715.265,961.432, 37 | Dec-03,331680.43,176291.473,44034.895,68611.699,24044.33,1104.881, 38 | Jan-04,346545.627,180657.4205,48253.37213,70806.16,22983.30588,999.44521, 39 | Feb-04,314279.9095,161503.0922,50319.87478,64101.949,20913.61671,1021.56897, 40 | Mar-04,308812.0955,154288.4192,49801.1869,63285.116,22914.28123,1291.224, 41 | Apr-04,290559.9142,141470.9562,51821.98826,58620.255,20887.97481,1294.8568, 42 | May-04,327380.2636,157016.3772,62021.80807,64917.412,24019.71812,1701.65148, 43 | Jun-04,345085.0788,167641.9063,64685.66904,67734.073,25252.34503,1397.20574, 44 | Jul-04,377331.6971,181491.8206,79290.02224,71975.04,23318.27953,1164.37009, 45 | Aug-04,368439.0371,178181.3426,77820.68835,71068.483,21591.96742,1050.71885, 46 | Sep-04,335622.1979,164252.9233,67853.99334,65932.057,20525.26137,1089.61523, 47 | Oct-04,312450.2229,157605.0954,57228.81399,62530.352,18862.7954,1028.87272, 48 | Nov-04,302101.2562,157435.9942,49693.03398,58940.863,20937.04438,932.203, 49 | Dec-04,341947.9634,176755.2015,51309.56547,68616.627,26210.71849,1172.00867, 50 | Jan-05,343121.4534,177013.9066,51337.62707,69828.239,24272.16485,1131.5685, 51 | Feb-05,298500.3156,155818.0141,44912.61665,60946.934,21606.77394,966.48178, 52 | Mar-05,317458.2765,163612.7005,51896.97271,61538.53,22936.06749,1560.62565, 53 | Apr-05,289562.3999,143083.239,52016.28598,55484.01,23058.35863,1697.5037, 54 | May-05,315062.1099,153957.9615,54826.00402,62970.464,27278.51096,1746.42955, 55 | Jun-05,363671.636,174866.9973,75635.49254,66144.275,26782.98993,1796.92435, 56 | 
Jul-05,402273.8465,186090.7572,96819.41031,71070.011,25956.84152,1420.94183, 57 | Aug-05,404940.6811,187573.7544,100786.6544,71381.55,21565.67882,1138.03152, 58 | Sep-05,350217.8005,171656.2155,73355.48269,66738.979,17363.68401,1468.34696, 59 | Oct-05,316397.9236,162436.793,55940.88089,61235.597,18006.36026,1446.18982, 60 | Nov-05,306115.2362,158797.527,49440.22594,62912.709,19352.66779,1609.69148, 61 | Dec-05,348101.0706,177965.1796,53992.60122,71735.067,22141.15675,1827.81389, 62 | Jan-06,328657.7385,169236.0806,43806.92078,71911.532,27436.65597,2382.63335, 63 | Feb-06,307333.43,158616.1889,47408.9894,62615.654,24761.68318,1921.95844, 64 | Mar-06,318729.7996,161325.1153,54921.74189,63720.849,24624.60881,2358.63174, 65 | Apr-06,297858.0775,141426.2336,56090.85628,57567.204,28555.68297,2471.76225, 66 | May-06,330615.57,157009.7551,65585.60534,62775.701,30818.44294,2458.6826, 67 | Jun-06,364259.7976,169693.4782,81060.20993,68391.472,29757.28409,2051.76563, 68 | Jul-06,410420.7933,187820.8715,108093.5144,72186.494,25439.39237,1955.06423, 69 | Aug-06,407762.5364,189454.9297,106591.666,72016.095,21728.3537,1655.18688, 70 | Sep-06,332055.3208,161590.3542,72673.34798,66642.327,17201.46086,1879.14378, 71 | Oct-06,321567.0913,161389.6591,70640.03895,57509.306,17055.17699,2442.12988, 72 | Nov-06,309158.8264,159439.5173,53439.69272,61392.321,20271.72257,2540.48864, 73 | Dec-06,336283.2459,173508.9511,56128.18599,70489.681,21595.95122,2471.68958, 74 | Jan-07,353531.0948,175739.4377,61474.53193,74006.184,26044.71053,2452.31464, 75 | Feb-07,323230.3948,163602.8126,57622.07125,65224.794,18566.63325,2519.79285, 76 | Mar-07,320471.2425,159811.0739,56203.82094,64305.153,24163.45595,3047.09243, 77 | Apr-07,303129.0636,146249.5412,60152.84758,57301.424,23890.58301,3171.70315, 78 | May-07,330202.758,157513.2812,66469.7005,65024.555,26046.97643,2952.34063, 79 | Jun-07,362754.8772,173512.884,81511.45079,68923.074,22816.60812,2620.41645, 80 | 
Jul-07,393226.4323,185053.7876,97482.67572,72738.535,22477.76053,2158.37244, 81 | Aug-07,421796.6593,190134.8163,121338.4266,72750.91,19940.55104,2699.36469, 82 | Sep-07,355393.9119,169391.2728,88531.61471,67578.578,14742.53418,2866.72155, 83 | Oct-07,332615.4164,162234.3561,78358.02381,61689.542,14796.43375,3376.54028, 84 | Nov-07,314102.5426,159382.4546,60636.79925,64899.278,15682.053,3095.03643, 85 | Dec-07,346290.3305,173829.8655,66807.82797,71982.726,18341.67452,3490.23188, 86 | Jan-08,362998.429,182875.5043,72599.9011,70734.57,20779.13078,4273.1854, 87 | Feb-08,325105.5971,166666.3548,60042.0118,65130.385,18788.8861,3851.7487, 88 | Mar-08,324629.8496,160743.243,62170.57985,64716.468,21668.93847,4782.01995, 89 | Apr-08,305865.0312,146983.0239,63046.09796,57332.716,22233.93205,5225.28203, 90 | May-08,325244.8019,154915.9014,62270.44712,64825.901,27221.14742,5340.28392, 91 | Jun-08,373109.0269,171043.279,84619.97663,70319.366,29177.42712,5140.3764, 92 | Jul-08,402900.3362,186733.4563,100320.8781,74318.27,25555.08636,4008.40059, 93 | Aug-08,388986.7188,180576.493,99673.49348,72617.066,21229.2774,3264.40615, 94 | Sep-08,338056.13,161356.3473,79136.02998,67053.867,16178.19232,3111.45182, 95 | Oct-08,318547.3431,151840.7152,73283.33855,62820.353,15469.94246,4756.40094, 96 | Nov-08,310046.2412,154281.198,61454.19852,63408.145,15667.93401,4993.64564, 97 | Dec-08,343898.2545,167785.7305,64363.64603,72931.328,20861.49062,6615.89854, 98 | Jan-09,354992.6362,171925.1503,66390.11321,74102.489,23490.29569,5950.82465, 99 | Feb-09,300887.3977,140915.7848,62138.85332,64227.211,17812.03175,5852.17501, 100 | Mar-09,310602.9638,135530.3278,68202.74331,67240.567,21827.45353,7099.06292, 101 | Apr-09,289537.2551,125934.9731,61158.67362,59408.407,25769.8074,7457.69602, 102 | May-09,311305.8722,131673.0575,68145.64662,65395.452,29559.65512,6261.9611, 103 | Jun-09,347658.2413,148086.7711,84205.18154,69734.736,29233.39456,5599.42191, 104 | 
Jul-09,372542.2255,158233.8691,101893.8325,72948.979,23384.74817,4954.94132, 105 | Aug-09,381221.4169,163260.4831,109239.5541,72244.597,19580.41655,5464.47362, 106 | Sep-09,327400.6119,137144.648,92126.61784,65751.794,17358.71392,4650.70788, 107 | Oct-09,307040.3407,139955.5774,72602.72548,58020.927,19691.06826,6813.62592, 108 | Nov-09,296634.67,136809.5752,63285.13196,59069.208,21007.70089,6875.18302, 109 | Dec-09,350507.2953,166434.0356,71589.60705,70710.218,24729.80834,6906.05833, 110 | Jan-10,360957.167,173320.1609,74172.73601,72569.341,22383.07243,6854.33658, 111 | Feb-10,319734.7267,153044.1591,66198.08,65245.37,20589.82989,5431.85793, 112 | Mar-10,312167.6403,144405.6332,63430.66033,64634.721,20885.6628,8589.07682, 113 | Apr-10,287800.2266,126951.8853,64644.025,57611.073,19096.66257,9764.45598, 114 | May-10,327936.4916,143272.4936,73665.43824,66658.365,25079.21697,8697.52487, 115 | Jun-10,375759.4784,165491.4889,92268.43063,68301.32156,29853.82693,8049.02142, 116 | Jul-10,409725.4404,179600.4991,114624.2019,71913.468,24516.76133,6723.89118, 117 | Aug-10,408884.1554,177745.3201,121151.2686,71574.103,20119.23218,6685.85505, 118 | Sep-10,346045.1718,148745.6717,93004.17649,69371.195,17265.20646,7105.5022, 119 | Oct-10,307920.6906,132269.8351,77738.33651,62750.833,17682.97635,7943.80761, 120 | Nov-10,306009.6291,135185.0454,69226.55984,62655.206,19561.55003,9747.6191, 121 | Dec-10,362119.0813,167258.0862,77573.32016,73683.304,23169.07145,9059.29736, 122 | Jan-11,362871.893,170802.6793,74254.33381,72742.813,25531.09446,8550.4948, 123 | Feb-11,313126.6071,138311.1572,65923.98289,64789.078,24131.22605,10451.5619, 124 | Mar-11,318709.9409,134845.2314,65947.12043,65661.598,31134.47075,10544.64605, 125 | Apr-11,302400.7244,124488.2575,70028.99891,54547.338,31194.3065,12421.65683, 126 | May-11,323628.2381,137101.565,75242.79679,57012.914,32586.92609,11772.16075, 127 | Jun-11,367727.015,158055.2126,90691.11156,65270.134,32151.21433,10985.06912, 128 | 
Jul-11,418692.7546,176585.8197,119623.5911,72344.85,31284.63456,7488.62905, 129 | Aug-11,406511.3147,171280.5339,119855.7926,71338.566,25763.57824,7473.59402, 130 | Sep-11,337931.3176,140941.1491,91739.07009,66848.854,21377.59172,6869.0293, 131 | Oct-11,308698.5036,126626.9728,78819.20708,63336.97,19787.38961,10525.42896, 132 | Nov-11,304102.1544,121462.7108,75441.29818,64473.88,20680.52743,12438.55312, 133 | Dec-11,335740.4628,132928.7161,86121.62582,71837.372,23731.94371,10655.77473, 134 | Jan-12,339526.4801,129090.9898,90760.88378,72381.186,23107.14392,13630.85627, 135 | Feb-12,309389.4325,113872.0469,90609.78234,63847.023,20283.9644,11051.71417, 136 | Mar-12,309089.5521,105525.5468,92250.65327,61728.613,25906.63425,14027.34496, 137 | Apr-12,295229.2049,96285.44067,94828.59637,55870.931,26295.39842,12709.03261, 138 | May-12,336516.156,115982.6466,107351.8123,62081.445,28641.31224,12540.32427, 139 | Jun-12,360825.3809,131261.0361,115597.5032,65140.085,26657.66222,11972.15949, 140 | Jul-12,414641.2175,160450.2625,138862.85,69129.329,26491.04579,8823.083, 141 | Aug-12,395699.7492,152181.0552,131735.8644,69602.111,23033.72581,8469.41899, 142 | Sep-12,334585.5627,125588.6081,108012.3007,64510.882,17603.96419,8789.86546, 143 | Oct-12,311651.9307,120999.3388,91725.37056,59743.218,16501.69662,12635.90112, 144 | Nov-12,305975.851,128727.0657,80169.46107,56712.757,18733.31916,11648.4968, 145 | Dec-12,334634.7492,134078.9076,83989.0974,68583.669,22984.35228,14523.51758, 146 | Jan-13,348967.3779,138104.8211,88559.05007,71405.817,24828.52759,14738.50426, 147 | Feb-13,309728.08,123546.8048,80283.07377,61483.385,20418.46514,14075.59068, 148 | Mar-13,325398.834,130633.5604,84725.25919,62947.389,20534.35802,15755.65337, 149 | Apr-13,299332.8683,111834.6825,78036.43182,56766.882,25097.10474,17476.27429, 150 | May-13,322155.6404,119512.5,83815.59186,62848.155,28450.09411,16238.70213, 151 | Jun-13,356822.6472,138283.216,99615.06431,66429.69,27384.06656,13748.10576, 152 | 
Jul-13,394845.6208,152866.8527,120770.9808,70539.238,27254.5715,11093.61355, 153 | Aug-13,385285.7452,149426.0529,121156.397,71344.379,21633.31561,9633.88368, 154 | Sep-13,340940.9016,133110.4373,102063.2333,65798.971,16961.1523,11674.08115, 155 | Oct-13,314925.4625,120996.3929,88587.40151,63183.533,17198.59414,13635.01809, 156 | Nov-13,314539.6419,120939.7298,84286.78492,64975.456,17676.83384,15803.25968, 157 | Dec-13,353021.2477,141859.6654,92936.2913,71293.578,21128.29918,13967.05881, 158 | Jan-14,377251.3784,157097.3686,91062.48197,73162.609,21633.79467,17911.21433,1375.15155 159 | Feb-14,324347.0162,143294.4875,75945.20996,62638.95,17396.12893,14008.66045,1498.82093 160 | Mar-14,331820.9692,136442.6013,78154.34115,62397.08,24257.12925,17735.8829,2224.12366 161 | Apr-14,297628.2622,109280.5917,76784.84902,56384.588,25439.90661,18635.54917,2475.52102 162 | May-14,324719.3071,118785.8006,89120.78275,62947.43,26543.88511,15601.36684,2841.66641 163 | Jun-14,357840.1785,137576.9215,98469.55616,68138.232,25743.87767,15798.81712,3023.8867 164 | Jul-14,385775.4499,149627.0576,115083.4461,71940.126,24357.40238,12187.39242,2936.22665 165 | Aug-14,384336.5606,148451.6233,122350.0053,71128.746,19807.24668,10170.52076,3018.61669 166 | Sep-14,339882.6659,126110.3846,106583.6804,67534.5,16074.32674,11519.76842,2878.62126 167 | Oct-14,314518.0088,111295.9575,97684.69687,62390.988,17159.21225,14507.93111,2681.80098 168 | Nov-14,317491.3645,119127.4394,84355.8568,65140.185,18624.92296,18866.92881,2171.44148 169 | Dec-14,337952.9806,124620.1162,91040.2099,73362.548,22328.78862,14711.24947,1797.81256 170 | Jan-15,360452.8432,132450.5207,101690.9734,74269.974,24138.3839,15162.14633,1901.79647 171 | Feb-15,334595.611,126976.832,91440.16871,63461.493,22286.07601,14921.54534,2299.31836 172 | Mar-15,324313.5171,108487.5386,99550.14498,64546.799,24280.90476,15307.92894,3205.83426 173 | Apr-15,294177.1873,88989.1425,92855.54624,59784.495,22470.97723,17867.14921,3642.64641 174 | 
May-15,322189.1014,104584.5267,101624.6975,65826.525,20125.4156,17151.34131,3897.98556 175 | Jun-15,362493.0664,125673.1137,121567.8112,68516.165,20414.084,13421.26619,3965.61172 176 | Jul-15,400534.5676,139099.8686,141241.2463,71412.176,21014.22144,13675.44937,4113.74514 177 | Aug-15,392241.7185,134670.3089,139215.8854,72415.352,19122.10511,13080.02756,4155.60157 178 | Sep-15,350190.2263,117985.6322,123110.8314,66476.372,16094.1224,13971.56651,3546.77556 179 | Oct-15,312210.4157,96758.50415,110109.753,60570.921,16630.40155,16380.03853,3107.28733 180 | Nov-15,300779.3943,87227.13603,102368.8418,60263.941,19337.83088,19681.71564,2711.70957 181 | Dec-15,324536.1236,89495.07284,109892.1029,69633.664,23165.56195,20098.37277,2483.85893 182 | Jan-16,352713.7194,113459.3798,110043.7467,72524.775,25614.54233,18466.40151,2465.40732 183 | Feb-16,313816.4003,92704.80797,98688.19908,65638.141,24139.01548,20138.2278,3386.13613 184 | Mar-16,304427.4219,72172.52681,103933.1226,66148.894,27389.8772,21939.40933,4142.59215 185 | Apr-16,292986.5976,72112.8619,98973.7993,62731.845,25878.02878,20799.30345,4582.61853 186 | May-16,316866.9924,81694.51025,110518.5333,66576.493,25486.38143,18847.89867,5304.17484 187 | Jun-16,367903.5418,116034.3841,131522.9971,67175.324,23236.86739,16303.4413,5401.26493 188 | Jul-16,412043.3651,136316.434,151716.164,70349.347,21455.32093,17618.35712,5945.07716 189 | Aug-16,409861.386,135634.7333,154926.0252,71526.405,19569.54914,13589.32615,5910.82805 190 | Sep-16,351560.0438,114137.7683,125683.4508,65448.176,16367.68403,16403.64017,5370.44767 191 | Oct-16,312939.7279,99193.94367,102897.9861,60733.343,17338.85654,20335.38978,4743.28769 192 | Nov-16,297065.2608,86940.46241,93950.8325,65178.776,18808.26528,19405.64211,4023.9887 193 | Dec-16,345389.3435,118746.8419,96416.06528,71662.429,22527.76456,23145.52474,3590.66861 194 | Jan-17,344413.9697,115332.8083,95661.24708,73120.612,26627.88134,20798.77101,3570.01671 195 | 
Feb-17,291112.6381,86822.18986,82836.97484,63560.371,23881.76459,22091.10927,4135.35386 196 | Mar-17,319469.2801,89364.62027,95214.19971,65093.2,29613.19463,25730.61672,6486.54205 197 | Apr-17,295461.5182,81335.3131,88562.36959,56743.352,29409.46563,25377.94296,7101.80352 198 | May-17,323494.0217,92776.54135,98074.11624,61312.753,32607.11514,23067.63856,8208.21502 199 | Jun-17,358630.1022,107508.3222,117351.4929,67010.782,30575.29559,20141.74696,8602.15011 200 | Jul-17,404537.0098,127697.8203,147041.8025,71314.219,26598.24651,16119.99604,8123.75179 201 | Aug-17,384837.2763,119488.3849,141307.8281,72384.218,22033.85574,13878.84827,7848.17288 202 | Sep-17,336003.8361,98202.04611,118131.7052,68097.918,19151.75538,17911.77474,7284.82088 203 | Oct-17,318731.2011,89775.52365,106895.1991,65994.785,17698.17945,24368.70291,6639.60775 204 | Nov-17,308188.6823,90986.33924,95072.30676,66617.853,19888.33402,22614.8668,4769.85448 205 | Dec-17,350563.3593,106545.3667,111553.286,73699.572,22247.84164,22200.68098,4506.85847 206 | Jan-18,373379.3527,119284.1147,110441.9821,74649.04,25064.07663,25598.8222,4938.335 207 | Feb-18,307057.6575,82050.18241,98675.2857,64790.03,24902.25758,23189.48924,5662.7551 208 | Mar-18,321765.1897,80625.66254,106742.0159,67032.656,25860.604,26463.70391,7490.19618 209 | Apr-18,301056.9454,73346.00643,98671.58527,59133.155,28115.15452,26430.85718,8796.43825 210 | May-18,339228.2616,85227.3,115564.6383,67320.248,30444.28385,23953.12766,9860.22196 211 | Jun-18,372145.0843,101503.4267,131085.723,69687.556,27597.48847,24702.85593,10473.77801 212 | Jul-18,411616.6857,115376.3764,165075.4262,72456.009,25099.52896,16446.57439,9901.29598 213 | Aug-18,408352.1497,115129.4557,162000.634,72282.467,22016.90536,19846.43699,9712.3077 214 | Sep-18,356557.9597,96543.99239,142085.1857,64724.753,19165.6208,18519.67063,8635.23462 215 | Oct-18,325070.1501,87263.62736,123280.4465,59396.905,19548.19092,21193.89801,7361.46637 216 | 
Nov-18,322466.0338,92818.59372,108265.012,63954.37,21912.71643,22015.7339,5648.04713 217 | Dec-18,342292.2326,100318.6001,109955.0966,71657.288,22797.16131,24306.28369,4884.51281 218 | Jan-19,359728.8351,100904.7043,121808.2773,73700.844,24797.80842,24301.4495,5483.40662 219 | Feb-19,315281.7322,79929.40881,112397.1592,64714.894,22880.53778,22622.75342,5894.64724 220 | Mar-19,326903.2526,78351.61989,116059.1462,65079.691,26333.98533,25772.67842,8812.88461 221 | Apr-19,296952.6114,59922.45969,104348.6321,60580.927,27820.1768,28915.30603,9997.2463 222 | May-19,330660.8227,71884.62525,117296.649,67123.546,31982.04155,25779.38696,10710.6803 223 | Jun-19,353239.3549,78539.50426,138087.4187,68804.879,28077.76308,22445.97049,11575.14488 224 | Jul-19,410364.8914,100770.7854,172281.645,72198.595,24875.39844,22100.72207,11892.78073 225 | Aug-19,401731.6546,94039.70636,175270.2476,71910.684,22578.66752,19978.03073,11487.90701 226 | Sep-19,360759.5693,85706.69196,149938.1323,66063.58,18525.75999,24513.46786,10022.19806 227 | Oct-19,320517.6448,66777.23093,131113.378,62032.622,18305.81291,27624.5193,8925.99468 228 | Nov-19,315897.352,75549.33857,117958.726,64125.425,20217.59733,25184.35888,6591.54593 229 | Dec-19,338536.1584,72580.73606,131973.4283,73073.575,21478.18136,26643.83983,5499.8331 230 | Jan-20,342019.1095,65139.75937,136083.78,74169.646,24497.85718,28121.41138,6771.41485 231 | Feb-20,319698.1063,56201.46779,128017.9285,65910.574,25868.06177,29110.49454,8184.13587 232 | Mar-20,309869.6961,50730.62847,126186.5801,63997.21,23823.01748,29319.78293,9773.50166 233 | Apr-20,279846.2138,40675.20198,110563.8151,59170.016,23194.35466,29752.22898,11736.47773 234 | May-20,304836.8258,46526.62632,117185.646,64337.97,29976.16051,28377.50536,13920.70376 235 | Jun-20,351967.1898,65283.16993,143055.4234,67205.083,27999.3837,30212.40523,13923.04582 236 | Jul-20,409871.26,89709.48262,181568.0636,69385.44,26741.77947,22866.19821,15015.04895 237 | 
Aug-20,398535.5924,91145.41119,173644.417,68982.187,23283.5577,23029.3319,13513.9375 238 | Sep-20,333493.0367,68406.74534,141397.455,65727.317,18678.80991,23185.97755,11453.82366 239 | Oct-20,313703.4388,59804.58904,131412.5789,59362.465,18810.4061,28822.66268,10395.32693 240 | Nov-20,301402.9637,61182.18639,109810.7803,61759.977,20892.65917,33129.4051,8453.28574 241 | Dec-20,344523.4128,78587.62866,127863.1722,69870.978,21507.84718,32010.64517,7580.39219 242 | Jan-21,349209.7089,81239.77891,126529.5985,71732.463,24560.04066,30060.4714,8309.06463 243 | Feb-21,323899.5289,87470.49993,111182.9149,62954.16,20136.50917,26715.50092,9269.54941 244 | Mar-21,311397.2759,61903.96388,107018.7113,63708.238,21220.43408,39205.24802,13454.48883 245 | Apr-21,293307.9444,53956.13351,107416.2679,57092.024,19388.82781,36157.80676,15502.04998 246 | May-21,320180.9614,63872.72048,114675.7916,63394.115,23308.96396,33786.50791,17519.84666 247 | Jun-21,373856.4776,87264.60172,149375.5555,66070.373,23454.07921,26671.53368,17303.9438 248 | Jul-21,405624.0906,101536.5367,170189.3631,68831.593,22097.51226,21715.55151,17384.07738 249 | Aug-21,412864.7675,101854.5988,172716.0052,69471.331,20328.14723,27071.12204,16891.19396 250 | Sep-21,347743.7749,78876.84017,138214.4087,64520.031,17022.26652,28997.8029,15584.01297 251 | Oct-21,320201.7781,62572.1427,131851.6815,58401.112,17132.93623,32215.11144,13088.99226 252 | Nov-21,314309.5206,57426.34728,122433.2592,62749.318,19373.24966,35751.09063,11054.16073 253 | Dec-21,337103.5609,60024.59585,127586.4301,70719.837,23561.87556,39849.02762,9061.0693 254 | Jan-22,373765.919,87588.22693,134947.5141,70576.875,24197.61344,37416.32051,11198.35638 255 | Feb-22,324311.0552,70966.37798,114945.4142,61852.177,21320.56822,37644.62607,12743.87856 256 | Mar-22,324530.8509,61018.7735,112477.2522,63153.701,24436.41649,43030.54797,16815.69305 257 | Apr-22,303994.0502,55329.13354,105505.8211,55289.54,20065.83638,46167.09477,19073.03688 258 | 
May-22,342184.4058,62531.74438,127093.5551,63381.62449,23358.77956,42123.69758,21356.56223 259 | Jun-22,379134.4769,73463.10379,155517.0049,65715.42,25987.79518,33767.53068,22282.06831 260 | Jul-22,422975.6527,86414.97062,189042.1596,68856.919,24567.4546,29474.97824,22203.83016 261 | Aug-22,412133.7617,85214.64958,188859.6344,68896.917,21132.94901,24718.4162,20697.2015 262 | Sep-22,351655.4,64998.31931,156947.8524,63733.186,17026.14725,27331.13356,18830.90672 263 | Oct-22,313949.3854,54228.45722,133491.5392,58945.383,14367.41841,32744.63415,16964.17077 264 | Nov-22,321780.5598,56376.96293,127523.4363,62041.287,17898.14767,41198.8465,12437.95751 265 | Dec-22,360256.7151,73380.89229,140715.5326,69094.147,20429.54086,38679.53194,10475.48995 266 | Jan-23,347817.1455,61290.8751,137541.4208,70870.08,22288.30082,39166.5977,11963.78669 267 | Feb-23,309116.3793,46487.92774,123920.7292,60806.857,18682.48569,42058.41902,13641.70172 268 | Mar-23,329838.8224,50067.28079,132152.688,62820.443,20227.59757,44527.11026,18130.90619 269 | Apr-23,299673.0993,40078.66802,120477.9842,56662.458,17481.3198,43017.20057,21501.94859 270 | May-23,327532.6236,43852.22406,137795.2922,61472.883,27479.13413,32018.24592,24462.3751 271 | Jun-23,356672.3849,57697.75156,161692.7028,64965.076,19470.63281,27505.93547,25034.48613 272 | Jul-23,425611.3284,78909.85665,200506.7592,69887.588,21237.80849,27867.90946,26538.55725 273 | Aug-23,423934.2574,78184.50094,199993.0422,69744.023,21145.86624,28504.16694,25350.26673 274 | Sep-23,358136.2599,60005.6758,164466.3568,65559.71,16496.04043,28193.29153,22192.58066 275 | -------------------------------------------------------------------------------- /Data Manipulation/Notebooks/data/titanic.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,1,3,"Wilkes, Mrs. 
James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,0,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,0,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,0,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,1,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,0,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,1,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,0,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,0,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,0,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,0,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,1,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,0,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,0,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,1,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,1,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,0,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,0,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,0,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,0,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,0,3,"Daher, Mr. 
Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,0,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,0,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,0,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,0,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,1,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,1,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,0,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,0,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,1,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,1,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,0,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 931,0,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,0,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,0,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,0,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 935,1,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,0,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,0,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,0,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,1,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 942,0,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,0,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,1,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,1,"Fortune, Miss. 
Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,0,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,0,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,0,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,0,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,0,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,0,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,0,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,0,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,1,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,0,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,1,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,1,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,0,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,0,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,1,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,0,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,1,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,0,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,0,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,0,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,0,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,1,3,"Doyle, Miss. 
Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,0,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,0,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,0,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,0,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,0,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,0,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,1,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,1,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,1,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,0,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,1,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,0,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,0,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 986,0,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,0,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,0,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 990,1,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,0,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,0,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 994,0,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,0,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,1,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,0,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,0,3,"Buckley, Mr. 
Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,0,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,0,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,0,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,0,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,1,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,1,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,0,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,0,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,1,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,0,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,1,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,1,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,0,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,0,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,0,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,1,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,0,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,1,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,0,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 1021,0,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,0,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,0,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,1,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,0,3,"Thomas, Mr. 
Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,0,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,0,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,0,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,0,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,1,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,0,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,1,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,0,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,0,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,0,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,0,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,0,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,0,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,0,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,0,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,0,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,0,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,1,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,0,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,0,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,1,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,0,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,1,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 
3101315,13.775,,S 162 | 1052,1,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,0,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,1,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,0,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,0,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,1,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,0,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,0,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,1,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,0,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,0,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,0,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,0,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,0,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,1,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,1,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,0,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,1,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,0,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,0,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,0,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,1,"Douglas, Mrs. 
Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,0,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,1,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,0,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,1,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,0,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,0,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,0,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,0,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,0,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,0,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,0,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,0,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,1,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,0,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,1,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,1,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,0,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,0,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,1,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,0,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,0,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 1098,1,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,0,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,0,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,0,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,0,3,"Finoli, Mr. 
Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,0,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,1,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,1,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,0,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,1,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,0,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,0,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,1,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,0,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,1,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,0,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,1,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,0,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,1,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,0,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,0,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,0,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 1124,0,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,0,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,0,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,0,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,0,1,"Warren, Mr. 
Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,0,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,1,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,1,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,0,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,0,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,0,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,0,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,1,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,0,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,1,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,1,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,1,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,0,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,0,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,0,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,0,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,0,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,0,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,0,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,1,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S 261 | 1151,0,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,0,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,0,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,1,2,"Wells, Mrs. 
Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,1,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,0,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,0,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,0,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,0,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,1,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,0,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,0,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,0,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,1,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,0,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,1,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,0,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,0,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,0,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,0,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,1,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,0,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 1174,1,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,1,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,1,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,0,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,0,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,0,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,0,3,"Mardirosian, Mr. 
Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,0,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,0,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,1,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,0,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,0,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,0,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,0,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,1,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,0,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,0,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,0,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,0,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,0,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,0,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,0,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,1,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,0,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,0,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,0,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,1,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,0,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,0,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,0,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,1,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,1,3,"Hagardon, Miss. 
Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,0,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,0,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,0,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,0,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,0,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,0,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,0,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,0,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,0,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,1,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,0,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,0,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,0,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,1,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,0,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,0,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,1,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,0,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,0,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,0,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,0,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,0,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,0,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,0,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,0,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,0,3,"Sage, Mr. John George",male,,1,9,CA. 
2343,69.55,,S 345 | 1235,1,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,0,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,1,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,0,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,1,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,0,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,1,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,0,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,0,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,0,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,1,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,0,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,0,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,0,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,1,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,0,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,1,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,1,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,0,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,1,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,0,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,1,3,"Riihivouri, Miss. 
Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,0,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,0,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,0,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,0,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,1,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,0,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,0,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,0,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,0,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,0,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,1,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,1,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,0,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,1,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,0,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,0,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,0,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,0,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,0,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,0,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 1285,0,2,"Gilbert, Mr. William",male,47,0,0,C.A. 
30769,10.5,,S 396 | 1286,0,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,0,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,0,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,0,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,0,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,0,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,0,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,0,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,0,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,0,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,1,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,1,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,1,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,1,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 1309,0,3,"Peter, Master. 
Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /Data Visualization/Notebooks/data/titanic.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,0,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,0,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,0,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,1,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,0,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,1,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,0,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,0,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,0,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,0,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,1,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,0,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,0,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,1,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,1,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,0,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,0,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,1,"Flegenheim, Mrs. 
Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,0,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,0,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,0,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,0,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,0,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,0,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,0,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,1,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,1,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,0,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,0,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,1,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,1,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,0,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 931,0,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,0,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,0,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,0,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 935,1,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,0,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,0,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,0,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,1,"Bucknell, Mrs. 
William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,1,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 942,0,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,0,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,1,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,0,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,0,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,0,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,0,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,0,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,0,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,0,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,0,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,1,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,0,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,1,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,1,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,0,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,0,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,1,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,0,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,1,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,0,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,1,"Geiger, Miss. 
Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,0,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,0,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,0,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,1,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,0,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,0,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,0,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,0,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,0,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,0,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,1,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,1,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,1,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,0,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,1,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,0,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,0,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 986,0,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,0,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,0,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 990,1,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,0,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,0,2,"Weisz, Mr. 
Leopold",male,27,1,0,228414,26,,S 104 | 994,0,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,0,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,1,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,0,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,0,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,0,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,0,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,0,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,0,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,1,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,1,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,0,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,0,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,1,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,0,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,1,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,1,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,0,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,0,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,0,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,1,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,0,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,1,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,0,2,"Bowenur, Mr. 
Solomon",male,42,0,0,211535,13,,S 131 | 1021,0,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,0,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,0,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,1,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,0,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,0,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,0,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,0,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,0,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,1,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,0,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,1,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,0,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,0,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,0,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,0,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,0,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,0,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,0,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,0,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,0,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,0,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,1,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,0,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,0,3,"Duquemin, Mr. 
Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,1,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,0,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,1,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S 162 | 1052,1,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,0,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,1,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,0,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,0,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,1,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,0,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,0,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,1,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,0,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,0,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,0,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,0,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,0,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,1,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,1,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,0,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,1,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,0,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,0,1,"Compton, Mr. 
Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,0,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,0,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,1,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,0,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,1,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,0,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,0,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,0,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,0,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,0,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,0,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,0,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,0,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,1,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,0,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,1,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,1,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,0,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,0,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,1,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,0,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,0,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 1098,1,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,0,2,"Collett, Mr. 
Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,0,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,0,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,0,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,0,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,1,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,1,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,0,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,1,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,0,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,0,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,1,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,0,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,1,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,0,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,1,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,0,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,1,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,0,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,0,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,0,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 1124,0,3,"Wiklund, Mr. 
Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,0,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,0,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,0,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,0,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,0,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,1,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,1,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,0,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,0,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,0,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,0,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,1,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,0,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,1,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,1,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,1,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,0,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,0,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,0,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,0,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,0,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,0,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,0,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,1,2,"Bentham, Miss. 
Lilian W",female,19,0,0,28404,13,,S 261 | 1151,0,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,0,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,0,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,1,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,1,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,0,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,0,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,0,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,0,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,1,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,0,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,0,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,0,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,1,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,0,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,1,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,0,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,0,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,0,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,0,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,1,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,0,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 1174,1,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,1,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,1,3,"Rosblom, Miss. 
Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,0,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,0,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,0,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,0,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,0,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,0,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,1,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,0,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,0,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,0,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,0,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,1,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,0,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,0,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,0,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,0,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,0,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,0,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,0,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,1,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,0,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,0,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,0,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,1,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,0,3,"Cacic, Mr. 
Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,0,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,0,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,1,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,1,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,0,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,0,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,0,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,0,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,0,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,0,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,0,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,0,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,0,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,1,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,0,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,0,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,0,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,1,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,0,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,0,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,1,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,0,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,0,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,0,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,0,3,"Elias, Mr. 
Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,0,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,0,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,0,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,0,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,0,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1235,1,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,0,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,1,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,0,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,1,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,0,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,1,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,0,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,0,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,0,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,1,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,0,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,0,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,0,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,1,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,0,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,1,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,1,2,"Ware, Mrs. 
John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,0,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,1,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,0,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,1,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,0,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,0,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,0,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,0,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,1,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,0,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,0,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,0,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,0,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,0,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,1,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,1,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,0,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,1,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,0,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,0,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,0,3,"Canavan, Mr. 
Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,0,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,0,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,0,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 1285,0,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 1286,0,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,0,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,0,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,0,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,0,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,0,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,0,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,0,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,0,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,0,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,1,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,1,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,1,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,1,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 
3236,8.05,,S 416 | 1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 1309,0,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /Intro/Notebooks/100_Numpy_exercises_with_solutions.md: -------------------------------------------------------------------------------- 1 | # 100 numpy exercises 2 | 3 | The original source of this notebook is [here](https://github.com/rougier/numpy-100). Credits to Nicolas P. Rougier. A video with live-coding solutions is also available [here](https://www.youtube.com/watch?v=PM504XhEVCU). 4 | 5 | --- 6 | 7 | This is a collection of exercises that have been collected in the numpy mailing list, on stack overflow 8 | and in the numpy documentation. The goal of this collection is to offer a quick reference for both old 9 | and new users but also to provide a set of exercises for those who teach. 10 | 11 | 12 | If you find an error or think you've a better way to solve some of them, feel 13 | free to open an issue at [https://github.com/rougier/numpy-100/issues](https://github.com/rougier/numpy-100/issues). 14 | File automatically generated. See the documentation to update questions/answers/hints programmatically. 15 | 16 | #### 1. Import the numpy package under the name `np` (★☆☆) 17 | 18 | 19 | ```python 20 | import numpy as np 21 | ``` 22 | #### 2. Print the numpy version and the configuration (★☆☆) 23 | 24 | 25 | ```python 26 | print(np.__version__) 27 | np.show_config() 28 | ``` 29 | #### 3. Create a null vector of size 10 (★☆☆) 30 | 31 | 32 | ```python 33 | Z = np.zeros(10) 34 | print(Z) 35 | ``` 36 | #### 4. How to find the memory size of any array (★☆☆) 37 | 38 | 39 | ```python 40 | Z = np.zeros((10,10)) 41 | print("%d bytes" % (Z.size * Z.itemsize)) 42 | 43 | # Simpler alternative 44 | print("%d bytes" % Z.nbytes) 45 | ``` 46 | #### 5.
How to get the documentation of the numpy add function from the command line? (★☆☆) 47 | 48 | 49 | ```python 50 | %run `python -c "import numpy; numpy.info(numpy.add)"` 51 | ``` 52 | #### 6. Create a null vector of size 10 but the fifth value which is 1 (★☆☆) 53 | 54 | 55 | ```python 56 | Z = np.zeros(10) 57 | Z[4] = 1 58 | print(Z) 59 | ``` 60 | #### 7. Create a vector with values ranging from 10 to 49 (★☆☆) 61 | 62 | 63 | ```python 64 | Z = np.arange(10,50) 65 | print(Z) 66 | ``` 67 | #### 8. Reverse a vector (first element becomes last) (★☆☆) 68 | 69 | 70 | ```python 71 | Z = np.arange(50) 72 | Z = Z[::-1] 73 | print(Z) 74 | ``` 75 | #### 9. Create a 3x3 matrix with values ranging from 0 to 8 (★☆☆) 76 | 77 | 78 | ```python 79 | Z = np.arange(9).reshape(3, 3) 80 | print(Z) 81 | ``` 82 | #### 10. Find indices of non-zero elements from [1,2,0,0,4,0] (★☆☆) 83 | 84 | 85 | ```python 86 | nz = np.nonzero([1,2,0,0,4,0]) 87 | print(nz) 88 | ``` 89 | #### 11. Create a 3x3 identity matrix (★☆☆) 90 | 91 | 92 | ```python 93 | Z = np.eye(3) 94 | print(Z) 95 | ``` 96 | #### 12. Create a 3x3x3 array with random values (★☆☆) 97 | 98 | 99 | ```python 100 | Z = np.random.random((3,3,3)) 101 | print(Z) 102 | ``` 103 | #### 13. Create a 10x10 array with random values and find the minimum and maximum values (★☆☆) 104 | 105 | 106 | ```python 107 | Z = np.random.random((10,10)) 108 | Zmin, Zmax = Z.min(), Z.max() 109 | print(Zmin, Zmax) 110 | ``` 111 | #### 14. Create a random vector of size 30 and find the mean value (★☆☆) 112 | 113 | 114 | ```python 115 | Z = np.random.random(30) 116 | m = Z.mean() 117 | print(m) 118 | ``` 119 | #### 15. Create a 2d array with 1 on the border and 0 inside (★☆☆) 120 | 121 | 122 | ```python 123 | Z = np.ones((10,10)) 124 | Z[1:-1,1:-1] = 0 125 | print(Z) 126 | ``` 127 | #### 16. How to add a border (filled with 0's) around an existing array? 
(★☆☆) 128 | 129 | 130 | ```python 131 | Z = np.ones((5,5)) 132 | Z = np.pad(Z, pad_width=1, mode='constant', constant_values=0) 133 | print(Z) 134 | 135 | # Using fancy indexing 136 | Z[:, [0, -1]] = 0 137 | Z[[0, -1], :] = 0 138 | print(Z) 139 | ``` 140 | #### 17. What is the result of the following expression? (★☆☆) 141 | ```python 142 | 0 * np.nan 143 | np.nan == np.nan 144 | np.inf > np.nan 145 | np.nan - np.nan 146 | np.nan in set([np.nan]) 147 | 0.3 == 3 * 0.1 148 | ``` 149 | 150 | 151 | ```python 152 | print(0 * np.nan) 153 | print(np.nan == np.nan) 154 | print(np.inf > np.nan) 155 | print(np.nan - np.nan) 156 | print(np.nan in set([np.nan])) 157 | print(0.3 == 3 * 0.1) 158 | ``` 159 | #### 18. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal (★☆☆) 160 | 161 | 162 | ```python 163 | Z = np.diag(1+np.arange(4),k=-1) 164 | print(Z) 165 | ``` 166 | #### 19. Create a 8x8 matrix and fill it with a checkerboard pattern (★☆☆) 167 | 168 | 169 | ```python 170 | Z = np.zeros((8,8),dtype=int) 171 | Z[1::2,::2] = 1 172 | Z[::2,1::2] = 1 173 | print(Z) 174 | ``` 175 | #### 20. Consider a (6,7,8) shape array, what is the index (x,y,z) of the 100th element? (★☆☆) 176 | 177 | 178 | ```python 179 | print(np.unravel_index(99,(6,7,8))) 180 | ``` 181 | #### 21. Create a checkerboard 8x8 matrix using the tile function (★☆☆) 182 | 183 | 184 | ```python 185 | Z = np.tile( np.array([[0,1],[1,0]]), (4,4)) 186 | print(Z) 187 | ``` 188 | #### 22. Normalize a 5x5 random matrix (★☆☆) 189 | 190 | 191 | ```python 192 | Z = np.random.random((5,5)) 193 | Z = (Z - np.mean (Z)) / (np.std (Z)) 194 | print(Z) 195 | ``` 196 | #### 23. Create a custom dtype that describes a color as four unsigned bytes (RGBA) (★☆☆) 197 | 198 | 199 | ```python 200 | color = np.dtype([("r", np.ubyte), 201 | ("g", np.ubyte), 202 | ("b", np.ubyte), 203 | ("a", np.ubyte)]) 204 | ``` 205 | #### 24. 
Multiply a 5x3 matrix by a 3x2 matrix (real matrix product) (★☆☆) 206 | 207 | 208 | ```python 209 | Z = np.matmul(np.ones((5, 3)), np.ones((3, 2))) 210 | print(Z) 211 | 212 | # Alternative solution, in Python 3.5 and above 213 | Z = np.ones((5,3)) @ np.ones((3,2)) 214 | print(Z) 215 | ``` 216 | #### 25. Given a 1D array, negate all elements which are between 3 and 8, in place. (★☆☆) 217 | 218 | 219 | ```python 220 | # Author: Evgeni Burovski 221 | 222 | Z = np.arange(11) 223 | Z[(3 < Z) & (Z < 8)] *= -1 224 | print(Z) 225 | ``` 226 | #### 26. What is the output of the following script? (★☆☆) 227 | ```python 228 | # Author: Jake VanderPlas 229 | 230 | print(sum(range(5),-1)) 231 | from numpy import * 232 | print(sum(range(5),-1)) 233 | ``` 234 | 235 | 236 | ```python 237 | # Author: Jake VanderPlas 238 | 239 | print(sum(range(5),-1)) 240 | from numpy import * 241 | print(sum(range(5),-1)) 242 | ``` 243 | #### 27. Consider an integer vector Z, which of these expressions are legal? (★☆☆) 244 | ```python 245 | Z**Z 246 | 2 << Z >> 2 247 | Z <- Z 248 | 1j*Z 249 | Z/1/1 250 | Z<Z>Z 251 | ``` 252 | 253 | 254 | ```python 255 | Z**Z 256 | 2 << Z >> 2 257 | Z <- Z 258 | 1j*Z 259 | Z/1/1 260 | Z<Z>Z 261 | ``` 262 | #### 28. What are the results of the following expressions? (★☆☆) 263 | ```python 264 | np.array(0) / np.array(0) 265 | np.array(0) // np.array(0) 266 | np.array([np.nan]).astype(int).astype(float) 267 | ``` 268 | 269 | 270 | ```python 271 | print(np.array(0) / np.array(0)) 272 | print(np.array(0) // np.array(0)) 273 | print(np.array([np.nan]).astype(int).astype(float)) 274 | ``` 275 | #### 29. How to round a float array away from zero? (★☆☆) 276 | 277 | 278 | ```python 279 | # Author: Charles R Harris 280 | 281 | Z = np.random.uniform(-10,+10,10) 282 | print(np.copysign(np.ceil(np.abs(Z)), Z)) 283 | 284 | # More readable but less efficient 285 | print(np.where(Z>0, np.ceil(Z), np.floor(Z))) 286 | ``` 287 | #### 30. How to find common values between two arrays?
(★☆☆) 288 | 289 | 290 | ```python 291 | Z1 = np.random.randint(0,10,10) 292 | Z2 = np.random.randint(0,10,10) 293 | print(np.intersect1d(Z1,Z2)) 294 | ``` 295 | #### 31. How to ignore all numpy warnings (not recommended)? (★☆☆) 296 | 297 | 298 | ```python 299 | # Silence all floating-point warnings and errors 300 | defaults = np.seterr(all="ignore") 301 | Z = np.ones(1) / 0 302 | 303 | # Restore the previous error-handling settings 304 | _ = np.seterr(**defaults) 305 | 306 | # Equivalently with a context manager 307 | with np.errstate(all="ignore"): 308 | np.arange(3) / 0 309 | ``` 310 | #### 32. Is the following expression true? (★☆☆) 311 | ```python 312 | np.sqrt(-1) == np.emath.sqrt(-1) 313 | ``` 314 | 315 | 316 | ```python 317 | np.sqrt(-1) == np.emath.sqrt(-1) 318 | ``` 319 | #### 33. How to get the dates of yesterday, today and tomorrow? (★☆☆) 320 | 321 | 322 | ```python 323 | yesterday = np.datetime64('today') - np.timedelta64(1) 324 | today = np.datetime64('today') 325 | tomorrow = np.datetime64('today') + np.timedelta64(1) 326 | ``` 327 | #### 34. How to get all the dates corresponding to the month of July 2016? (★★☆) 328 | 329 | 330 | ```python 331 | Z = np.arange('2016-07', '2016-08', dtype='datetime64[D]') 332 | print(Z) 333 | ``` 334 | #### 35. How to compute ((A+B)*(-A/2)) in place (without copy)? (★★☆) 335 | 336 | 337 | ```python 338 | A = np.ones(3)*1 339 | B = np.ones(3)*2 340 | np.add(A,B,out=B) 341 | np.divide(A,2,out=A) 342 | np.negative(A,out=A) 343 | np.multiply(A,B,out=A) 344 | ``` 345 | #### 36. Extract the integer part of a random array of positive numbers using 4 different methods (★★☆) 346 | 347 | 348 | ```python 349 | Z = np.random.uniform(0,10,10) 350 | 351 | print(Z - Z%1) 352 | print(Z // 1) 353 | print(np.floor(Z)) 354 | print(Z.astype(int)) 355 | print(np.trunc(Z)) 356 | ``` 357 | #### 37.
Create a 5x5 matrix with row values ranging from 0 to 4 (★★☆) 358 | 359 | 360 | ```python 361 | Z = np.zeros((5,5)) 362 | Z += np.arange(5) 363 | print(Z) 364 | 365 | # without broadcasting 366 | Z = np.tile(np.arange(0, 5), (5,1)) 367 | print(Z) 368 | ``` 369 | #### 38. Consider a generator function that generates 10 integers and use it to build an array (★☆☆) 370 | 371 | 372 | ```python 373 | def generate(): 374 | for x in range(10): 375 | yield x 376 | Z = np.fromiter(generate(),dtype=float,count=-1) 377 | print(Z) 378 | ``` 379 | #### 39. Create a vector of size 10 with values ranging from 0 to 1, both excluded (★★☆) 380 | 381 | 382 | ```python 383 | Z = np.linspace(0,1,11,endpoint=False)[1:] 384 | print(Z) 385 | ``` 386 | #### 40. Create a random vector of size 10 and sort it (★★☆) 387 | 388 | 389 | ```python 390 | Z = np.random.random(10) 391 | Z.sort() 392 | print(Z) 393 | ``` 394 | #### 41. How to sum a small array faster than np.sum? (★★☆) 395 | 396 | 397 | ```python 398 | # Author: Evgeni Burovski 399 | 400 | Z = np.arange(10) 401 | np.add.reduce(Z) 402 | ``` 403 | #### 42. Consider two random arrays A and B, check if they are equal (★★☆) 404 | 405 | 406 | ```python 407 | A = np.random.randint(0,2,5) 408 | B = np.random.randint(0,2,5) 409 | 410 | # Assuming identical shape of the arrays and a tolerance for the comparison of values 411 | equal = np.allclose(A,B) 412 | print(equal) 413 | 414 | # Checking both the shape and the element values, no tolerance (values have to be exactly equal) 415 | equal = np.array_equal(A,B) 416 | print(equal) 417 | ``` 418 | #### 43. Make an array immutable (read-only) (★★☆) 419 | 420 | 421 | ```python 422 | Z = np.zeros(10) 423 | Z.flags.writeable = False 424 | Z[0] = 1 425 | ``` 426 | #### 44. 
Consider a random 10x2 matrix representing cartesian coordinates, convert them to polar coordinates (★★☆) 427 | 428 | 429 | ```python 430 | Z = np.random.random((10,2)) 431 | X,Y = Z[:,0], Z[:,1] 432 | R = np.sqrt(X**2+Y**2) 433 | T = np.arctan2(Y,X) 434 | print(R) 435 | print(T) 436 | ``` 437 | #### 45. Create random vector of size 10 and replace the maximum value by 0 (★★☆) 438 | 439 | 440 | ```python 441 | Z = np.random.random(10) 442 | Z[Z.argmax()] = 0 443 | print(Z) 444 | ``` 445 | #### 46. Create a structured array with `x` and `y` coordinates covering the [0,1]x[0,1] area (★★☆) 446 | 447 | 448 | ```python 449 | Z = np.zeros((5,5), [('x',float),('y',float)]) 450 | Z['x'], Z['y'] = np.meshgrid(np.linspace(0,1,5), 451 | np.linspace(0,1,5)) 452 | print(Z) 453 | ``` 454 | #### 47. Given two arrays, X and Y, construct the Cauchy matrix C (Cij =1/(xi - yj)) (★★☆) 455 | 456 | 457 | ```python 458 | # Author: Evgeni Burovski 459 | 460 | X = np.arange(8) 461 | Y = X + 0.5 462 | C = 1.0 / np.subtract.outer(X, Y) 463 | print(np.linalg.det(C)) 464 | ``` 465 | #### 48. Print the minimum and maximum representable values for each numpy scalar type (★★☆) 466 | 467 | 468 | ```python 469 | for dtype in [np.int8, np.int32, np.int64]: 470 | print(np.iinfo(dtype).min) 471 | print(np.iinfo(dtype).max) 472 | for dtype in [np.float32, np.float64]: 473 | print(np.finfo(dtype).min) 474 | print(np.finfo(dtype).max) 475 | print(np.finfo(dtype).eps) 476 | ``` 477 | #### 49. How to print all the values of an array? (★★☆) 478 | 479 | 480 | ```python 481 | np.set_printoptions(threshold=float("inf")) 482 | Z = np.zeros((40,40)) 483 | print(Z) 484 | ``` 485 | #### 50. How to find the closest value (to a given scalar) in a vector? (★★☆) 486 | 487 | 488 | ```python 489 | Z = np.arange(100) 490 | v = np.random.uniform(0,100) 491 | index = (np.abs(Z-v)).argmin() 492 | print(Z[index]) 493 | ``` 494 | #### 51. 
Create a structured array representing a position (x,y) and a color (r,g,b) (★★☆) 495 | 496 | 497 | ```python 498 | Z = np.zeros(10, [ ('position', [ ('x', float, 1), 499 | ('y', float, 1)]), 500 | ('color', [ ('r', float, 1), 501 | ('g', float, 1), 502 | ('b', float, 1)])]) 503 | print(Z) 504 | ``` 505 | #### 52. Consider a random vector with shape (100,2) representing coordinates, find point by point distances (★★☆) 506 | 507 | 508 | ```python 509 | Z = np.random.random((10,2)) 510 | X,Y = np.atleast_2d(Z[:,0], Z[:,1]) 511 | D = np.sqrt( (X-X.T)**2 + (Y-Y.T)**2) 512 | print(D) 513 | 514 | # Much faster with scipy 515 | import scipy 516 | # Thanks Gavin Heverly-Coulson (#issue 1) 517 | import scipy.spatial 518 | 519 | Z = np.random.random((10,2)) 520 | D = scipy.spatial.distance.cdist(Z,Z) 521 | print(D) 522 | ``` 523 | #### 53. How to convert a float (32 bits) array into an integer (32 bits) array in place? (★★☆) 524 | 525 | 526 | ```python 527 | # Thanks Vikas (https://stackoverflow.com/a/10622758/5989906) 528 | # & unutbu (https://stackoverflow.com/a/4396247/5989906) 529 | Z = (np.random.rand(10)*100).astype(np.float32) 530 | Y = Z.view(np.int32) 531 | Y[:] = Z 532 | print(Y) 533 | ``` 534 | #### 54. How to read the following file? (★★☆) 535 | ``` 536 | 1, 2, 3, 4, 5 537 | 6, , , 7, 8 538 | , , 9,10,11 539 | ``` 540 | 541 | 542 | ```python 543 | from io import StringIO 544 | 545 | # Fake file 546 | s = StringIO('''1, 2, 3, 4, 5 547 | 548 | 6, , , 7, 8 549 | 550 | , , 9,10,11 551 | ''') 552 | Z = np.genfromtxt(s, delimiter=",", dtype=int) 553 | print(Z) 554 | ``` 555 | #### 55. What is the equivalent of enumerate for numpy arrays? (★★☆) 556 | 557 | 558 | ```python 559 | Z = np.arange(9).reshape(3,3) 560 | for index, value in np.ndenumerate(Z): 561 | print(index, value) 562 | for index in np.ndindex(Z.shape): 563 | print(index, Z[index]) 564 | ``` 565 | #### 56.
Generate a generic 2D Gaussian-like array (★★☆) 566 | 567 | 568 | ```python 569 | X, Y = np.meshgrid(np.linspace(-1,1,10), np.linspace(-1,1,10)) 570 | D = np.sqrt(X*X+Y*Y) 571 | sigma, mu = 1.0, 0.0 572 | G = np.exp(-( (D-mu)**2 / ( 2.0 * sigma**2 ) ) ) 573 | print(G) 574 | ``` 575 | #### 57. How to randomly place p elements in a 2D array? (★★☆) 576 | 577 | 578 | ```python 579 | # Author: Divakar 580 | 581 | n = 10 582 | p = 3 583 | Z = np.zeros((n,n)) 584 | np.put(Z, np.random.choice(range(n*n), p, replace=False),1) 585 | print(Z) 586 | ``` 587 | #### 58. Subtract the mean of each row of a matrix (★★☆) 588 | 589 | 590 | ```python 591 | # Author: Warren Weckesser 592 | 593 | X = np.random.rand(5, 10) 594 | 595 | # Recent versions of numpy 596 | Y = X - X.mean(axis=1, keepdims=True) 597 | 598 | # Older versions of numpy 599 | Y = X - X.mean(axis=1).reshape(-1, 1) 600 | 601 | print(Y) 602 | ``` 603 | #### 59. How to sort an array by the nth column? (★★☆) 604 | 605 | 606 | ```python 607 | # Author: Steve Tjoa 608 | 609 | Z = np.random.randint(0,10,(3,3)) 610 | print(Z) 611 | print(Z[Z[:,1].argsort()]) 612 | ``` 613 | #### 60. How to tell if a given 2D array has null columns? (★★☆) 614 | 615 | 616 | ```python 617 | # Author: Warren Weckesser 618 | 619 | # null : 0 620 | Z = np.random.randint(0,3,(3,10)) 621 | print((~Z.any(axis=0)).any()) 622 | 623 | # null : np.nan 624 | Z=np.array([ 625 | [0,1,np.nan], 626 | [1,2,np.nan], 627 | [4,5,np.nan] 628 | ]) 629 | print(np.isnan(Z).all(axis=0)) 630 | ``` 631 | #### 61. Find the nearest value from a given value in an array (★★☆) 632 | 633 | 634 | ```python 635 | Z = np.random.uniform(0,1,10) 636 | z = 0.5 637 | m = Z.flat[np.abs(Z - z).argmin()] 638 | print(m) 639 | ``` 640 | #### 62. Considering two arrays with shape (1,3) and (3,1), how to compute their sum using an iterator? 
(★★☆) 641 | 642 | 643 | ```python 644 | A = np.arange(3).reshape(3,1) 645 | B = np.arange(3).reshape(1,3) 646 | it = np.nditer([A,B,None]) 647 | for x,y,z in it: z[...] = x + y 648 | print(it.operands[2]) 649 | ``` 650 | #### 63. Create an array class that has a name attribute (★★☆) 651 | 652 | 653 | ```python 654 | class NamedArray(np.ndarray): 655 | def __new__(cls, array, name="no name"): 656 | obj = np.asarray(array).view(cls) 657 | obj.name = name 658 | return obj 659 | def __array_finalize__(self, obj): 660 | if obj is None: return 661 | self.name = getattr(obj, 'name', "no name") 662 | 663 | Z = NamedArray(np.arange(10), "range_10") 664 | print (Z.name) 665 | ``` 666 | #### 64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★) 667 | 668 | 669 | ```python 670 | # Author: Brett Olsen 671 | 672 | Z = np.ones(10) 673 | I = np.random.randint(0,len(Z),20) 674 | Z += np.bincount(I, minlength=len(Z)) 675 | print(Z) 676 | 677 | # Another solution 678 | # Author: Bartosz Telenczuk 679 | np.add.at(Z, I, 1) 680 | print(Z) 681 | ``` 682 | #### 65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★) 683 | 684 | 685 | ```python 686 | # Author: Alan G Isaac 687 | 688 | X = [1,2,3,4,5,6] 689 | I = [1,3,9,3,4,1] 690 | F = np.bincount(I,X) 691 | print(F) 692 | ``` 693 | #### 66. 
Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★☆) 694 | 695 | 696 | ```python 697 | # Author: Fisher Wang 698 | 699 | w, h = 256, 256 700 | I = np.random.randint(0, 4, (h, w, 3)).astype(np.ubyte) 701 | colors = np.unique(I.reshape(-1, 3), axis=0) 702 | n = len(colors) 703 | print(n) 704 | 705 | # Faster version 706 | # Author: Mark Setchell 707 | # https://stackoverflow.com/a/59671950/2836621 708 | 709 | w, h = 256, 256 710 | I = np.random.randint(0,4,(h,w,3), dtype=np.uint8) 711 | 712 | # View each pixel as a single 24-bit integer, rather than three 8-bit bytes 713 | I24 = np.dot(I.astype(np.uint32),[1,256,65536]) 714 | 715 | # Count unique colours 716 | n = len(np.unique(I24)) 717 | print(n) 718 | ``` 719 | #### 67. Considering a four dimensions array, how to get sum over the last two axis at once? (★★★) 720 | 721 | 722 | ```python 723 | A = np.random.randint(0,10,(3,4,3,4)) 724 | # solution by passing a tuple of axes (introduced in numpy 1.7.0) 725 | sum = A.sum(axis=(-2,-1)) 726 | print(sum) 727 | # solution by flattening the last two dimensions into one 728 | # (useful for functions that don't accept tuples for axis argument) 729 | sum = A.reshape(A.shape[:-2] + (-1,)).sum(axis=-1) 730 | print(sum) 731 | ``` 732 | #### 68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset indices? (★★★) 733 | 734 | 735 | ```python 736 | # Author: Jaime Fernández del Río 737 | 738 | D = np.random.uniform(0,1,100) 739 | S = np.random.randint(0,10,100) 740 | D_sums = np.bincount(S, weights=D) 741 | D_counts = np.bincount(S) 742 | D_means = D_sums / D_counts 743 | print(D_means) 744 | 745 | # Pandas solution as a reference due to more intuitive code 746 | import pandas as pd 747 | print(pd.Series(D).groupby(S).mean()) 748 | ``` 749 | #### 69. How to get the diagonal of a dot product? 
(★★★) 750 | 751 | 752 | ```python 753 | # Author: Mathieu Blondel 754 | 755 | A = np.random.uniform(0,1,(5,5)) 756 | B = np.random.uniform(0,1,(5,5)) 757 | 758 | # Slow version 759 | np.diag(np.dot(A, B)) 760 | 761 | # Fast version 762 | np.sum(A * B.T, axis=1) 763 | 764 | # Faster version 765 | np.einsum("ij,ji->i", A, B) 766 | ``` 767 | #### 70. Consider the vector [1, 2, 3, 4, 5], how to build a new vector with 3 consecutive zeros interleaved between each value? (★★★) 768 | 769 | 770 | ```python 771 | # Author: Warren Weckesser 772 | 773 | Z = np.array([1,2,3,4,5]) 774 | nz = 3 775 | Z0 = np.zeros(len(Z) + (len(Z)-1)*(nz)) 776 | Z0[::nz+1] = Z 777 | print(Z0) 778 | ``` 779 | #### 71. Consider an array of dimension (5,5,3), how to multiply it by an array with dimensions (5,5)? (★★★) 780 | 781 | 782 | ```python 783 | A = np.ones((5,5,3)) 784 | B = 2*np.ones((5,5)) 785 | print(A * B[:,:,None]) 786 | ``` 787 | #### 72. How to swap two rows of an array? (★★★) 788 | 789 | 790 | ```python 791 | # Author: Eelco Hoogendoorn 792 | 793 | A = np.arange(25).reshape(5,5) 794 | A[[0,1]] = A[[1,0]] 795 | print(A) 796 | ``` 797 | #### 73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the triangles (★★★) 798 | 799 | 800 | ```python 801 | # Author: Nicolas P. Rougier 802 | 803 | faces = np.random.randint(0,100,(10,3)) 804 | F = np.roll(faces.repeat(2,axis=1),-1,axis=1) 805 | F = F.reshape(len(F)*3,2) 806 | F = np.sort(F,axis=1) 807 | G = F.view( dtype=[('p0',F.dtype),('p1',F.dtype)] ) 808 | G = np.unique(G) 809 | print(G) 810 | ``` 811 | #### 74. Given a sorted array C that corresponds to a bincount, how to produce an array A such that np.bincount(A) == C? (★★★) 812 | 813 | 814 | ```python 815 | # Author: Jaime Fernández del Río 816 | 817 | C = np.bincount([1,1,2,3,4,4,6]) 818 | A = np.repeat(np.arange(len(C)), C) 819 | print(A) 820 | ``` 821 | #### 75. 
How to compute averages using a sliding window over an array? (★★★) 822 | 823 | 824 | ```python 825 | # Author: Jaime Fernández del Río 826 | 827 | def moving_average(a, n=3) : 828 | ret = np.cumsum(a, dtype=float) 829 | ret[n:] = ret[n:] - ret[:-n] 830 | return ret[n - 1:] / n 831 | Z = np.arange(20) 832 | print(moving_average(Z, n=3)) 833 | 834 | # Author: Jeff Luo (@Jeff1999) 835 | # make sure your NumPy >= 1.20.0 836 | 837 | from numpy.lib.stride_tricks import sliding_window_view 838 | 839 | Z = np.arange(20) 840 | print(sliding_window_view(Z, window_shape=3).mean(axis=-1)) 841 | ``` 842 | #### 76. Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1]) (★★★) 843 | 844 | 845 | ```python 846 | # Author: Joe Kington / Erik Rigtorp 847 | from numpy.lib import stride_tricks 848 | 849 | def rolling(a, window): 850 | shape = (a.size - window + 1, window) 851 | strides = (a.strides[0], a.strides[0]) 852 | return stride_tricks.as_strided(a, shape=shape, strides=strides) 853 | Z = rolling(np.arange(10), 3) 854 | print(Z) 855 | 856 | # Author: Jeff Luo (@Jeff1999) 857 | 858 | Z = np.arange(10) 859 | print(sliding_window_view(Z, window_shape=3)) 860 | ``` 861 | #### 77. How to negate a boolean, or to change the sign of a float inplace? (★★★) 862 | 863 | 864 | ```python 865 | # Author: Nathaniel J. Smith 866 | 867 | Z = np.random.randint(0,2,100) 868 | np.logical_not(Z, out=Z) 869 | 870 | Z = np.random.uniform(-1.0,1.0,100) 871 | np.negative(Z, out=Z) 872 | ``` 873 | #### 78. Consider 2 sets of points P0,P1 describing lines (2d) and a point p, how to compute distance from p to each line i (P0[i],P1[i])? 
(★★★) 874 | 875 | 876 | ```python 877 | P0 = np.random.uniform(-10,10,(10,2)) 878 | P1 = np.random.uniform(-10,10,(10,2)) 879 | p = np.random.uniform(-10,10,( 1,2)) 880 | 881 | def distance_faster(P0,P1,p): 882 | #Author: Hemanth Pasupuleti 883 | #Reference: https://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html 884 | 885 | v = P1- P0 886 | v[:,[0,1]] = v[:,[1,0]] 887 | v[:,1]*=-1 888 | norm = np.linalg.norm(v,axis=1) 889 | r = P0 - p 890 | d = np.abs(np.einsum("ij,ij->i",r,v)) / norm 891 | 892 | return d 893 | 894 | print(distance_faster(P0, P1, p)) 895 | 896 | ##--------------- OR ---------------## 897 | def distance_slower(P0, P1, p): 898 | T = P1 - P0 899 | L = (T**2).sum(axis=1) 900 | U = -((P0[:,0]-p[...,0])*T[:,0] + (P0[:,1]-p[...,1])*T[:,1]) / L 901 | U = U.reshape(len(U),1) 902 | D = P0 + U*T - p 903 | return np.sqrt((D**2).sum(axis=1)) 904 | 905 | print(distance_slower(P0, P1, p)) 906 | ``` 907 | #### 79. Consider 2 sets of points P0,P1 describing lines (2d) and a set of points P, how to compute distance from each point j (P[j]) to each line i (P0[i],P1[i])? 
(★★★) 908 | 909 | 910 | ```python 911 | # Author: Italmassov Kuanysh 912 | 913 | # based on distance_faster from the previous question (one row of distances per point) 914 | P0 = np.random.uniform(-10, 10, (10,2)) 915 | P1 = np.random.uniform(-10,10,(10,2)) 916 | p = np.random.uniform(-10, 10, (10,2)) 917 | print(np.array([distance_faster(P0,P1,p_i) for p_i in p])) 918 | 919 | # Author: Yang Wu (Broadcasting) - returns an (n points, n lines) array of distances 920 | def distance_points_to_lines(p: np.ndarray, p_1: np.ndarray, p_2: np.ndarray) -> np.ndarray: 921 | x_0, y_0 = p.T # Shape -> (n points, ) 922 | x_1, y_1 = p_1.T # Shape -> (n lines, ) 923 | x_2, y_2 = p_2.T # Shape -> (n lines, ) 924 | 925 | # Displacement vector coordinates from p_1 -> p_2 926 | dx = x_2 - x_1 # Shape -> (n lines, ) 927 | dy = y_2 - y_1 # Shape -> (n lines, ) 928 | 929 | # The 'cross product' term 930 | cross_term = x_2 * y_1 - y_2 * x_1 # Shape -> (n lines, ) 931 | 932 | # Broadcast x_0, y_0 (n points, 1) and dx, dy, cross_term (1, n lines) -> (n points, n lines) 933 | numerator = np.abs( 934 | dy[np.newaxis, :] * x_0[:, np.newaxis] 935 | - dx[np.newaxis, :] * y_0[:, np.newaxis] 936 | + cross_term[np.newaxis, :] 937 | ) 938 | denominator = np.sqrt(dx**2 + dy**2) # Shape -> (n lines, ) 939 | 940 | # Shape (n points, n lines) / (1, n_lines) -> (n points, n lines) 941 | return numerator / denominator[np.newaxis, :] 942 | 943 | print(distance_points_to_lines(p, P0, P1)) 944 | ``` 945 | #### 80. 
Consider an arbitrary array, write a function that extracts a subpart with a fixed shape and centered on a given element (pad with a `fill` value when necessary) (★★★) 946 | 947 | 948 | ```python 949 | # Author: Nicolas Rougier 950 | 951 | Z = np.random.randint(0,10,(10,10)) 952 | shape = (5,5) 953 | fill = 0 954 | position = (1,1) 955 | 956 | R = np.ones(shape, dtype=Z.dtype)*fill 957 | P = np.array(list(position)).astype(int) 958 | Rs = np.array(list(R.shape)).astype(int) 959 | Zs = np.array(list(Z.shape)).astype(int) 960 | 961 | R_start = np.zeros((len(shape),)).astype(int) 962 | R_stop = np.array(list(shape)).astype(int) 963 | Z_start = (P-Rs//2) 964 | Z_stop = (P+Rs//2)+Rs%2 965 | 966 | R_start = (R_start - np.minimum(Z_start,0)).tolist() 967 | Z_start = (np.maximum(Z_start,0)).tolist() 968 | R_stop = np.maximum(R_start, (R_stop - np.maximum(Z_stop-Zs,0))).tolist() 969 | Z_stop = (np.minimum(Z_stop,Zs)).tolist() 970 | 971 | r = tuple([slice(start,stop) for start,stop in zip(R_start,R_stop)]) 972 | z = tuple([slice(start,stop) for start,stop in zip(Z_start,Z_stop)]) 973 | R[r] = Z[z] 974 | print(Z) 975 | print(R) 976 | ``` 977 | #### 81. Consider an array Z = [1,2,3,4,5,6,7,8,9,10,11,12,13,14], how to generate an array R = [[1,2,3,4], [2,3,4,5], [3,4,5,6], ..., [11,12,13,14]]? (★★★) 978 | 979 | 980 | ```python 981 | # Author: Stefan van der Walt 982 | 983 | Z = np.arange(1,15,dtype=np.uint32) 984 | R = stride_tricks.as_strided(Z,(11,4),(4,4)) 985 | print(R) 986 | 987 | # Author: Jeff Luo (@Jeff1999) 988 | 989 | Z = np.arange(1, 15, dtype=np.uint32) 990 | print(sliding_window_view(Z, window_shape=4)) 991 | ``` 992 | #### 82. 
Compute a matrix rank (★★★) 993 | 994 | 995 | ```python 996 | # Author: Stefan van der Walt 997 | 998 | Z = np.random.uniform(0,1,(10,10)) 999 | U, S, V = np.linalg.svd(Z) # Singular Value Decomposition 1000 | threshold = len(S) * S.max() * np.finfo(S.dtype).eps 1001 | rank = np.sum(S > threshold) 1002 | print(rank) 1003 | 1004 | # alternative solution: 1005 | # Author: Jeff Luo (@Jeff1999) 1006 | 1007 | rank = np.linalg.matrix_rank(Z) 1008 | print(rank) 1009 | ``` 1010 | #### 83. How to find the most frequent value in an array? 1011 | 1012 | 1013 | ```python 1014 | Z = np.random.randint(0,10,50) 1015 | print(np.bincount(Z).argmax()) 1016 | ``` 1017 | #### 84. Extract all the contiguous 3x3 blocks from a random 10x10 matrix (★★★) 1018 | 1019 | 1020 | ```python 1021 | # Author: Chris Barker 1022 | 1023 | Z = np.random.randint(0,5,(10,10)) 1024 | n = 3 1025 | i = 1 + (Z.shape[0]-n) # number of block positions along axis 0 1026 | j = 1 + (Z.shape[1]-n) # number of block positions along axis 1 1027 | C = stride_tricks.as_strided(Z, shape=(i, j, n, n), strides=Z.strides + Z.strides) 1028 | print(C) 1029 | 1030 | # Author: Jeff Luo (@Jeff1999) 1031 | 1032 | Z = np.random.randint(0,5,(10,10)) 1033 | print(sliding_window_view(Z, window_shape=(3, 3))) 1034 | ``` 1035 | #### 85. Create a 2D array subclass such that Z[i,j] == Z[j,i] (★★★) 1036 | 1037 | 1038 | ```python 1039 | # Author: Eric O. Lebigot 1040 | # Note: only works for 2d array and value setting using indices 1041 | 1042 | class Symetric(np.ndarray): 1043 | def __setitem__(self, index, value): 1044 | i,j = index 1045 | super(Symetric, self).__setitem__((i,j), value) 1046 | super(Symetric, self).__setitem__((j,i), value) 1047 | 1048 | def symetric(Z): 1049 | return np.asarray(Z + Z.T - np.diag(Z.diagonal())).view(Symetric) 1050 | 1051 | S = symetric(np.random.randint(0,10,(5,5))) 1052 | S[2,3] = 42 1053 | print(S) 1054 | ``` 1055 | #### 86. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of the p matrix products at once? 
(result has shape (n,1)) (★★★) 1056 | 1057 | 1058 | ```python 1059 | # Author: Stefan van der Walt 1060 | 1061 | p, n = 10, 20 1062 | M = np.ones((p,n,n)) 1063 | V = np.ones((p,n,1)) 1064 | S = np.tensordot(M, V, axes=[[0, 2], [0, 1]]) 1065 | print(S) 1066 | 1067 | # It works, because: 1068 | # M is (p,n,n) 1069 | # V is (p,n,1) 1070 | # Thus, summing over the paired axes 0 and 0 (of M and V independently), 1071 | # and 2 and 1, to remain with a (n,1) vector. 1072 | ``` 1073 | #### 87. Consider a 16x16 array, how to get the block-sum (block size is 4x4)? (★★★) 1074 | 1075 | 1076 | ```python 1077 | # Author: Robert Kern 1078 | 1079 | Z = np.ones((16,16)) 1080 | k = 4 1081 | S = np.add.reduceat(np.add.reduceat(Z, np.arange(0, Z.shape[0], k), axis=0), 1082 | np.arange(0, Z.shape[1], k), axis=1) 1083 | print(S) 1084 | 1085 | # alternative solution: 1086 | # Author: Sebastian Wallkötter (@FirefoxMetzger) 1087 | 1088 | Z = np.ones((16,16)) 1089 | k = 4 1090 | 1091 | windows = np.lib.stride_tricks.sliding_window_view(Z, (k, k)) 1092 | S = windows[::k, ::k, ...].sum(axis=(-2, -1)) 1093 | 1094 | # alternative solution (by @Gattocrucco) 1095 | S = Z.reshape(4, 4, 4, 4).sum((1, 3)) 1096 | ``` 1097 | #### 88. How to implement the Game of Life using numpy arrays? (★★★) 1098 | 1099 | 1100 | ```python 1101 | # Author: Nicolas Rougier 1102 | 1103 | def iterate(Z): 1104 | # Count neighbours 1105 | N = (Z[0:-2,0:-2] + Z[0:-2,1:-1] + Z[0:-2,2:] + 1106 | Z[1:-1,0:-2] + Z[1:-1,2:] + 1107 | Z[2: ,0:-2] + Z[2: ,1:-1] + Z[2: ,2:]) 1108 | 1109 | # Apply rules 1110 | birth = (N==3) & (Z[1:-1,1:-1]==0) 1111 | survive = ((N==2) | (N==3)) & (Z[1:-1,1:-1]==1) 1112 | Z[...] = 0 1113 | Z[1:-1,1:-1][birth | survive] = 1 1114 | return Z 1115 | 1116 | Z = np.random.randint(0,2,(50,50)) 1117 | for i in range(100): Z = iterate(Z) 1118 | print(Z) 1119 | ``` 1120 | #### 89. 
How to get the n largest values of an array (★★★) 1121 | 1122 | 1123 | ```python 1124 | Z = np.arange(10000) 1125 | np.random.shuffle(Z) 1126 | n = 5 1127 | 1128 | # Slow 1129 | print (Z[np.argsort(Z)[-n:]]) 1130 | 1131 | # Fast 1132 | print (Z[np.argpartition(-Z,n)[:n]]) 1133 | ``` 1134 | #### 90. Given an arbitrary number of vectors, build the cartesian product (every combination of every item) (★★★) 1135 | 1136 | 1137 | ```python 1138 | # Author: Stefan Van der Walt 1139 | 1140 | def cartesian(arrays): 1141 | arrays = [np.asarray(a) for a in arrays] 1142 | shape = (len(x) for x in arrays) 1143 | 1144 | ix = np.indices(shape, dtype=int) 1145 | ix = ix.reshape(len(arrays), -1).T 1146 | 1147 | for n, arr in enumerate(arrays): 1148 | ix[:, n] = arrays[n][ix[:, n]] 1149 | 1150 | return ix 1151 | 1152 | print (cartesian(([1, 2, 3], [4, 5], [6, 7]))) 1153 | ``` 1154 | #### 91. How to create a record array from a regular array? (★★★) 1155 | 1156 | 1157 | ```python 1158 | Z = np.array([("Hello", 2.5, 3), 1159 | ("World", 3.6, 2)]) 1160 | R = np.core.records.fromarrays(Z.T, 1161 | names='col1, col2, col3', 1162 | formats = 'S8, f8, i8') 1163 | print(R) 1164 | ``` 1165 | #### 92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★) 1166 | 1167 | 1168 | ```python 1169 | # Author: Ryan G. 1170 | 1171 | x = np.random.rand(int(5e7)) 1172 | 1173 | %timeit np.power(x,3) 1174 | %timeit x*x*x 1175 | %timeit np.einsum('i,i,i->i',x,x,x) 1176 | ``` 1177 | #### 93. Consider two arrays A and B of shape (8,3) and (2,2). How to find rows of A that contain elements of each row of B regardless of the order of the elements in B? (★★★) 1178 | 1179 | 1180 | ```python 1181 | # Author: Gabe Schwartz 1182 | 1183 | A = np.random.randint(0,5,(8,3)) 1184 | B = np.random.randint(0,5,(2,2)) 1185 | 1186 | C = (A[..., np.newaxis, np.newaxis] == B) 1187 | rows = np.where(C.any((3,1)).all(1))[0] 1188 | print(rows) 1189 | ``` 1190 | #### 94. 
Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★) 1191 | 1192 | 1193 | ```python 1194 | # Author: Robert Kern 1195 | 1196 | Z = np.random.randint(0,5,(10,3)) 1197 | print(Z) 1198 | # solution for arrays of all dtypes (including string arrays and record arrays) 1199 | E = np.all(Z[:,1:] == Z[:,:-1], axis=1) 1200 | U = Z[~E] 1201 | print(U) 1202 | # solution for numerical arrays only, will work for any number of columns in Z 1203 | U = Z[Z.max(axis=1) != Z.min(axis=1),:] 1204 | print(U) 1205 | ``` 1206 | #### 95. Convert a vector of ints into a matrix binary representation (★★★) 1207 | 1208 | 1209 | ```python 1210 | # Author: Warren Weckesser 1211 | 1212 | I = np.array([0, 1, 2, 3, 15, 16, 32, 64, 128]) 1213 | B = ((I.reshape(-1,1) & (2**np.arange(8))) != 0).astype(int) 1214 | print(B[:,::-1]) 1215 | 1216 | # Author: Daniel T. McDonald 1217 | 1218 | I = np.array([0, 1, 2, 3, 15, 16, 32, 64, 128], dtype=np.uint8) 1219 | print(np.unpackbits(I[:, np.newaxis], axis=1)) 1220 | ``` 1221 | #### 96. Given a two dimensional array, how to extract unique rows? (★★★) 1222 | 1223 | 1224 | ```python 1225 | # Author: Jaime Fernández del Río 1226 | 1227 | Z = np.random.randint(0,2,(6,3)) 1228 | T = np.ascontiguousarray(Z).view(np.dtype((np.void, Z.dtype.itemsize * Z.shape[1]))) 1229 | _, idx = np.unique(T, return_index=True) 1230 | uZ = Z[idx] 1231 | print(uZ) 1232 | 1233 | # Author: Andreas Kouzelis 1234 | # NumPy >= 1.13 1235 | uZ = np.unique(Z, axis=0) 1236 | print(uZ) 1237 | ``` 1238 | #### 97. 
Considering 2 vectors A & B, write the einsum equivalent of inner, outer, sum, and mul function (★★★) 1239 | 1240 | 1241 | ```python 1242 | # Author: Alex Riley 1243 | # Make sure to read: http://ajcr.net/Basic-guide-to-einsum/ 1244 | 1245 | A = np.random.uniform(0,1,10) 1246 | B = np.random.uniform(0,1,10) 1247 | 1248 | np.einsum('i->', A) # np.sum(A) 1249 | np.einsum('i,i->i', A, B) # A * B 1250 | np.einsum('i,i', A, B) # np.inner(A, B) 1251 | np.einsum('i,j->ij', A, B) # np.outer(A, B) 1252 | ``` 1253 | #### 98. Considering a path described by two vectors (X,Y), how to sample it using equidistant samples (★★★)? 1254 | 1255 | 1256 | ```python 1257 | # Author: Bas Swinckels 1258 | 1259 | phi = np.arange(0, 10*np.pi, 0.1) 1260 | a = 1 1261 | x = a*phi*np.cos(phi) 1262 | y = a*phi*np.sin(phi) 1263 | 1264 | dr = (np.diff(x)**2 + np.diff(y)**2)**.5 # segment lengths 1265 | r = np.zeros_like(x) 1266 | r[1:] = np.cumsum(dr) # integrate path 1267 | r_int = np.linspace(0, r.max(), 200) # regularly spaced positions along the path 1268 | x_int = np.interp(r_int, r, x) # interpolate x at those positions 1269 | y_int = np.interp(r_int, r, y) # interpolate y at those positions 1270 | ``` 1271 | #### 99. Given an integer n and a 2D array X, select from X the rows which can be interpreted as draws from a multinomial distribution with n degrees, i.e., the rows which only contain integers and which sum to n. (★★★) 1272 | 1273 | 1274 | ```python 1275 | # Author: Evgeni Burovski 1276 | 1277 | X = np.asarray([[1.0, 0.0, 3.0, 8.0], 1278 | [2.0, 0.0, 1.0, 1.0], 1279 | [1.5, 2.5, 1.0, 0.0]]) 1280 | n = 4 1281 | M = np.logical_and.reduce(np.mod(X, 1) == 0, axis=-1) 1282 | M &= (X.sum(axis=-1) == n) 1283 | print(X[M]) 1284 | ``` 1285 | #### 100. Compute bootstrapped 95% confidence intervals for the mean of a 1D array X (i.e., resample the elements of an array with replacement N times, compute the mean of each sample, and then compute percentiles over the means). (★★★) 1286 | 1287 | 1288 | ```python 1289 | # Author: Jessica B. 
Hamrick 1290 | 1291 | X = np.random.randn(100) # random 1D array 1292 | N = 1000 # number of bootstrap samples 1293 | idx = np.random.randint(0, X.size, (N, X.size)) 1294 | means = X[idx].mean(axis=1) 1295 | confint = np.percentile(means, [2.5, 97.5]) 1296 | print(confint) 1297 | ``` -------------------------------------------------------------------------------- /Intro/Notebooks/100_Numpy_exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5a28b949", 6 | "metadata": {}, 7 | "source": [ 8 | "# 100 numpy exercises\n", 9 | "\n", 10 | "The original source of this notebook is [here](https://github.com/rougier/numpy-100). Credits to Nicolas P. Rougier. A video with live coding solutions is also available [here](https://www.youtube.com/watch?v=PM504XhEVCU).\n", 11 | "\n", 12 | "---\n", 13 | "\n", 14 | "This is a collection of exercises that have been collected in the numpy mailing list, on stack overflow\n", 15 | "and in the numpy documentation. The goal of this collection is to offer a quick reference for both old\n", 16 | "and new users but also to provide a set of exercises for those who teach.\n", 17 | "\n", 18 | "\n", 19 | "If you find an error or think you've a better way to solve some of them, feel\n", 20 | "free to open an issue at <https://github.com/rougier/numpy-100>." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "id": "3f7a87ec", 26 | "metadata": {}, 27 | "source": [ 28 | "File automatically generated. See the documentation to update questions/answers/hints programmatically." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "854ecf1b", 34 | "metadata": {}, 35 | "source": [ 36 | "Run the `initialise.py` module, then for each question you can query the\n", 37 | "answer or a hint with `answer(n)` or `hint(n)`, where `n` is the question number." 
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "id": "c53ea877", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "%run initialise.py" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "c00df986", 53 | "metadata": {}, 54 | "source": [ 55 | "#### 1. Import the numpy package under the name `np` (★☆☆)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "35bde95c", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "b555a89f", 69 | "metadata": {}, 70 | "source": [ 71 | "#### 2. Print the numpy version and the configuration (★☆☆)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "2c9d363d", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "e2713407", 85 | "metadata": {}, 86 | "source": [ 87 | "#### 3. Create a null vector of size 10 (★☆☆)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "6df2f6db", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "id": "5a09554f", 101 | "metadata": {}, 102 | "source": [ 103 | "#### 4. How to find the memory size of any array (★☆☆)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "c55a2a00", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "4720d438", 117 | "metadata": {}, 118 | "source": [ 119 | "#### 5. How to get the documentation of the numpy add function from the command line? 
(★☆☆)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "d9e2277e", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "id": "b812d322", 133 | "metadata": {}, 134 | "source": [ 135 | "#### 6. Create a null vector of size 10 but the fifth value which is 1 (★☆☆)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "id": "9f95cedf", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "id": "a348f2a8", 149 | "metadata": {}, 150 | "source": [ 151 | "#### 7. Create a vector with values ranging from 10 to 49 (★☆☆)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "id": "5a68fd5b", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "2f4ca4c0", 165 | "metadata": {}, 166 | "source": [ 167 | "#### 8. Reverse a vector (first element becomes last) (★☆☆)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "id": "1892e775", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "31381832", 181 | "metadata": {}, 182 | "source": [ 183 | "#### 9. Create a 3x3 matrix with values ranging from 0 to 8 (★☆☆)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "0154f251", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "id": "8c353e84", 197 | "metadata": {}, 198 | "source": [ 199 | "#### 10. 
Find indices of non-zero elements from [1,2,0,0,4,0] (★☆☆)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "id": "da3495c3", 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "id": "a26ac749", 213 | "metadata": {}, 214 | "source": [ 215 | "#### 11. Create a 3x3 identity matrix (★☆☆)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "id": "3e33feda", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "id": "3a293379", 229 | "metadata": {}, 230 | "source": [ 231 | "#### 12. Create a 3x3x3 array with random values (★☆☆)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "id": "2da08152", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "id": "e3faa924", 245 | "metadata": {}, 246 | "source": [ 247 | "#### 13. Create a 10x10 array with random values and find the minimum and maximum values (★☆☆)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "e9fb3793", 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "317cb8d1", 261 | "metadata": {}, 262 | "source": [ 263 | "#### 14. Create a random vector of size 30 and find the mean value (★☆☆)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "id": "4ea995d6", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "id": "b8dee0f2", 277 | "metadata": {}, 278 | "source": [ 279 | "#### 15. 
Create a 2d array with 1 on the border and 0 inside (★☆☆)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "id": "7054464b", 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "id": "1476f45f", 293 | "metadata": {}, 294 | "source": [ 295 | "#### 16. How to add a border (filled with 0's) around an existing array? (★☆☆)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "id": "4f0212af", 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "id": "52844dc7", 309 | "metadata": {}, 310 | "source": [ 311 | "#### 17. What is the result of the following expression? (★☆☆)\n", 312 | "```python\n", 313 | "0 * np.nan\n", 314 | "np.nan == np.nan\n", 315 | "np.inf > np.nan\n", 316 | "np.nan - np.nan\n", 317 | "np.nan in set([np.nan])\n", 318 | "0.3 == 3 * 0.1\n", 319 | "```" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "id": "cfdb7352", 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "9801c961", 333 | "metadata": {}, 334 | "source": [ 335 | "#### 18. Create a 5x5 matrix with values 1,2,3,4 just below the diagonal (★☆☆)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "id": "4e97651a", 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "id": "3dc05e9f", 349 | "metadata": {}, 350 | "source": [ 351 | "#### 19. 
Create a 8x8 matrix and fill it with a checkerboard pattern (★☆☆)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "id": "0b100b85", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "id": "d8e941b6", 365 | "metadata": {}, 366 | "source": [ 367 | "#### 20. Consider a (6,7,8) shape array, what is the index (x,y,z) of the 100th element? (★☆☆)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "id": "b3bc1fcb", 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "id": "bf3b8ab3", 381 | "metadata": {}, 382 | "source": [ 383 | "#### 21. Create a checkerboard 8x8 matrix using the tile function (★☆☆)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "eb15c681", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [] 393 | }, 394 | { 395 | "cell_type": "markdown", 396 | "id": "71af9d19", 397 | "metadata": {}, 398 | "source": [ 399 | "#### 22. Normalize a 5x5 random matrix (★☆☆)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": null, 405 | "id": "df20b788", 406 | "metadata": {}, 407 | "outputs": [], 408 | "source": [] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "id": "01cd9893", 413 | "metadata": {}, 414 | "source": [ 415 | "#### 23. Create a custom dtype that describes a color as four unsigned bytes (RGBA) (★☆☆)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "id": "4acddc2f", 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "id": "0ef5f38b", 429 | "metadata": {}, 430 | "source": [ 431 | "#### 24. 
Multiply a 5x3 matrix by a 3x2 matrix (real matrix product) (★☆☆)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "id": "3d36d4a0", 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "id": "a46db425", 445 | "metadata": {}, 446 | "source": [ 447 | "#### 25. Given a 1D array, negate all elements which are between 3 and 8, in place. (★☆☆)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "id": "86e3d6a8", 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "id": "89fa6007", 461 | "metadata": {}, 462 | "source": [ 463 | "#### 26. What is the output of the following script? (★☆☆)\n", 464 | "```python\n", 465 | "# Author: Jake VanderPlas\n", 466 | "\n", 467 | "print(sum(range(5),-1))\n", 468 | "from numpy import *\n", 469 | "print(sum(range(5),-1))\n", 470 | "```" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "id": "59f2c6dc", 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "id": "38de4589", 484 | "metadata": {}, 485 | "source": [ 486 | "#### 27. Consider an integer vector Z, which of these expressions are legal? (★☆☆)\n", 487 | "```python\n", 488 | "Z**Z\n", 489 | "2 << Z >> 2\n", 490 | "Z <- Z\n", 491 | "1j*Z\n", 492 | "Z/1/1\n", 493 | "Z<Z>Z\n", 494 | "```" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "id": "a9dc9520", 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "id": "77c8b8c7", 508 | "metadata": {}, 509 | "source": [ 510 | "#### 28. What are the results of the following expressions? 
(★☆☆)\n", 511 | "```python\n", 512 | "np.array(0) / np.array(0)\n", 513 | "np.array(0) // np.array(0)\n", 514 | "np.array([np.nan]).astype(int).astype(float)\n", 515 | "```" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "id": "3b66fe44", 522 | "metadata": {}, 523 | "outputs": [], 524 | "source": [] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "id": "1f3f0892", 529 | "metadata": {}, 530 | "source": [ 531 | "#### 29. How to round a float array away from zero? (★☆☆)" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": null, 537 | "id": "8fce1bd7", 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "id": "53f47438", 545 | "metadata": {}, 546 | "source": [ 547 | "#### 30. How to find common values between two arrays? (★☆☆)" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": null, 553 | "id": "981db6ec", 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "id": "51cf9109", 561 | "metadata": {}, 562 | "source": [ 563 | "#### 31. How to ignore all numpy warnings (not recommended)? (★☆☆)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "id": "09ba7521", 570 | "metadata": {}, 571 | "outputs": [], 572 | "source": [] 573 | }, 574 | { 575 | "cell_type": "markdown", 576 | "id": "948a691b", 577 | "metadata": {}, 578 | "source": [ 579 | "#### 32. Is the following expression true? (★☆☆)\n", 580 | "```python\n", 581 | "np.sqrt(-1) == np.emath.sqrt(-1)\n", 582 | "```" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "id": "6d8f745b", 589 | "metadata": {}, 590 | "outputs": [], 591 | "source": [] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "id": "d1a8992e", 596 | "metadata": {}, 597 | "source": [ 598 | "#### 33. How to get the dates of yesterday, today and tomorrow?
(★☆☆)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "id": "31a1ade2", 605 | "metadata": {}, 606 | "outputs": [], 607 | "source": [] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "id": "01378794", 612 | "metadata": {}, 613 | "source": [ 614 | "#### 34. How to get all the dates corresponding to the month of July 2016? (★★☆)" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "id": "928c55c6", 621 | "metadata": {}, 622 | "outputs": [], 623 | "source": [] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "id": "e3f8fe94", 628 | "metadata": {}, 629 | "source": [ 630 | "#### 35. How to compute ((A+B)*(-A/2)) in place (without copy)? (★★☆)" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "id": "14e478d5", 637 | "metadata": {}, 638 | "outputs": [], 639 | "source": [] 640 | }, 641 | { 642 | "cell_type": "markdown", 643 | "id": "5c4d58c9", 644 | "metadata": {}, 645 | "source": [ 646 | "#### 36. Extract the integer part of a random array of positive numbers using 4 different methods (★★☆)" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": null, 652 | "id": "586beded", 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "id": "62628653", 660 | "metadata": {}, 661 | "source": [ 662 | "#### 37. Create a 5x5 matrix with row values ranging from 0 to 4 (★★☆)" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "id": "e57763b3", 669 | "metadata": {}, 670 | "outputs": [], 671 | "source": [] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "id": "d3d46d64", 676 | "metadata": {}, 677 | "source": [ 678 | "#### 38. 
Consider a generator function that generates 10 integers and use it to build an array (★☆☆)" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": null, 684 | "id": "24590d8c", 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [] 688 | }, 689 | { 690 | "cell_type": "markdown", 691 | "id": "5e1e51e4", 692 | "metadata": {}, 693 | "source": [ 694 | "#### 39. Create a vector of size 10 with values ranging from 0 to 1, both excluded (★★☆)" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": null, 700 | "id": "def73884", 701 | "metadata": {}, 702 | "outputs": [], 703 | "source": [] 704 | }, 705 | { 706 | "cell_type": "markdown", 707 | "id": "9e19964c", 708 | "metadata": {}, 709 | "source": [ 710 | "#### 40. Create a random vector of size 10 and sort it (★★☆)" 711 | ] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "execution_count": null, 716 | "id": "ec302eb1", 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "id": "13a034e2", 724 | "metadata": {}, 725 | "source": [ 726 | "#### 41. How to sum a small array faster than np.sum? (★★☆)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": null, 732 | "id": "b09a3551", 733 | "metadata": {}, 734 | "outputs": [], 735 | "source": [] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "id": "32d10473", 740 | "metadata": {}, 741 | "source": [ 742 | "#### 42. Consider two random arrays A and B, check if they are equal (★★☆)" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": null, 748 | "id": "5ebb1103", 749 | "metadata": {}, 750 | "outputs": [], 751 | "source": [] 752 | }, 753 | { 754 | "cell_type": "markdown", 755 | "id": "a50474b0", 756 | "metadata": {}, 757 | "source": [ 758 | "#### 43. 
Make an array immutable (read-only) (★★☆)" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": null, 764 | "id": "db6fe176", 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [] 768 | }, 769 | { 770 | "cell_type": "markdown", 771 | "id": "b7a6093e", 772 | "metadata": {}, 773 | "source": [ 774 | "#### 44. Consider a random 10x2 matrix representing cartesian coordinates, convert them to polar coordinates (★★☆)" 775 | ] 776 | }, 777 | { 778 | "cell_type": "code", 779 | "execution_count": null, 780 | "id": "4479c48d", 781 | "metadata": {}, 782 | "outputs": [], 783 | "source": [] 784 | }, 785 | { 786 | "cell_type": "markdown", 787 | "id": "934f889c", 788 | "metadata": {}, 789 | "source": [ 790 | "#### 45. Create a random vector of size 10 and replace the maximum value with 0 (★★☆)" 791 | ] 792 | }, 793 | { 794 | "cell_type": "code", 795 | "execution_count": null, 796 | "id": "4b576814", 797 | "metadata": {}, 798 | "outputs": [], 799 | "source": [] 800 | }, 801 | { 802 | "cell_type": "markdown", 803 | "id": "d4f6e2e1", 804 | "metadata": {}, 805 | "source": [ 806 | "#### 46. Create a structured array with `x` and `y` coordinates covering the [0,1]x[0,1] area (★★☆)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": null, 812 | "id": "f90f969b", 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [] 816 | }, 817 | { 818 | "cell_type": "markdown", 819 | "id": "bc9c88d3", 820 | "metadata": {}, 821 | "source": [ 822 | "#### 47. Given two arrays, X and Y, construct the Cauchy matrix C (Cij = 1/(xi - yj)) (★★☆)" 823 | ] 824 | }, 825 | { 826 | "cell_type": "code", 827 | "execution_count": null, 828 | "id": "e90ad595", 829 | "metadata": {}, 830 | "outputs": [], 831 | "source": [] 832 | }, 833 | { 834 | "cell_type": "markdown", 835 | "id": "a840a52f", 836 | "metadata": {}, 837 | "source": [ 838 | "#### 48.
Print the minimum and maximum representable values for each numpy scalar type (★★☆)" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "id": "d2eb3b4a", 845 | "metadata": {}, 846 | "outputs": [], 847 | "source": [] 848 | }, 849 | { 850 | "cell_type": "markdown", 851 | "id": "0a0819d5", 852 | "metadata": {}, 853 | "source": [ 854 | "#### 49. How to print all the values of an array? (★★☆)" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": null, 860 | "id": "d76dbf55", 861 | "metadata": {}, 862 | "outputs": [], 863 | "source": [] 864 | }, 865 | { 866 | "cell_type": "markdown", 867 | "id": "50c62214", 868 | "metadata": {}, 869 | "source": [ 870 | "#### 50. How to find the closest value (to a given scalar) in a vector? (★★☆)" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": null, 876 | "id": "24b45bb5", 877 | "metadata": {}, 878 | "outputs": [], 879 | "source": [] 880 | }, 881 | { 882 | "cell_type": "markdown", 883 | "id": "4c5a5116", 884 | "metadata": {}, 885 | "source": [ 886 | "#### 51. Create a structured array representing a position (x,y) and a color (r,g,b) (★★☆)" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": null, 892 | "id": "275806b2", 893 | "metadata": {}, 894 | "outputs": [], 895 | "source": [] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "id": "1d3399ca", 900 | "metadata": {}, 901 | "source": [ 902 | "#### 52. Consider a random vector with shape (100,2) representing coordinates, find point by point distances (★★☆)" 903 | ] 904 | }, 905 | { 906 | "cell_type": "code", 907 | "execution_count": null, 908 | "id": "67eae5bf", 909 | "metadata": {}, 910 | "outputs": [], 911 | "source": [] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "id": "003ff920", 916 | "metadata": {}, 917 | "source": [ 918 | "#### 53. How to convert a float (32 bits) array into an integer (32 bits) array in place?" 
919 | ] 920 | }, 921 | { 922 | "cell_type": "code", 923 | "execution_count": null, 924 | "id": "76245e6d", 925 | "metadata": {}, 926 | "outputs": [], 927 | "source": [] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "id": "0c99eac4", 932 | "metadata": {}, 933 | "source": [ 934 | "#### 54. How to read the following file? (★★☆)\n", 935 | "```\n", 936 | "1, 2, 3, 4, 5\n", 937 | "6, , , 7, 8\n", 938 | " , , 9,10,11\n", 939 | "```" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": null, 945 | "id": "55c6bf8c", 946 | "metadata": {}, 947 | "outputs": [], 948 | "source": [] 949 | }, 950 | { 951 | "cell_type": "markdown", 952 | "id": "8cd30360", 953 | "metadata": {}, 954 | "source": [ 955 | "#### 55. What is the equivalent of enumerate for numpy arrays? (★★☆)" 956 | ] 957 | }, 958 | { 959 | "cell_type": "code", 960 | "execution_count": null, 961 | "id": "608315ba", 962 | "metadata": {}, 963 | "outputs": [], 964 | "source": [] 965 | }, 966 | { 967 | "cell_type": "markdown", 968 | "id": "4823a294", 969 | "metadata": {}, 970 | "source": [ 971 | "#### 56. Generate a generic 2D Gaussian-like array (★★☆)" 972 | ] 973 | }, 974 | { 975 | "cell_type": "code", 976 | "execution_count": null, 977 | "id": "9b3b682d", 978 | "metadata": {}, 979 | "outputs": [], 980 | "source": [] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "id": "2dd8891d", 985 | "metadata": {}, 986 | "source": [ 987 | "#### 57. How to randomly place p elements in a 2D array? (★★☆)" 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": null, 993 | "id": "a55c52dc", 994 | "metadata": {}, 995 | "outputs": [], 996 | "source": [] 997 | }, 998 | { 999 | "cell_type": "markdown", 1000 | "id": "270b6834", 1001 | "metadata": {}, 1002 | "source": [ 1003 | "#### 58. 
Subtract the mean of each row of a matrix (★★☆)" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "code", 1008 | "execution_count": null, 1009 | "id": "814080c9", 1010 | "metadata": {}, 1011 | "outputs": [], 1012 | "source": [] 1013 | }, 1014 | { 1015 | "cell_type": "markdown", 1016 | "id": "52c92916", 1017 | "metadata": {}, 1018 | "source": [ 1019 | "#### 59. How to sort an array by the nth column? (★★☆)" 1020 | ] 1021 | }, 1022 | { 1023 | "cell_type": "code", 1024 | "execution_count": null, 1025 | "id": "c6ebac6d", 1026 | "metadata": {}, 1027 | "outputs": [], 1028 | "source": [] 1029 | }, 1030 | { 1031 | "cell_type": "markdown", 1032 | "id": "05cef60b", 1033 | "metadata": {}, 1034 | "source": [ 1035 | "#### 60. How to tell if a given 2D array has null columns? (★★☆)" 1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "code", 1040 | "execution_count": null, 1041 | "id": "7979b467", 1042 | "metadata": {}, 1043 | "outputs": [], 1044 | "source": [] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "id": "4891fdf5", 1049 | "metadata": {}, 1050 | "source": [ 1051 | "#### 61. Find the nearest value from a given value in an array (★★☆)" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": null, 1057 | "id": "b38950fd", 1058 | "metadata": {}, 1059 | "outputs": [], 1060 | "source": [] 1061 | }, 1062 | { 1063 | "cell_type": "markdown", 1064 | "id": "b34bfbc7", 1065 | "metadata": {}, 1066 | "source": [ 1067 | "#### 62. Considering two arrays with shape (1,3) and (3,1), how to compute their sum using an iterator? (★★☆)" 1068 | ] 1069 | }, 1070 | { 1071 | "cell_type": "code", 1072 | "execution_count": null, 1073 | "id": "aa7eb006", 1074 | "metadata": {}, 1075 | "outputs": [], 1076 | "source": [] 1077 | }, 1078 | { 1079 | "cell_type": "markdown", 1080 | "id": "77a27b83", 1081 | "metadata": {}, 1082 | "source": [ 1083 | "#### 63. 
Create an array class that has a name attribute (★★☆)" 1084 | ] 1085 | }, 1086 | { 1087 | "cell_type": "code", 1088 | "execution_count": null, 1089 | "id": "a2a83936", 1090 | "metadata": {}, 1091 | "outputs": [], 1092 | "source": [] 1093 | }, 1094 | { 1095 | "cell_type": "markdown", 1096 | "id": "f7f5d229", 1097 | "metadata": {}, 1098 | "source": [ 1099 | "#### 64. Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices)? (★★★)" 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "code", 1104 | "execution_count": null, 1105 | "id": "9b8f8f48", 1106 | "metadata": {}, 1107 | "outputs": [], 1108 | "source": [] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "id": "b067fca8", 1113 | "metadata": {}, 1114 | "source": [ 1115 | "#### 65. How to accumulate elements of a vector (X) to an array (F) based on an index list (I)? (★★★)" 1116 | ] 1117 | }, 1118 | { 1119 | "cell_type": "code", 1120 | "execution_count": null, 1121 | "id": "f983f8ba", 1122 | "metadata": {}, 1123 | "outputs": [], 1124 | "source": [] 1125 | }, 1126 | { 1127 | "cell_type": "markdown", 1128 | "id": "765e2b8e", 1129 | "metadata": {}, 1130 | "source": [ 1131 | "#### 66. Considering a (w,h,3) image of (dtype=ubyte), compute the number of unique colors (★★☆)" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": null, 1137 | "id": "7572d90d", 1138 | "metadata": {}, 1139 | "outputs": [], 1140 | "source": [] 1141 | }, 1142 | { 1143 | "cell_type": "markdown", 1144 | "id": "02743317", 1145 | "metadata": {}, 1146 | "source": [ 1147 | "#### 67. Considering a four-dimensional array, how to get the sum over the last two axes at once?
(★★★)" 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "code", 1152 | "execution_count": null, 1153 | "id": "980b5c45", 1154 | "metadata": {}, 1155 | "outputs": [], 1156 | "source": [] 1157 | }, 1158 | { 1159 | "cell_type": "markdown", 1160 | "id": "fe8b8107", 1161 | "metadata": {}, 1162 | "source": [ 1163 | "#### 68. Considering a one-dimensional vector D, how to compute means of subsets of D using a vector S of same size describing subset indices? (★★★)" 1164 | ] 1165 | }, 1166 | { 1167 | "cell_type": "code", 1168 | "execution_count": null, 1169 | "id": "9c17c8c9", 1170 | "metadata": {}, 1171 | "outputs": [], 1172 | "source": [] 1173 | }, 1174 | { 1175 | "cell_type": "markdown", 1176 | "id": "7efa1173", 1177 | "metadata": {}, 1178 | "source": [ 1179 | "#### 69. How to get the diagonal of a dot product? (★★★)" 1180 | ] 1181 | }, 1182 | { 1183 | "cell_type": "code", 1184 | "execution_count": null, 1185 | "id": "59fc6a51", 1186 | "metadata": {}, 1187 | "outputs": [], 1188 | "source": [] 1189 | }, 1190 | { 1191 | "cell_type": "markdown", 1192 | "id": "b72adc30", 1193 | "metadata": {}, 1194 | "source": [ 1195 | "#### 70. Consider the vector [1, 2, 3, 4, 5], how to build a new vector with 3 consecutive zeros interleaved between each value? (★★★)" 1196 | ] 1197 | }, 1198 | { 1199 | "cell_type": "code", 1200 | "execution_count": null, 1201 | "id": "94b582b4", 1202 | "metadata": {}, 1203 | "outputs": [], 1204 | "source": [] 1205 | }, 1206 | { 1207 | "cell_type": "markdown", 1208 | "id": "fafa05b8", 1209 | "metadata": {}, 1210 | "source": [ 1211 | "#### 71. Consider an array of dimension (5,5,3), how to multiply it by an array with dimensions (5,5)? (★★★)" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "code", 1216 | "execution_count": null, 1217 | "id": "0dbb5bd4", 1218 | "metadata": {}, 1219 | "outputs": [], 1220 | "source": [] 1221 | }, 1222 | { 1223 | "cell_type": "markdown", 1224 | "id": "35a2bae5", 1225 | "metadata": {}, 1226 | "source": [ 1227 | "#### 72. 
How to swap two rows of an array? (★★★)" 1228 | ] 1229 | }, 1230 | { 1231 | "cell_type": "code", 1232 | "execution_count": null, 1233 | "id": "d63b30c3", 1234 | "metadata": {}, 1235 | "outputs": [], 1236 | "source": [] 1237 | }, 1238 | { 1239 | "cell_type": "markdown", 1240 | "id": "b760fc57", 1241 | "metadata": {}, 1242 | "source": [ 1243 | "#### 73. Consider a set of 10 triplets describing 10 triangles (with shared vertices), find the set of unique line segments composing all the triangles (★★★)" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": null, 1249 | "id": "01738774", 1250 | "metadata": {}, 1251 | "outputs": [], 1252 | "source": [] 1253 | }, 1254 | { 1255 | "cell_type": "markdown", 1256 | "id": "2d28cdaa", 1257 | "metadata": {}, 1258 | "source": [ 1259 | "#### 74. Given a sorted array C that corresponds to a bincount, how to produce an array A such that np.bincount(A) == C? (★★★)" 1260 | ] 1261 | }, 1262 | { 1263 | "cell_type": "code", 1264 | "execution_count": null, 1265 | "id": "8423859b", 1266 | "metadata": {}, 1267 | "outputs": [], 1268 | "source": [] 1269 | }, 1270 | { 1271 | "cell_type": "markdown", 1272 | "id": "5f4cafbd", 1273 | "metadata": {}, 1274 | "source": [ 1275 | "#### 75. How to compute averages using a sliding window over an array? (★★★)" 1276 | ] 1277 | }, 1278 | { 1279 | "cell_type": "code", 1280 | "execution_count": null, 1281 | "id": "62d6d514", 1282 | "metadata": {}, 1283 | "outputs": [], 1284 | "source": [] 1285 | }, 1286 | { 1287 | "cell_type": "markdown", 1288 | "id": "575d8bb4", 1289 | "metadata": {}, 1290 | "source": [ 1291 | "#### 76. 
Consider a one-dimensional array Z, build a two-dimensional array whose first row is (Z[0],Z[1],Z[2]) and each subsequent row is shifted by 1 (last row should be (Z[-3],Z[-2],Z[-1])) (★★★)" 1292 | ] 1293 | }, 1294 | { 1295 | "cell_type": "code", 1296 | "execution_count": null, 1297 | "id": "2a41d341", 1298 | "metadata": {}, 1299 | "outputs": [], 1300 | "source": [] 1301 | }, 1302 | { 1303 | "cell_type": "markdown", 1304 | "id": "80dc0b2a", 1305 | "metadata": {}, 1306 | "source": [ 1307 | "#### 77. How to negate a boolean, or to change the sign of a float in place? (★★★)" 1308 | ] 1309 | }, 1310 | { 1311 | "cell_type": "code", 1312 | "execution_count": null, 1313 | "id": "6fc68014", 1314 | "metadata": {}, 1315 | "outputs": [], 1316 | "source": [] 1317 | }, 1318 | { 1319 | "cell_type": "markdown", 1320 | "id": "7288634d", 1321 | "metadata": {}, 1322 | "source": [ 1323 | "#### 78. Consider 2 sets of points P0,P1 describing lines (2d) and a point p, how to compute distance from p to each line i (P0[i],P1[i])? (★★★)" 1324 | ] 1325 | }, 1326 | { 1327 | "cell_type": "code", 1328 | "execution_count": null, 1329 | "id": "4602caa6", 1330 | "metadata": {}, 1331 | "outputs": [], 1332 | "source": [] 1333 | }, 1334 | { 1335 | "cell_type": "markdown", 1336 | "id": "e1880e79", 1337 | "metadata": {}, 1338 | "source": [ 1339 | "#### 79. Consider 2 sets of points P0,P1 describing lines (2d) and a set of points P, how to compute distance from each point j (P[j]) to each line i (P0[i],P1[i])? (★★★)" 1340 | ] 1341 | }, 1342 | { 1343 | "cell_type": "code", 1344 | "execution_count": null, 1345 | "id": "a14e151b", 1346 | "metadata": {}, 1347 | "outputs": [], 1348 | "source": [] 1349 | }, 1350 | { 1351 | "cell_type": "markdown", 1352 | "id": "b8a2a287", 1353 | "metadata": {}, 1354 | "source": [ 1355 | "#### 80.
Consider an arbitrary array, write a function that extracts a subpart with a fixed shape and centered on a given element (pad with a `fill` value when necessary) (★★★)" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": null, 1361 | "id": "86af1186", 1362 | "metadata": {}, 1363 | "outputs": [], 1364 | "source": [] 1365 | }, 1366 | { 1367 | "cell_type": "markdown", 1368 | "id": "caa64150", 1369 | "metadata": {}, 1370 | "source": [ 1371 | "#### 81. Consider an array Z = [1,2,3,4,5,6,7,8,9,10,11,12,13,14], how to generate an array R = [[1,2,3,4], [2,3,4,5], [3,4,5,6], ..., [11,12,13,14]]? (★★★)" 1372 | ] 1373 | }, 1374 | { 1375 | "cell_type": "code", 1376 | "execution_count": null, 1377 | "id": "4e7cc696", 1378 | "metadata": {}, 1379 | "outputs": [], 1380 | "source": [] 1381 | }, 1382 | { 1383 | "cell_type": "markdown", 1384 | "id": "9444c83b", 1385 | "metadata": {}, 1386 | "source": [ 1387 | "#### 82. Compute a matrix rank (★★★)" 1388 | ] 1389 | }, 1390 | { 1391 | "cell_type": "code", 1392 | "execution_count": null, 1393 | "id": "9e5c1d64", 1394 | "metadata": {}, 1395 | "outputs": [], 1396 | "source": [] 1397 | }, 1398 | { 1399 | "cell_type": "markdown", 1400 | "id": "6eef9f33", 1401 | "metadata": {}, 1402 | "source": [ 1403 | "#### 83. How to find the most frequent value in an array?" 1404 | ] 1405 | }, 1406 | { 1407 | "cell_type": "code", 1408 | "execution_count": null, 1409 | "id": "9fb60e52", 1410 | "metadata": {}, 1411 | "outputs": [], 1412 | "source": [] 1413 | }, 1414 | { 1415 | "cell_type": "markdown", 1416 | "id": "65f370f9", 1417 | "metadata": {}, 1418 | "source": [ 1419 | "#### 84. 
Extract all the contiguous 3x3 blocks from a random 10x10 matrix (★★★)" 1420 | ] 1421 | }, 1422 | { 1423 | "cell_type": "code", 1424 | "execution_count": null, 1425 | "id": "65ade1a6", 1426 | "metadata": {}, 1427 | "outputs": [], 1428 | "source": [] 1429 | }, 1430 | { 1431 | "cell_type": "markdown", 1432 | "id": "845ad7f8", 1433 | "metadata": {}, 1434 | "source": [ 1435 | "#### 85. Create a 2D array subclass such that Z[i,j] == Z[j,i] (★★★)" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "code", 1440 | "execution_count": null, 1441 | "id": "30b1ba71", 1442 | "metadata": {}, 1443 | "outputs": [], 1444 | "source": [] 1445 | }, 1446 | { 1447 | "cell_type": "markdown", 1448 | "id": "adc48f02", 1449 | "metadata": {}, 1450 | "source": [ 1451 | "#### 86. Consider a set of p matrices with shape (n,n) and a set of p vectors with shape (n,1). How to compute the sum of the p matrix products at once? (result has shape (n,1)) (★★★)" 1452 | ] 1453 | }, 1454 | { 1455 | "cell_type": "code", 1456 | "execution_count": null, 1457 | "id": "ec798e90", 1458 | "metadata": {}, 1459 | "outputs": [], 1460 | "source": [] 1461 | }, 1462 | { 1463 | "cell_type": "markdown", 1464 | "id": "9efb34b2", 1465 | "metadata": {}, 1466 | "source": [ 1467 | "#### 87. Consider a 16x16 array, how to get the block-sum (block size is 4x4)? (★★★)" 1468 | ] 1469 | }, 1470 | { 1471 | "cell_type": "code", 1472 | "execution_count": null, 1473 | "id": "ea79f40b", 1474 | "metadata": {}, 1475 | "outputs": [], 1476 | "source": [] 1477 | }, 1478 | { 1479 | "cell_type": "markdown", 1480 | "id": "a9368578", 1481 | "metadata": {}, 1482 | "source": [ 1483 | "#### 88. How to implement the Game of Life using numpy arrays?
(★★★)" 1484 | ] 1485 | }, 1486 | { 1487 | "cell_type": "code", 1488 | "execution_count": null, 1489 | "id": "315e9f11", 1490 | "metadata": {}, 1491 | "outputs": [], 1492 | "source": [] 1493 | }, 1494 | { 1495 | "cell_type": "markdown", 1496 | "id": "8c598af3", 1497 | "metadata": {}, 1498 | "source": [ 1499 | "#### 89. How to get the n largest values of an array? (★★★)" 1500 | ] 1501 | }, 1502 | { 1503 | "cell_type": "code", 1504 | "execution_count": null, 1505 | "id": "cba8d433", 1506 | "metadata": {}, 1507 | "outputs": [], 1508 | "source": [] 1509 | }, 1510 | { 1511 | "cell_type": "markdown", 1512 | "id": "036a3bb0", 1513 | "metadata": {}, 1514 | "source": [ 1515 | "#### 90. Given an arbitrary number of vectors, build the cartesian product (every combination of every item) (★★★)" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "code", 1520 | "execution_count": null, 1521 | "id": "d712972d", 1522 | "metadata": {}, 1523 | "outputs": [], 1524 | "source": [] 1525 | }, 1526 | { 1527 | "cell_type": "markdown", 1528 | "id": "36487aab", 1529 | "metadata": {}, 1530 | "source": [ 1531 | "#### 91. How to create a record array from a regular array? (★★★)" 1532 | ] 1533 | }, 1534 | { 1535 | "cell_type": "code", 1536 | "execution_count": null, 1537 | "id": "9779dbac", 1538 | "metadata": {}, 1539 | "outputs": [], 1540 | "source": [] 1541 | }, 1542 | { 1543 | "cell_type": "markdown", 1544 | "id": "9f178562", 1545 | "metadata": {}, 1546 | "source": [ 1547 | "#### 92. Consider a large vector Z, compute Z to the power of 3 using 3 different methods (★★★)" 1548 | ] 1549 | }, 1550 | { 1551 | "cell_type": "code", 1552 | "execution_count": null, 1553 | "id": "b08000b8", 1554 | "metadata": {}, 1555 | "outputs": [], 1556 | "source": [] 1557 | }, 1558 | { 1559 | "cell_type": "markdown", 1560 | "id": "86acaf60", 1561 | "metadata": {}, 1562 | "source": [ 1563 | "#### 93. Consider two arrays A and B of shape (8,3) and (2,2).
How to find rows of A that contain elements of each row of B regardless of the order of the elements in B? (★★★)" 1564 | ] 1565 | }, 1566 | { 1567 | "cell_type": "code", 1568 | "execution_count": null, 1569 | "id": "793b26d4", 1570 | "metadata": {}, 1571 | "outputs": [], 1572 | "source": [] 1573 | }, 1574 | { 1575 | "cell_type": "markdown", 1576 | "id": "c71b9df2", 1577 | "metadata": {}, 1578 | "source": [ 1579 | "#### 94. Considering a 10x3 matrix, extract rows with unequal values (e.g. [2,2,3]) (★★★)" 1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "code", 1584 | "execution_count": null, 1585 | "id": "494069f0", 1586 | "metadata": {}, 1587 | "outputs": [], 1588 | "source": [] 1589 | }, 1590 | { 1591 | "cell_type": "markdown", 1592 | "id": "1a8ae7c1", 1593 | "metadata": {}, 1594 | "source": [ 1595 | "#### 95. Convert a vector of ints into a matrix binary representation (★★★)" 1596 | ] 1597 | }, 1598 | { 1599 | "cell_type": "code", 1600 | "execution_count": null, 1601 | "id": "d5e967b8", 1602 | "metadata": {}, 1603 | "outputs": [], 1604 | "source": [] 1605 | }, 1606 | { 1607 | "cell_type": "markdown", 1608 | "id": "edef1198", 1609 | "metadata": {}, 1610 | "source": [ 1611 | "#### 96. Given a two dimensional array, how to extract unique rows? (★★★)" 1612 | ] 1613 | }, 1614 | { 1615 | "cell_type": "code", 1616 | "execution_count": null, 1617 | "id": "fa14799b", 1618 | "metadata": {}, 1619 | "outputs": [], 1620 | "source": [] 1621 | }, 1622 | { 1623 | "cell_type": "markdown", 1624 | "id": "575598a1", 1625 | "metadata": {}, 1626 | "source": [ 1627 | "#### 97. Considering 2 vectors A & B, write the einsum equivalent of inner, outer, sum, and mul function (★★★)" 1628 | ] 1629 | }, 1630 | { 1631 | "cell_type": "code", 1632 | "execution_count": null, 1633 | "id": "9da72f6e", 1634 | "metadata": {}, 1635 | "outputs": [], 1636 | "source": [] 1637 | }, 1638 | { 1639 | "cell_type": "markdown", 1640 | "id": "88e2796a", 1641 | "metadata": {}, 1642 | "source": [ 1643 | "#### 98. 
Considering a path described by two vectors (X,Y), how to sample it using equidistant samples? (★★★)" 1644 | ] 1645 | }, 1646 | { 1647 | "cell_type": "code", 1648 | "execution_count": null, 1649 | "id": "c17314f8", 1650 | "metadata": {}, 1651 | "outputs": [], 1652 | "source": [] 1653 | }, 1654 | { 1655 | "cell_type": "markdown", 1656 | "id": "8b25b7bd", 1657 | "metadata": {}, 1658 | "source": [ 1659 | "#### 99. Given an integer n and a 2D array X, select from X the rows which can be interpreted as draws from a multinomial distribution with n degrees, i.e., the rows which only contain integers and which sum to n. (★★★)" 1660 | ] 1661 | }, 1662 | { 1663 | "cell_type": "code", 1664 | "execution_count": null, 1665 | "id": "f0dc48cd", 1666 | "metadata": {}, 1667 | "outputs": [], 1668 | "source": [] 1669 | }, 1670 | { 1671 | "cell_type": "markdown", 1672 | "id": "b7068c76", 1673 | "metadata": {}, 1674 | "source": [ 1675 | "#### 100. Compute bootstrapped 95% confidence intervals for the mean of a 1D array X (i.e., resample the elements of an array with replacement N times, compute the mean of each sample, and then compute percentiles over the means). (★★★)" 1676 | ] 1677 | }, 1678 | { 1679 | "cell_type": "code", 1680 | "execution_count": null, 1681 | "id": "3c234201", 1682 | "metadata": {}, 1683 | "outputs": [], 1684 | "source": [] 1685 | } 1686 | ], 1687 | "metadata": { 1688 | "language_info": { 1689 | "name": "python" 1690 | } 1691 | }, 1692 | "nbformat": 4, 1693 | "nbformat_minor": 5 1694 | } 1695 | --------------------------------------------------------------------------------