├── README.md ├── .gitignore ├── SpaceX Plotly.py ├── Week 1 : Spacex-Data wrangling.ipynb ├── Week 2 : EDA SQL Coursera.ipynb └── Machine Learning Prediction lab.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Applied-Data-Science-Capstone -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
"""SpaceX Launch Records Dashboard (``SpaceX Plotly.py``).

A Dash application that reads ``spacex_launch_dash.csv`` and renders two
charts driven by the same controls:

* a pie chart of launch outcomes, driven by the launch-site dropdown;
* a scatter chart of payload mass against launch outcome, driven by the
  launch-site dropdown and the payload range slider.
"""

# Import required libraries
import pandas as pd
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import plotly.express as px

# Dropdown value that means "do not filter by launch site".
ALL_SITES = 'ALL'

# Read the SpaceX launch data into a pandas dataframe
spacex_df = pd.read_csv("spacex_launch_dash.csv")
max_payload = spacex_df['Payload Mass (kg)'].max()
min_payload = spacex_df['Payload Mass (kg)'].min()

# Create a dash application
app = dash.Dash(__name__)

# Create an app layout
app.layout = html.Div(children=[
    html.H1('SpaceX Launch Records Dashboard',
            style={'textAlign': 'center', 'color': '#503D36',
                   'font-size': 40}),

    # TASK 1: dropdown list for Launch Site selection.
    # The default selected value is ALL sites.
    dcc.Dropdown(id='site-dropdown',
                 options=[
                     {'label': 'ALL SITES', 'value': ALL_SITES},
                     {'label': 'CCAFS LC-40', 'value': 'CCAFS LC-40'},
                     {'label': 'VAFB SLC-4E', 'value': 'VAFB SLC-4E'},
                     {'label': 'KSC LC-39A', 'value': 'KSC LC-39A'},
                     {'label': 'CCAFS SLC-40', 'value': 'CCAFS SLC-40'},
                 ],
                 value=ALL_SITES,
                 placeholder="Select a Launch Site here",
                 searchable=True),
    html.Br(),

    # TASK 2: pie chart. For all sites it shows the successful launches per
    # site; for one site it shows that site's success vs. failed counts.
    html.Div(dcc.Graph(id='success-pie-chart')),
    html.Br(),

    html.P("Payload range (Kg):"),
    # TASK 3: slider to select the payload range. The initial selection
    # spans the smallest and largest payload found in the data.
    dcc.RangeSlider(id='payload-slider',
                    min=0, max=10000, step=1000,
                    value=[min_payload, max_payload],
                    marks={0: '0', 2500: '2500', 5000: '5000',
                           7500: '7500', 10000: '10000'}),

    # TASK 4: scatter chart of payload mass against launch outcome.
    html.Div(dcc.Graph(id='success-payload-scatter-chart')),
])


def _is_all_sites(site):
    """Return True when *site* means "no launch-site filter".

    ``None`` is treated like ``ALL_SITES`` so that a callback invoked
    without a selection (e.g. after the dropdown has been cleared) falls
    back to the full data set instead of matching no rows at all.
    """
    return site is None or site == ALL_SITES


def _launches_for_site(site):
    """Return the rows of ``spacex_df`` for *site* (every row for all sites)."""
    if _is_all_sites(site):
        return spacex_df
    return spacex_df.loc[spacex_df['Launch Site'] == site]


# TASK 2:
# Callback with `site-dropdown` as input and `success-pie-chart` as output.
@app.callback(
    Output(component_id='success-pie-chart', component_property='figure'),
    Input(component_id='site-dropdown', component_property='value'))
def build_graph(site_dropdown):
    """Build the pie chart for the selected launch site.

    Args:
        site_dropdown: Value of the ``site-dropdown`` component: one of the
            launch-site names, ``'ALL'``, or ``None`` when nothing is
            selected.

    Returns:
        A Plotly pie figure. For all sites, one slice per launch site sized
        by that site's ``class`` values. For a single site, one slice per
        distinct ``class`` value sized by its row count.
    """
    if _is_all_sites(site_dropdown):
        # NOTE(review): `class` is presumably 1 for a successful landing and
        # 0 otherwise, which makes each slice the site's number of
        # successes -- confirm against the CSV.
        return px.pie(data_frame=spacex_df, names='Launch Site',
                      values='class', title='Total Launches for All Sites')

    site_launches = _launches_for_site(site_dropdown)
    return px.pie(data_frame=site_launches, names='class',
                  title='Total Launch for a Specific Site')


# TASK 4:
# Callback with `site-dropdown` and `payload-slider` as inputs and
# `success-payload-scatter-chart` as output.
@app.callback(
    Output(component_id='success-payload-scatter-chart',
           component_property='figure'),
    [Input(component_id='site-dropdown', component_property='value'),
     Input(component_id='payload-slider', component_property='value')])
def update_graph(site_dropdown, payload_slider):
    """Build the payload-vs-outcome scatter chart for the current filters.

    Args:
        site_dropdown: Value of the ``site-dropdown`` component: one of the
            launch-site names, ``'ALL'``, or ``None`` when nothing is
            selected.
        payload_slider: Two-element sequence ``[low, high]`` from the
            ``payload-slider`` component; both bounds are inclusive.

    Returns:
        A Plotly scatter figure of ``Payload Mass (kg)`` against ``class``,
        coloured by ``Booster Version Category``.
    """
    low, high = payload_slider[0], payload_slider[1]
    launches = _launches_for_site(site_dropdown)

    # Build the whole mask from the frame that is actually being indexed.
    # The previous per-site branch took the upper-bound comparison from the
    # unfiltered `spacex_df`, which produced a mask with a different index
    # and relied on pandas silently re-aligning it.
    payload = launches['Payload Mass (kg)']
    in_range = launches[(payload >= low) & (payload <= high)]

    return px.scatter(data_frame=in_range, x="Payload Mass (kg)", y="class",
                      color="Booster Version Category")


# Run the app
if __name__ == '__main__':
    app.run_server()
\n \"cognitiveclass.ai\n
\n" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "# **Space X Falcon 9 First Stage Landing Prediction**\n" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "## Lab 2: Data wrangling\n" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "Estimated time needed: **60** minutes\n" 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": "In this lab, we will perform some Exploratory Data Analysis (EDA) to find some patterns in the data and determine what would be the label for training supervised models.\n\nIn the data set, there are several different cases where the booster did not land successfully. Sometimes a landing was attempted but failed due to an accident; for example, True Ocean means the mission outcome was successfully landed to a specific region of the ocean while False Ocean means the mission outcome was unsuccessfully landed to a specific region of the ocean. 
True RTLS means the mission outcome was successfully landed to a ground pad False RTLS means the mission outcome was unsuccessfully landed to a ground pad.True ASDS means the mission outcome was successfully landed on a drone ship False ASDS means the mission outcome was unsuccessfully landed on a drone ship.\n\nIn this lab we will mainly convert those outcomes into Training Labels with `1` means the booster successfully landed `0` means it was unsuccessful.\n" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": "Falcon 9 first stage will land successfully\n" 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": "![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/landing\\_1.gif)\n" 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": "Several examples of an unsuccessful landing are shown here:\n" 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": "![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/crash.gif)\n" 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": "" 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": "## Objectives\n\nPerform exploratory Data Analysis and determine Training Labels\n\n* Exploratory Data Analysis\n* Determine Training Labels\n" 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": "***\n" 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": "## Import Libraries and Define Auxiliary Functions\n" 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": "We will import the following libraries.\n" 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 1, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": "# Pandas is a 
software library written for the Python programming language for data manipulation and analysis.\nimport pandas as pd\n#NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays\nimport numpy as np" 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": "### Data Analysis\n" 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": "Load Space X dataset, from last section.\n" 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FlightNumberDateBoosterVersionPayloadMassOrbitLaunchSiteOutcomeFlightsGridFinsReusedLegsLandingPadBlockReusedCountSerialLongitudeLatitude
012010-06-04Falcon 96104.959412LEOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0003-80.57736628.561857
122012-05-22Falcon 9525.000000LEOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0005-80.57736628.561857
232013-03-01Falcon 9677.000000ISSCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0007-80.57736628.561857
342013-09-29Falcon 9500.000000POVAFB SLC 4EFalse Ocean1FalseFalseFalseNaN1.00B1003-120.61082934.632093
452013-12-03Falcon 93170.000000GTOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B1004-80.57736628.561857
562014-01-06Falcon 93325.000000GTOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B1005-80.57736628.561857
672014-04-18Falcon 92296.000000ISSCCAFS SLC 40True Ocean1FalseFalseTrueNaN1.00B1006-80.57736628.561857
782014-07-14Falcon 91316.000000LEOCCAFS SLC 40True Ocean1FalseFalseTrueNaN1.00B1007-80.57736628.561857
892014-08-05Falcon 94535.000000GTOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B1008-80.57736628.561857
9102014-09-07Falcon 94428.000000GTOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B1011-80.57736628.561857
\n
", 98 | "text/plain": " FlightNumber Date BoosterVersion PayloadMass Orbit LaunchSite \\\n0 1 2010-06-04 Falcon 9 6104.959412 LEO CCAFS SLC 40 \n1 2 2012-05-22 Falcon 9 525.000000 LEO CCAFS SLC 40 \n2 3 2013-03-01 Falcon 9 677.000000 ISS CCAFS SLC 40 \n3 4 2013-09-29 Falcon 9 500.000000 PO VAFB SLC 4E \n4 5 2013-12-03 Falcon 9 3170.000000 GTO CCAFS SLC 40 \n5 6 2014-01-06 Falcon 9 3325.000000 GTO CCAFS SLC 40 \n6 7 2014-04-18 Falcon 9 2296.000000 ISS CCAFS SLC 40 \n7 8 2014-07-14 Falcon 9 1316.000000 LEO CCAFS SLC 40 \n8 9 2014-08-05 Falcon 9 4535.000000 GTO CCAFS SLC 40 \n9 10 2014-09-07 Falcon 9 4428.000000 GTO CCAFS SLC 40 \n\n Outcome Flights GridFins Reused Legs LandingPad Block \\\n0 None None 1 False False False NaN 1.0 \n1 None None 1 False False False NaN 1.0 \n2 None None 1 False False False NaN 1.0 \n3 False Ocean 1 False False False NaN 1.0 \n4 None None 1 False False False NaN 1.0 \n5 None None 1 False False False NaN 1.0 \n6 True Ocean 1 False False True NaN 1.0 \n7 True Ocean 1 False False True NaN 1.0 \n8 None None 1 False False False NaN 1.0 \n9 None None 1 False False False NaN 1.0 \n\n ReusedCount Serial Longitude Latitude \n0 0 B0003 -80.577366 28.561857 \n1 0 B0005 -80.577366 28.561857 \n2 0 B0007 -80.577366 28.561857 \n3 0 B1003 -120.610829 34.632093 \n4 0 B1004 -80.577366 28.561857 \n5 0 B1005 -80.577366 28.561857 \n6 0 B1006 -80.577366 28.561857 \n7 0 B1007 -80.577366 28.561857 \n8 0 B1008 -80.577366 28.561857 \n9 0 B1011 -80.577366 28.561857 " 99 | }, 100 | "execution_count": 2, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": "df=pd.read_csv(\"https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_1.csv\")\ndf.head(10)" 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": "Identify and calculate the percentage of the missing values in each attribute\n" 111 | }, 112 | { 113 | "cell_type": "code", 114 | 
"execution_count": 3, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": "FlightNumber 0.000\nDate 0.000\nBoosterVersion 0.000\nPayloadMass 0.000\nOrbit 0.000\nLaunchSite 0.000\nOutcome 0.000\nFlights 0.000\nGridFins 0.000\nReused 0.000\nLegs 0.000\nLandingPad 40.625\nBlock 0.000\nReusedCount 0.000\nSerial 0.000\nLongitude 0.000\nLatitude 0.000\ndtype: float64" 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": "df.isnull().sum()/df.count()*100" 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": "Identify which columns are numerical and categorical:\n" 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 4, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": "FlightNumber int64\nDate object\nBoosterVersion object\nPayloadMass float64\nOrbit object\nLaunchSite object\nOutcome object\nFlights int64\nGridFins bool\nReused bool\nLegs bool\nLandingPad object\nBlock float64\nReusedCount int64\nSerial object\nLongitude float64\nLatitude float64\ndtype: object" 141 | }, 142 | "execution_count": 4, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": "df.dtypes" 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": "### TASK 1: Calculate the number of launches on each site\n\nThe data contains several Space X launch facilities: Cape Canaveral Space Launch Complex 40 VAFB SLC 4E , Vandenberg Air Force Base Space Launch Complex 4E (SLC-4E), Kennedy Space Center Launch Complex 39A KSC LC 39A .The location of each Launch Is placed in the column LaunchSite\n" 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": "Next, let's see the number of launches for each site.\n\nUse the method value_counts() on the column LaunchSite to determine the number of launches on each site:\n" 158 | }, 
159 | { 160 | "cell_type": "code", 161 | "execution_count": 5, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/plain": "CCAFS SLC 40 55\nKSC LC 39A 22\nVAFB SLC 4E 13\nName: LaunchSite, dtype: int64" 167 | }, 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": "# Apply value_counts() on column LaunchSite\ndf.LaunchSite.value_counts()" 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": "Each launch aims to an dedicated orbit, and here are some common orbit types:\n" 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": "* LEO: Low Earth orbit (LEO)is an Earth-centred orbit with an altitude of 2,000 km (1,200 mi) or less (approximately one-third of the radius of Earth),\\[1] or with at least 11.25 periods per day (an orbital period of 128 minutes or less) and an eccentricity less than 0.25.\\[2] Most of the manmade objects in outer space are in LEO \\[1].\n\n* VLEO: Very Low Earth Orbits (VLEO) can be defined as the orbits with a mean altitude below 450 km. Operating in these orbits can provide a number of benefits to Earth observation spacecraft as the spacecraft operates closer to the observation\\[2].\n\n* GTO A geosynchronous orbit is a high Earth orbit that allows satellites to match Earth's rotation. Located at 22,236 miles (35,786 kilometers) above Earth's equator, this position is a valuable spot for monitoring weather, communications and surveillance. 
Because the satellite orbits at the same speed that the Earth is turning, the satellite seems to stay in place over a single longitude, though it may drift north to south,\u201d NASA wrote on its Earth Observatory website \\[3] .\n\n* SSO (or SO): It is a Sun-synchronous orbit also called a heliosynchronous orbit is a nearly polar orbit around a planet, in which the satellite passes over any given point of the planet's surface at the same local mean solar time \\[4] .\n\n* ES-L1 :At the Lagrange points the gravitational forces of the two large bodies cancel out in such a way that a small object placed in orbit there is in equilibrium relative to the center of mass of the large bodies. L1 is one such point between the sun and the earth \\[5] .\n\n* HEO A highly elliptical orbit, is an elliptic orbit with high eccentricity, usually referring to one around Earth \\[6].\n\n* ISS A modular space station (habitable artificial satellite) in low Earth orbit. It is a multinational collaborative project between five participating space agencies: NASA (United States), Roscosmos (Russia), JAXA (Japan), ESA (Europe), and CSA (Canada) \\[7] \n\n* MEO Geocentric orbits ranging in altitude from 2,000 km (1,200 mi) to just below geosynchronous orbit at 35,786 kilometers (22,236 mi). Also known as an intermediate circular orbit. 
These are \"most commonly at 20,200 kilometers (12,600 mi), or 20,650 kilometers (12,830 mi), with an orbital period of 12 hours \\[8] \n\n* HEO Geocentric orbits above the altitude of geosynchronous orbit (35,786 km or 22,236 mi) \\[9] \n\n* GEO It is a circular geosynchronous orbit 35,786 kilometres (22,236 miles) above Earth's equator and following the direction of Earth's rotation \\[10] \n\n* PO It is one type of satellites in which a satellite passes above or nearly above both poles of the body being orbited (usually a planet such as the Earth \\[11] \n\nsome are shown in the following plot:\n" 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": "![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/Orbits.png)\n" 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": "### TASK 2: Calculate the number and occurrence of each orbit\n" 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": "Use the method .value_counts() to determine the number and occurrence of each orbit in the column Orbit\n" 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": "GTO 27\nISS 21\nVLEO 14\nPO 9\nLEO 7\nSSO 5\nMEO 3\nSO 1\nHEO 1\nGEO 1\nES-L1 1\nName: Orbit, dtype: int64" 208 | }, 209 | "execution_count": 6, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": "# Apply value_counts on Orbit column\ndf.Orbit.value_counts()" 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": "### TASK 3: Calculate the number and occurence of mission outcome per orbit type\n" 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": "Use the method value_counts() to determine the number and occurrence of each orbit in the 
column Outcome, then assign it to the variable landing_outcomes:\n" 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 11, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "text/plain": "True ASDS 41\nNone None 19\nTrue RTLS 14\nFalse ASDS 6\nTrue Ocean 5\nFalse Ocean 2\nNone ASDS 2\nFalse RTLS 1\nName: Outcome, dtype: int64" 234 | }, 235 | "execution_count": 11, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": "# landing_outcomes = values on Outcome column\nlanding_outcomes = df.Outcome.value_counts()\nlanding_outcomes" 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": "True Ocean means the mission outcome was successfully landed to a specific region of the ocean, while False Ocean means the mission outcome was unsuccessfully landed to a specific region of the ocean. True RTLS means the mission outcome was successfully landed to a ground pad, while False RTLS means the mission outcome was unsuccessfully landed to a ground pad. True ASDS means the mission outcome was successfully landed to a drone ship, while False ASDS means the mission outcome was unsuccessfully landed to a drone ship. 
None ASDS and None None both represent a failure to land.\n" 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 12, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": "0 True ASDS\n1 None None\n2 True RTLS\n3 False ASDS\n4 True Ocean\n5 False Ocean\n6 None ASDS\n7 False RTLS\n" 256 | } 257 | ], 258 | "source": "for i,outcome in enumerate(landing_outcomes.keys()):\n print(i,outcome)" 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": "We create a set of outcomes where the first stage did not land successfully:\n" 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 13, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": "{'False ASDS', 'False Ocean', 'False RTLS', 'None ASDS', 'None None'}" 273 | }, 274 | "execution_count": 13, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": "bad_outcomes=set(landing_outcomes.keys()[[1,3,5,6,7]])\nbad_outcomes" 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": "### TASK 4: Create a landing outcome label from Outcome column\n" 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": "Using the Outcome, create a list where the element is zero if the corresponding row in Outcome is in the set bad_outcomes; otherwise, it's one. 
Then assign it to the variable landing_class:\n" 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": "# landing_class = 0 if bad_outcome\nlanding_class = [0 if outcome in bad_outcomes else 1 for outcome in df['Outcome']]\n# landing_class = 1 otherwise" 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": "This variable will represent the classification variable that represents the outcome of each launch. If the value is zero, the first stage did not land successfully; one means the first stage landed successfully.\n" 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": "df['Class']=landing_class\ndf[['Class']].head(8)" 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": "df.head(5)" 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": "We can use the following line of code to determine the success rate:\n" 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": "df[\"Class\"].mean()" 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": "We can now export it to a CSV for the next section, but to make the answers consistent, in the next lab we will provide data in a pre-selected date range.\n" 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": "df.to_csv(\"dataset_part\\_2.csv\", index=False)\n" 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": "## Authors\n" 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "metadata": {}, 347 | "source": "Joseph Santarcangelo has a PhD in Electrical Engineering, his research focused on using machine learning, 
signal processing, and computer vision to determine how videos impact human cognition. Joseph has been working for IBM since he completed his PhD.\n" 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": "Nayef Abou Tayoun is a Data Scientist at IBM and pursuing a Master of Management in Artificial intelligence degree at Queen's University.\n" 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": "## Change Log\n" 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": "| Date (YYYY-MM-DD) | Version | Changed By | Change Description |\n| ----------------- | ------- | ---------- | ----------------------- |\n| 2020-09-20 | 1.0 | Joseph | Modified Multiple Areas |\n| 2020-11-04 | 1.1. | Nayef | updating the input data |\n| 2021-05-026 | 1.1. | Joseph | updating the input data |\n" 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": "Copyright \u00a9 2021 IBM Corporation. All rights reserved.\n" 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 3.8", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.8.10" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 4 391 | } -------------------------------------------------------------------------------- /Week 2 : EDA SQL Coursera.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "
\n \"cognitiveclass.ai\n
\n\n

Assignment: SQL Notebook for Peer Assignment

\n\nEstimated time needed: **60** minutes.\n\n## Introduction\n\nUsing this Python notebook you will:\n\n1. Understand the Spacex DataSet\n2. Load the dataset into the corresponding table in a Db2 database\n3. Execute SQL queries to answer assignment questions\n" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "## Overview of the DataSet\n\nSpaceX has gained worldwide attention for a series of historic milestones.\n\nIt is the only private company ever to return a spacecraft from low-earth orbit, which it first accomplished in December 2010.\nSpaceX advertises Falcon 9 rocket launches on its website with a cost of 62 million dollars, whereas other providers cost upward of 165 million dollars each; much of the savings is because SpaceX can reuse the first stage.\n\nTherefore if we can determine if the first stage will land, we can determine the cost of a launch.\n\nThis information can be used if an alternate company wants to bid against SpaceX for a rocket launch.\n\nThis dataset includes a record for each payload carried during a SpaceX mission into outer space.\n" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "### Download the datasets\n\nThis assignment requires you to load the spacex dataset.\n\nIn many cases the dataset to be analyzed is available as a .CSV (comma separated values) file, perhaps on the internet. Click on the link below to download and save the dataset (.CSV file):\n\nSpacex DataSet\n" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "### Store the dataset in database table\n\n**It is highly recommended to manually load the table using the database console LOAD tool in DB2**.\n\n\n\nNow open the Db2 console, open the LOAD tool, Select / Drag the .CSV file for the dataset, Next create a New Table, and then follow the on-screen instructions to load the data. 
Name the new table as follows:\n\n**SPACEXDATASET**\n\n**Follow these steps while using old DB2 UI which is having Open Console Screen**\n\n**Note:While loading Spacex dataset, ensure that detect datatypes is disabled. Later click on the pencil icon(edit option).**\n\n1. Change the Date Format by manually typing DD-MM-YYYY and timestamp format as DD-MM-YYYY HH\\:MM:SS\n\n2. Change the PAYLOAD_MASS\\_\\_KG\\_ datatype to INTEGER.\n\n\n" 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": "**Changes to be considered when having DB2 instance with the new UI having Go to UI screen**\n\n* Refer to this insruction in this link for viewing the new Go to UI screen.\n\n* Later click on **Data link(below SQL)** in the Go to UI screen and click on **Load Data** tab.\n\n* Later browse for the downloaded spacex file.\n\n\n\n* Once done select the schema andload the file.\n\n \n" 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 36, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": "Requirement already satisfied: sqlalchemy==1.3.9 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (1.3.9)\nRequirement already satisfied: ibm_db_sa in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (0.3.6)\nRequirement already satisfied: sqlalchemy>=0.7.3 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ibm_db_sa) (1.3.9)\nRequirement already satisfied: ibm-db>=2.0.0 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ibm_db_sa) (3.0.4)\nRequirement already satisfied: ipython-sql in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (0.4.0)\nRequirement already satisfied: sqlalchemy>=0.6.7 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (1.3.9)\nRequirement already satisfied: ipython>=1.0 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (7.22.0)\nRequirement 
already satisfied: prettytable<1 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (0.7.2)\nRequirement already satisfied: ipython-genutils>=0.1.0 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (0.2.0)\nRequirement already satisfied: six in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (1.15.0)\nRequirement already satisfied: sqlparse in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython-sql) (0.4.1)\nRequirement already satisfied: traitlets>=4.2 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (5.0.5)\nRequirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (3.0.17)\nRequirement already satisfied: backcall in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (0.2.0)\nRequirement already satisfied: decorator in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (5.0.9)\nRequirement already satisfied: pygments in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (2.9.0)\nRequirement already satisfied: setuptools>=18.5 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (52.0.0.post20210125)\nRequirement already satisfied: jedi>=0.16 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (0.17.2)\nRequirement already satisfied: pickleshare in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (0.7.5)\nRequirement already satisfied: pexpect>4.3 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from ipython>=1.0->ipython-sql) (4.8.0)\nRequirement already satisfied: parso<0.8.0,>=0.7.0 in 
/opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from jedi>=0.16->ipython>=1.0->ipython-sql) (0.7.0)\nRequirement already satisfied: ptyprocess>=0.5 in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from pexpect>4.3->ipython>=1.0->ipython-sql) (0.7.0)\nRequirement already satisfied: wcwidth in /opt/conda/envs/Python-3.8-main/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=1.0->ipython-sql) (0.2.5)\n" 37 | } 38 | ], 39 | "source": "!pip install sqlalchemy==1.3.9\n!pip install ibm_db_sa\n!pip install ipython-sql" 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": "### Connect to the database\n\nLet us first load the SQL extension and establish a connection with the database\n" 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 37, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": "The sql extension is already loaded. To reload it, use:\n %reload_ext sql\n" 55 | } 56 | ], 57 | "source": "%load_ext sql" 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": "**DB2 magic in case of old UI service credentials.**\n\nIn the next cell enter your db2 connection string. Recall you created Service Credentials for your Db2 instance before. 
From the **uri** field of your Db2 service credentials copy everything after db2:// (except the double quote at the end) and paste it in the cell below after ibm_db_sa://\n\n\n\nin the following format\n\n**%sql ibm_db_sa://my-username:my-password@my-hostname:my-port/my-db-name**\n\n**DB2 magic in case of new UI service credentials.**\n\n \n\n* Use the following format.\n\n* Add security=SSL at the end\n\n**%sql ibm_db_sa://my-username:my-password@my-hostname:my-port/my-db-name?security=SSL**\n" 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": "" 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": "" 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 38, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": "DB2/LINUXX8664\n" 87 | } 88 | ], 89 | "source": "%sql ibm_db_sa://sdk38546:cwn1%40l380qx5qb3k@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB" 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": "## Tasks\n\nNow write and execute SQL queries to solve the assignment tasks.\n\n### Task 1\n\n##### Display the names of the unique launch sites in the space mission\n" 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 39, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 105 | }, 106 | { 107 | "data": { 108 | "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
launch_site
CCAFS LC-40
CCAFS SLC-40
KSC LC-39A
VAFB SLC-4E
", 109 | "text/plain": "[('CCAFS LC-40',), ('CCAFS SLC-40',), ('KSC LC-39A',), ('VAFB SLC-4E',)]" 110 | }, 111 | "execution_count": 39, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": "%sql select distinct(LAUNCH_SITE) from SPACEXTBL" 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": "### Task 2\n\n##### Display 5 records where launch sites begin with the string 'CCA'\n" 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 40, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 132 | }, 133 | { 134 | "data": { 135 | "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
DATEtime__utc_booster_versionlaunch_sitepayloadpayload_mass__kg_orbitcustomermission_outcomelanding__outcome
2010-06-0418:45:00F9 v1.0 B0003CCAFS LC-40Dragon Spacecraft Qualification Unit0LEOSpaceXSuccessFailure (parachute)
2010-12-0815:43:00F9 v1.0 B0004CCAFS LC-40Dragon demo flight C1, two CubeSats, barrel of Brouere cheese0LEO (ISS)NASA (COTS) NROSuccessFailure (parachute)
2012-05-2207:44:00F9 v1.0 B0005CCAFS LC-40Dragon demo flight C2525LEO (ISS)NASA (COTS)SuccessNo attempt
2012-10-0800:35:00F9 v1.0 B0006CCAFS LC-40SpaceX CRS-1500LEO (ISS)NASA (CRS)SuccessNo attempt
2013-03-0115:10:00F9 v1.0 B0007CCAFS LC-40SpaceX CRS-2677LEO (ISS)NASA (CRS)SuccessNo attempt
", 136 | "text/plain": "[(datetime.date(2010, 6, 4), datetime.time(18, 45), 'F9 v1.0 B0003', 'CCAFS LC-40', 'Dragon Spacecraft Qualification Unit', 0, 'LEO', 'SpaceX', 'Success', 'Failure (parachute)'),\n (datetime.date(2010, 12, 8), datetime.time(15, 43), 'F9 v1.0 B0004', 'CCAFS LC-40', 'Dragon demo flight C1, two CubeSats, barrel of Brouere cheese', 0, 'LEO (ISS)', 'NASA (COTS) NRO', 'Success', 'Failure (parachute)'),\n (datetime.date(2012, 5, 22), datetime.time(7, 44), 'F9 v1.0 B0005', 'CCAFS LC-40', 'Dragon demo flight C2', 525, 'LEO (ISS)', 'NASA (COTS)', 'Success', 'No attempt'),\n (datetime.date(2012, 10, 8), datetime.time(0, 35), 'F9 v1.0 B0006', 'CCAFS LC-40', 'SpaceX CRS-1', 500, 'LEO (ISS)', 'NASA (CRS)', 'Success', 'No attempt'),\n (datetime.date(2013, 3, 1), datetime.time(15, 10), 'F9 v1.0 B0007', 'CCAFS LC-40', 'SpaceX CRS-2', 677, 'LEO (ISS)', 'NASA (CRS)', 'Success', 'No attempt')]" 137 | }, 138 | "execution_count": 40, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": "%sql select * from SPACEXTBL where LAUNCH_SITE like 'CCA%' limit 5" 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": "### Task 3\n\n##### Display the total payload mass carried by boosters launched by NASA (CRS)\n" 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 41, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 159 | }, 160 | { 161 | "data": { 162 | "text/html": "\n \n \n \n \n \n \n
1
45596
", 163 | "text/plain": "[(45596,)]" 164 | }, 165 | "execution_count": 41, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": "%sql select sum(PAYLOAD_MASS__KG_) from SPACEXTBL where CUSTOMER = 'NASA (CRS)'" 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": "### Task 4\n\n##### Display average payload mass carried by booster version F9 v1.1\n" 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 42, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 186 | }, 187 | { 188 | "data": { 189 | "text/html": "\n \n \n \n \n \n \n
1
2928.400000
", 190 | "text/plain": "[(Decimal('2928.400000'),)]" 191 | }, 192 | "execution_count": 42, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": "%sql select avg(PAYLOAD_MASS__KG_) from SPACEXTBL where BOOSTER_VERSION = 'F9 v1.1'" 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": "### Task 5\n\n##### List the date when the first succesful landing outcome in ground pad was acheived.\n\n*Hint:Use min function*\n" 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 43, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 213 | }, 214 | { 215 | "data": { 216 | "text/html": "\n \n \n \n \n \n \n
1
2015-12-22
", 217 | "text/plain": "[(datetime.date(2015, 12, 22),)]" 218 | }, 219 | "execution_count": 43, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": "%sql select min(DATE) from SPACEXTBL where Landing__Outcome = 'Success (ground pad)'" 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": "### Task 6\n\n##### List the names of the boosters which have success in drone ship and have payload mass greater than 4000 but less than 6000\n" 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 44, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 240 | }, 241 | { 242 | "data": { 243 | "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
booster_version
F9 FT B1022
F9 FT B1026
F9 FT B1021.2
F9 FT B1031.2
", 244 | "text/plain": "[('F9 FT B1022',), ('F9 FT B1026',), ('F9 FT B1021.2',), ('F9 FT B1031.2',)]" 245 | }, 246 | "execution_count": 44, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": "%sql select BOOSTER_VERSION from SPACEXTBL where Landing__Outcome = 'Success (drone ship)' and PAYLOAD_MASS__KG_ > 4000 and PAYLOAD_MASS__KG_ < 6000" 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": "### Task 7\n\n##### List the total number of successful and failure mission outcomes\n" 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 45, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 267 | }, 268 | { 269 | "data": { 270 | "text/html": "\n \n \n \n \n \n \n
1
100
", 271 | "text/plain": "[(Decimal('100'),)]" 272 | }, 273 | "execution_count": 45, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": "%sql select count(MISSION_OUTCOME) from SPACEXTBL where MISSION_OUTCOME = 'Success' or MISSION_OUTCOME = 'Failure (in flight)'" 279 | }, 280 | { 281 | "cell_type": "markdown", 282 | "metadata": {}, 283 | "source": "### Task 8\n\n##### List the names of the booster_versions which have carried the maximum payload mass. Use a subquery\n" 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 46, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 294 | }, 295 | { 296 | "data": { 297 | "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
booster_version
F9 B5 B1048.4
F9 B5 B1049.4
F9 B5 B1051.3
F9 B5 B1056.4
F9 B5 B1048.5
F9 B5 B1051.4
F9 B5 B1049.5
F9 B5 B1060.2
F9 B5 B1058.3
F9 B5 B1051.6
F9 B5 B1060.3
F9 B5 B1049.7
", 298 | "text/plain": "[('F9 B5 B1048.4',),\n ('F9 B5 B1049.4',),\n ('F9 B5 B1051.3',),\n ('F9 B5 B1056.4',),\n ('F9 B5 B1048.5',),\n ('F9 B5 B1051.4',),\n ('F9 B5 B1049.5',),\n ('F9 B5 B1060.2',),\n ('F9 B5 B1058.3',),\n ('F9 B5 B1051.6',),\n ('F9 B5 B1060.3',),\n ('F9 B5 B1049.7',)]" 299 | }, 300 | "execution_count": 46, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": "%sql select BOOSTER_VERSION from SPACEXTBL where PAYLOAD_MASS__KG_ = (select max(PAYLOAD_MASS__KG_) from SPACEXTBL)" 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": "### Task 9\n\n##### List the records which will display the month names, failure landing_outcomes in drone ship ,booster versions, launch_site for the months in year 2015\n" 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 47, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "name": "stdout", 319 | "output_type": "stream", 320 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\n(ibm_db_dbi.ProgrammingError) ibm_db_dbi::ProgrammingError: SQLNumResultCols failed: [IBM][CLI Driver][DB2/LINUXX8664] SQL0104N An unexpected token \"EXTRACT(MONTH, select\" was found following \"SELECT \". Expected tokens may include: \"\". 
SQLSTATE=42601 SQLCODE=-104\n[SQL: SELECT EXTRACT(MONTH, select min(DATE) from SPACEXTBL where Landing__Outcome = 'Success (ground pad)' )]\n(Background on this error at: http://sqlalche.me/e/f405)\n" 321 | } 322 | ], 323 | "source": "%sql SELECT EXTRACT(MONTH, select min(DATE) from SPACEXTBL where Landing__Outcome = 'Success (ground pad)')" 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": "### Task 10\n\n##### Rank the count of successful landing_outcomes between the date 2010-06-04 and 2017-03-20 in descending order.\n" 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 48, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "name": "stdout", 337 | "output_type": "stream", 338 | "text": " * ibm_db_sa://sdk38546:***@dashdb-txn-sbox-yp-lon02-07.services.eu-gb.bluemix.net:50000/BLUDB\nDone.\n" 339 | }, 340 | { 341 | "data": { 342 | "text/html": "\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
DATEtime__utc_booster_versionlaunch_sitepayloadpayload_mass__kg_orbitcustomermission_outcomelanding__outcome
2017-02-1914:39:00F9 FT B1031.1KSC LC-39ASpaceX CRS-102490LEO (ISS)NASA (CRS)SuccessSuccess (ground pad)
2017-01-1417:54:00F9 FT B1029.1VAFB SLC-4EIridium NEXT 19600Polar LEOIridium CommunicationsSuccessSuccess (drone ship)
2016-08-1405:26:00F9 FT B1026CCAFS LC-40JCSAT-164600GTOSKY Perfect JSAT GroupSuccessSuccess (drone ship)
2016-07-1804:45:00F9 FT B1025.1CCAFS LC-40SpaceX CRS-92257LEO (ISS)NASA (CRS)SuccessSuccess (ground pad)
2016-05-2721:39:00F9 FT B1023.1CCAFS LC-40Thaicom 83100GTOThaicomSuccessSuccess (drone ship)
2016-05-0605:21:00F9 FT B1022CCAFS LC-40JCSAT-144696GTOSKY Perfect JSAT GroupSuccessSuccess (drone ship)
2016-04-0820:43:00F9 FT B1021.1CCAFS LC-40SpaceX CRS-83136LEO (ISS)NASA (CRS)SuccessSuccess (drone ship)
2015-12-2201:29:00F9 FT B1019CCAFS LC-40OG2 Mission 2 11 Orbcomm-OG2 satellites2034LEOOrbcommSuccessSuccess (ground pad)
", 343 | "text/plain": "[(datetime.date(2017, 2, 19), datetime.time(14, 39), 'F9 FT B1031.1', 'KSC LC-39A', 'SpaceX CRS-10', 2490, 'LEO (ISS)', 'NASA (CRS)', 'Success', 'Success (ground pad)'),\n (datetime.date(2017, 1, 14), datetime.time(17, 54), 'F9 FT B1029.1', 'VAFB SLC-4E', 'Iridium NEXT 1', 9600, 'Polar LEO', 'Iridium Communications', 'Success', 'Success (drone ship)'),\n (datetime.date(2016, 8, 14), datetime.time(5, 26), 'F9 FT B1026', 'CCAFS LC-40', 'JCSAT-16', 4600, 'GTO', 'SKY Perfect JSAT Group', 'Success', 'Success (drone ship)'),\n (datetime.date(2016, 7, 18), datetime.time(4, 45), 'F9 FT B1025.1', 'CCAFS LC-40', 'SpaceX CRS-9', 2257, 'LEO (ISS)', 'NASA (CRS)', 'Success', 'Success (ground pad)'),\n (datetime.date(2016, 5, 27), datetime.time(21, 39), 'F9 FT B1023.1', 'CCAFS LC-40', 'Thaicom 8', 3100, 'GTO', 'Thaicom', 'Success', 'Success (drone ship)'),\n (datetime.date(2016, 5, 6), datetime.time(5, 21), 'F9 FT B1022', 'CCAFS LC-40', 'JCSAT-14', 4696, 'GTO', 'SKY Perfect JSAT Group', 'Success', 'Success (drone ship)'),\n (datetime.date(2016, 4, 8), datetime.time(20, 43), 'F9 FT B1021.1', 'CCAFS LC-40', 'SpaceX CRS-8', 3136, 'LEO (ISS)', 'NASA (CRS)', 'Success', 'Success (drone ship)'),\n (datetime.date(2015, 12, 22), datetime.time(1, 29), 'F9 FT B1019', 'CCAFS LC-40', 'OG2 Mission 2 11 Orbcomm-OG2 satellites', 2034, 'LEO', 'Orbcomm', 'Success', 'Success (ground pad)')]" 344 | }, 345 | "execution_count": 48, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": "%sql select * from SPACEXTBL where Landing__Outcome like 'Success%' and (DATE between '2010-06-04' and '2017-03-20') order by date desc" 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": "### Reference Links\n\n* Hands-on Lab : String Patterns, Sorting and Grouping\n\n* Hands-on Lab: Built-in functions\n\n* Hands-on Lab : Sub-queries and Nested SELECT Statements\n\n* Hands-on Tutorial: Accessing Databases with SQL 
magic\n\n* Hands-on Lab: Analyzing a real World Data Set\n" 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": "## Author(s)\n\n

Lakshmi Holla

\n" 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": "## Other Contributors\n\n

Rav Ahuja

\n" 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": "## Change log\n\n| Date | Version | Changed by | Change Description |\n|------|--------|--------|---------|\n| 2021-07-09 | 0.2 |Lakshmi Holla | Changes made in magic sql|\n| 2021-05-20 | 0.1 |Lakshmi Holla | Created Initial Version |\n" 371 | }, 372 | { 373 | "cell_type": "markdown", 374 | "metadata": {}, 375 | "source": "##

\u00a9 IBM Corporation 2021. All rights reserved.

\n" 376 | } 377 | ], 378 | "metadata": { 379 | "kernelspec": { 380 | "display_name": "Python 3.8", 381 | "language": "python", 382 | "name": "python3" 383 | }, 384 | "language_info": { 385 | "codemirror_mode": { 386 | "name": "ipython", 387 | "version": 3 388 | }, 389 | "file_extension": ".py", 390 | "mimetype": "text/x-python", 391 | "name": "python", 392 | "nbconvert_exporter": "python", 393 | "pygments_lexer": "ipython3", 394 | "version": "3.8.10" 395 | } 396 | }, 397 | "nbformat": 4, 398 | "nbformat_minor": 4 399 | } -------------------------------------------------------------------------------- /Machine Learning Prediction lab.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": "
\n \"cognitiveclass.ai\n
\n" 7 | }, 8 | { 9 | "cell_type": "markdown", 10 | "metadata": {}, 11 | "source": "# **Space X Falcon 9 First Stage Landing Prediction**\n" 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": "## Assignment: Machine Learning Prediction\n" 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": "Estimated time needed: **60** minutes\n" 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": "Space X advertises Falcon 9 rocket launches on its website with a cost of 62 million dollars; other providers cost upward of 165 million dollars each, much of the savings is because Space X can reuse the first stage. Therefore if we can determine if the first stage will land, we can determine the cost of a launch. This information can be used if an alternate company wants to bid against space X for a rocket launch. In this lab, you will create a machine learning pipeline to predict if the first stage will land given the data from the preceding labs.\n" 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": "![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/landing\\_1.gif)\n" 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": "Several examples of an unsuccessful landing are shown here:\n" 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": "![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/crash.gif)\n" 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": "Most unsuccessful landings are planned.
Space X performs a controlled landing in the oceans.\n" 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": "## Objectives\n" 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": "Perform exploratory Data Analysis and determine Training Labels\n\n* create a column for the class\n* Standardize the data\n* Split into training data and test data\n\n\\-Find best Hyperparameter for SVM, Classification Trees and Logistic Regression\n\n* Find the method that performs best using test data\n" 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": "" 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": "***\n" 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": "## Import Libraries and Define Auxiliary Functions\n" 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": "We will import the following libraries for the lab\n" 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 1, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": "# Pandas is a software library written for the Python programming language for data manipulation and analysis.\nimport pandas as pd\n# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays\nimport numpy as np\n# Matplotlib is a plotting library for python and pyplot gives us a MatLab like plotting framework. We will use this in our plotter function to plot data.\nimport matplotlib.pyplot as plt\n#Seaborn is a Python data visualization library based on matplotlib.
It provides a high-level interface for drawing attractive and informative statistical graphics\nimport seaborn as sns\n# Preprocessing allows us to standarsize our data\nfrom sklearn import preprocessing\n# Allows us to split our data into training and testing data\nfrom sklearn.model_selection import train_test_split\n# Allows us to test parameters of classification algorithms and find the best one\nfrom sklearn.model_selection import GridSearchCV\n# Logistic Regression classification algorithm\nfrom sklearn.linear_model import LogisticRegression\n# Support Vector Machine classification algorithm\nfrom sklearn.svm import SVC\n# Decision Tree classification algorithm\nfrom sklearn.tree import DecisionTreeClassifier\n# K Nearest Neighbors classification algorithm\nfrom sklearn.neighbors import KNeighborsClassifier" 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": "This function is to plot the confusion matrix.\n" 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 2, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": "def plot_confusion_matrix(y,y_predict):\n \"this function plots the confusion matrix\"\n from sklearn.metrics import confusion_matrix\n\n cm = confusion_matrix(y, y_predict)\n ax= plt.subplot()\n sns.heatmap(cm, annot=True, ax = ax); #annot=True to annotate cells\n ax.set_xlabel('Predicted labels')\n ax.set_ylabel('True labels')\n ax.set_title('Confusion Matrix'); \n ax.xaxis.set_ticklabels(['did not land', 'land']); ax.yaxis.set_ticklabels(['did not land', 'landed'])" 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": "## Load the dataframe\n" 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": "Load the data\n" 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FlightNumberDateBoosterVersionPayloadMassOrbitLaunchSiteOutcomeFlightsGridFinsReusedLegsLandingPadBlockReusedCountSerialLongitudeLatitudeClass
012010-06-04Falcon 96104.959412LEOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0003-80.57736628.5618570
122012-05-22Falcon 9525.000000LEOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0005-80.57736628.5618570
232013-03-01Falcon 9677.000000ISSCCAFS SLC 40None None1FalseFalseFalseNaN1.00B0007-80.57736628.5618570
342013-09-29Falcon 9500.000000POVAFB SLC 4EFalse Ocean1FalseFalseFalseNaN1.00B1003-120.61082934.6320930
452013-12-03Falcon 93170.000000GTOCCAFS SLC 40None None1FalseFalseFalseNaN1.00B1004-80.57736628.5618570
\n
", 115 | "text/plain": " FlightNumber Date BoosterVersion PayloadMass Orbit LaunchSite \\\n0 1 2010-06-04 Falcon 9 6104.959412 LEO CCAFS SLC 40 \n1 2 2012-05-22 Falcon 9 525.000000 LEO CCAFS SLC 40 \n2 3 2013-03-01 Falcon 9 677.000000 ISS CCAFS SLC 40 \n3 4 2013-09-29 Falcon 9 500.000000 PO VAFB SLC 4E \n4 5 2013-12-03 Falcon 9 3170.000000 GTO CCAFS SLC 40 \n\n Outcome Flights GridFins Reused Legs LandingPad Block \\\n0 None None 1 False False False NaN 1.0 \n1 None None 1 False False False NaN 1.0 \n2 None None 1 False False False NaN 1.0 \n3 False Ocean 1 False False False NaN 1.0 \n4 None None 1 False False False NaN 1.0 \n\n ReusedCount Serial Longitude Latitude Class \n0 0 B0003 -80.577366 28.561857 0 \n1 0 B0005 -80.577366 28.561857 0 \n2 0 B0007 -80.577366 28.561857 0 \n3 0 B1003 -120.610829 34.632093 0 \n4 0 B1004 -80.577366 28.561857 0 " 116 | }, 117 | "execution_count": 3, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": "data = pd.read_csv(\"https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_2.csv\")\n\n# If you were unable to complete the previous lab correctly you can uncomment and load this csv\n\n# data = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/dataset_part_2.csv')\n\ndata.head()" 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FlightNumberPayloadMassFlightsBlockReusedCountOrbit_ES-L1Orbit_GEOOrbit_GTOOrbit_HEOOrbit_ISS...Serial_B1058Serial_B1059Serial_B1060Serial_B1062GridFins_FalseGridFins_TrueReused_FalseReused_TrueLegs_FalseLegs_True
01.06104.9594121.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
12.0525.0000001.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
23.0677.0000001.01.00.00.00.00.00.01.0...0.00.00.00.01.00.01.00.01.00.0
34.0500.0000001.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
45.03170.0000001.01.00.00.00.01.00.00.0...0.00.00.00.01.00.01.00.01.00.0
..................................................................
8586.015400.0000002.05.02.00.00.00.00.00.0...0.00.01.00.00.01.00.01.00.01.0
8687.015400.0000003.05.02.00.00.00.00.00.0...1.00.00.00.00.01.00.01.00.01.0
8788.015400.0000006.05.05.00.00.00.00.00.0...0.00.00.00.00.01.00.01.00.01.0
8889.015400.0000003.05.02.00.00.00.00.00.0...0.00.01.00.00.01.00.01.00.01.0
8990.03681.0000001.05.00.00.00.00.00.00.0...0.00.00.01.00.01.01.00.00.01.0
\n

90 rows \u00d7 83 columns

\n
", 132 | "text/plain": " FlightNumber PayloadMass Flights Block ReusedCount Orbit_ES-L1 \\\n0 1.0 6104.959412 1.0 1.0 0.0 0.0 \n1 2.0 525.000000 1.0 1.0 0.0 0.0 \n2 3.0 677.000000 1.0 1.0 0.0 0.0 \n3 4.0 500.000000 1.0 1.0 0.0 0.0 \n4 5.0 3170.000000 1.0 1.0 0.0 0.0 \n.. ... ... ... ... ... ... \n85 86.0 15400.000000 2.0 5.0 2.0 0.0 \n86 87.0 15400.000000 3.0 5.0 2.0 0.0 \n87 88.0 15400.000000 6.0 5.0 5.0 0.0 \n88 89.0 15400.000000 3.0 5.0 2.0 0.0 \n89 90.0 3681.000000 1.0 5.0 0.0 0.0 \n\n Orbit_GEO Orbit_GTO Orbit_HEO Orbit_ISS ... Serial_B1058 \\\n0 0.0 0.0 0.0 0.0 ... 0.0 \n1 0.0 0.0 0.0 0.0 ... 0.0 \n2 0.0 0.0 0.0 1.0 ... 0.0 \n3 0.0 0.0 0.0 0.0 ... 0.0 \n4 0.0 1.0 0.0 0.0 ... 0.0 \n.. ... ... ... ... ... ... \n85 0.0 0.0 0.0 0.0 ... 0.0 \n86 0.0 0.0 0.0 0.0 ... 1.0 \n87 0.0 0.0 0.0 0.0 ... 0.0 \n88 0.0 0.0 0.0 0.0 ... 0.0 \n89 0.0 0.0 0.0 0.0 ... 0.0 \n\n Serial_B1059 Serial_B1060 Serial_B1062 GridFins_False GridFins_True \\\n0 0.0 0.0 0.0 1.0 0.0 \n1 0.0 0.0 0.0 1.0 0.0 \n2 0.0 0.0 0.0 1.0 0.0 \n3 0.0 0.0 0.0 1.0 0.0 \n4 0.0 0.0 0.0 1.0 0.0 \n.. ... ... ... ... ... \n85 0.0 1.0 0.0 0.0 1.0 \n86 0.0 0.0 0.0 0.0 1.0 \n87 0.0 0.0 0.0 0.0 1.0 \n88 0.0 1.0 0.0 0.0 1.0 \n89 0.0 0.0 1.0 0.0 1.0 \n\n Reused_False Reused_True Legs_False Legs_True \n0 1.0 0.0 1.0 0.0 \n1 1.0 0.0 1.0 0.0 \n2 1.0 0.0 1.0 0.0 \n3 1.0 0.0 1.0 0.0 \n4 1.0 0.0 1.0 0.0 \n.. ... ... ... ... 
\n85 0.0 1.0 0.0 1.0 \n86 0.0 1.0 0.0 1.0 \n87 0.0 1.0 0.0 1.0 \n88 0.0 1.0 0.0 1.0 \n89 1.0 0.0 0.0 1.0 \n\n[90 rows x 83 columns]" 133 | }, 134 | "execution_count": 4, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": "X = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_3.csv')\n\n# If you were unable to complete the previous lab correctly you can uncomment and load this csv\n\n# X = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/dataset_part_3.csv')\n\nX.head(100)" 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": "## TASK 1\n" 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": "Create a NumPy array from the column Class in data by applying the method to_numpy(), then\nassign it to the variable Y. Make sure the column is selected as a Pandas series (only one bracket df\\['name of column']) before converting it.\n" 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": "array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,\n 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,\n 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n 1, 1])" 159 | }, 160 | "execution_count": 7, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": "Y = data[\"Class\"].to_numpy()\nY" 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": "## TASK 2\n" 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": "Standardize the data in X then reassign it to the variable X using the transform provided below.\n" 176 | }, 177 | { 178 |
"cell_type": "code", 179 | "execution_count": 8, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": "# students get this \ntransform = preprocessing.StandardScaler()" 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 9, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FlightNumberPayloadMassFlightsBlockReusedCountOrbit_ES-L1Orbit_GEOOrbit_GTOOrbit_HEOOrbit_ISS...Serial_B1058Serial_B1059Serial_B1060Serial_B1062GridFins_FalseGridFins_TrueReused_FalseReused_TrueLegs_FalseLegs_True
01.06104.9594121.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
12.0525.0000001.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
23.0677.0000001.01.00.00.00.00.00.01.0...0.00.00.00.01.00.01.00.01.00.0
34.0500.0000001.01.00.00.00.00.00.00.0...0.00.00.00.01.00.01.00.01.00.0
45.03170.0000001.01.00.00.00.01.00.00.0...0.00.00.00.01.00.01.00.01.00.0
..................................................................
8586.015400.0000002.05.02.00.00.00.00.00.0...0.00.01.00.00.01.00.01.00.01.0
8687.015400.0000003.05.02.00.00.00.00.00.0...1.00.00.00.00.01.00.01.00.01.0
8788.015400.0000006.05.05.00.00.00.00.00.0...0.00.00.00.00.01.00.01.00.01.0
8889.015400.0000003.05.02.00.00.00.00.00.0...0.00.01.00.00.01.00.01.00.01.0
8990.03681.0000001.05.00.00.00.00.00.00.0...0.00.00.01.00.01.01.00.00.01.0
\n

90 rows \u00d7 83 columns

\n
", 192 | "text/plain": " FlightNumber PayloadMass Flights Block ReusedCount Orbit_ES-L1 \\\n0 1.0 6104.959412 1.0 1.0 0.0 0.0 \n1 2.0 525.000000 1.0 1.0 0.0 0.0 \n2 3.0 677.000000 1.0 1.0 0.0 0.0 \n3 4.0 500.000000 1.0 1.0 0.0 0.0 \n4 5.0 3170.000000 1.0 1.0 0.0 0.0 \n.. ... ... ... ... ... ... \n85 86.0 15400.000000 2.0 5.0 2.0 0.0 \n86 87.0 15400.000000 3.0 5.0 2.0 0.0 \n87 88.0 15400.000000 6.0 5.0 5.0 0.0 \n88 89.0 15400.000000 3.0 5.0 2.0 0.0 \n89 90.0 3681.000000 1.0 5.0 0.0 0.0 \n\n Orbit_GEO Orbit_GTO Orbit_HEO Orbit_ISS ... Serial_B1058 \\\n0 0.0 0.0 0.0 0.0 ... 0.0 \n1 0.0 0.0 0.0 0.0 ... 0.0 \n2 0.0 0.0 0.0 1.0 ... 0.0 \n3 0.0 0.0 0.0 0.0 ... 0.0 \n4 0.0 1.0 0.0 0.0 ... 0.0 \n.. ... ... ... ... ... ... \n85 0.0 0.0 0.0 0.0 ... 0.0 \n86 0.0 0.0 0.0 0.0 ... 1.0 \n87 0.0 0.0 0.0 0.0 ... 0.0 \n88 0.0 0.0 0.0 0.0 ... 0.0 \n89 0.0 0.0 0.0 0.0 ... 0.0 \n\n Serial_B1059 Serial_B1060 Serial_B1062 GridFins_False GridFins_True \\\n0 0.0 0.0 0.0 1.0 0.0 \n1 0.0 0.0 0.0 1.0 0.0 \n2 0.0 0.0 0.0 1.0 0.0 \n3 0.0 0.0 0.0 1.0 0.0 \n4 0.0 0.0 0.0 1.0 0.0 \n.. ... ... ... ... ... \n85 0.0 1.0 0.0 0.0 1.0 \n86 0.0 0.0 0.0 0.0 1.0 \n87 0.0 0.0 0.0 0.0 1.0 \n88 0.0 1.0 0.0 0.0 1.0 \n89 0.0 0.0 1.0 0.0 1.0 \n\n Reused_False Reused_True Legs_False Legs_True \n0 1.0 0.0 1.0 0.0 \n1 1.0 0.0 1.0 0.0 \n2 1.0 0.0 1.0 0.0 \n3 1.0 0.0 1.0 0.0 \n4 1.0 0.0 1.0 0.0 \n.. ... ... ... ... 
\n85 0.0 1.0 0.0 1.0 \n86 0.0 1.0 0.0 1.0 \n87 0.0 1.0 0.0 1.0 \n88 0.0 1.0 0.0 1.0 \n89 1.0 0.0 0.0 1.0 \n\n[90 rows x 83 columns]" 193 | }, 194 | "execution_count": 9, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": "X" 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 10, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": "array([[-1.71291154e+00, -1.94814463e-16, -6.53912840e-01, ...,\n -8.35531692e-01, 1.93309133e+00, -1.93309133e+00],\n [-1.67441914e+00, -1.19523159e+00, -6.53912840e-01, ...,\n -8.35531692e-01, 1.93309133e+00, -1.93309133e+00],\n [-1.63592675e+00, -1.16267307e+00, -6.53912840e-01, ...,\n -8.35531692e-01, 1.93309133e+00, -1.93309133e+00],\n ...,\n [ 1.63592675e+00, 1.99100483e+00, 3.49060516e+00, ...,\n 1.19684269e+00, -5.17306132e-01, 5.17306132e-01],\n [ 1.67441914e+00, 1.99100483e+00, 1.00389436e+00, ...,\n 1.19684269e+00, -5.17306132e-01, 5.17306132e-01],\n [ 1.71291154e+00, -5.19213966e-01, -6.53912840e-01, ...,\n -8.35531692e-01, -5.17306132e-01, 5.17306132e-01]])" 209 | }, 210 | "execution_count": 10, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": "X = transform.fit_transform(X) \nX" 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": "We split the data into training and testing data using the function train_test_split. The training data is divided into validation data, a second set used for training data; then the models are trained and hyperparameters are selected using the function GridSearchCV.\n" 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": "## TASK 3\n" 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": "Use the function train_test_split to split the data X and Y into training and test data. Set the parameter test_size to 0.2 and random_state to 2. 
The training data and test data should be assigned to the following labels.\n" 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": "X_train, X_test, Y_train, Y_test\n" 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 11, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)" 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": "We can see we only have 18 test samples.\n" 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 12, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": "(18,)" 257 | }, 258 | "execution_count": 12, 259 | "metadata": {}, 260 | "output_type": "execute_result" 261 | } 262 | ], 263 | "source": "Y_test.shape" 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": "## TASK 4\n" 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": "Create a logistic regression object, then create a GridSearchCV object logreg_cv with cv = 10. 
Fit the object to find the best parameters from the dictionary parameters.\n" 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 13, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": "parameters ={'C':[0.01,0.1,1],\n 'penalty':['l2'],\n 'solver':['lbfgs']}" 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 18, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": "GridSearchCV(cv=10, estimator=LogisticRegression(),\n param_grid={'C': [0.01, 0.1, 1], 'penalty': ['l2'],\n 'solver': ['lbfgs']})" 290 | }, 291 | "execution_count": 18, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": "parameters ={\"C\":[0.01,0.1,1],'penalty':['l2'], 'solver':['lbfgs']}# l1 lasso l2 ridge\nlr=LogisticRegression()\n\n# Instantiate the GridSearchCV object: logreg_cv\nlogreg_cv = GridSearchCV(lr, parameters, cv=10)\n\n# Fit it to the data\nlogreg_cv.fit(X_train, Y_train)" 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": "We output the GridSearchCV object for logistic regression. 
We display the best parameters using the data attribute best_params\\_ and the accuracy on the validation data using the data attribute best_score\\_.\n" 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 19, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "name": "stdout", 310 | "output_type": "stream", 311 | "text": "tuned hpyerparameters :(best parameters) {'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}\naccuracy : 0.8464285714285713\n" 312 | } 313 | ], 314 | "source": "print(\"tuned hpyerparameters :(best parameters) \",logreg_cv.best_params_)\nprint(\"accuracy :\",logreg_cv.best_score_)" 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": "## TASK 5\n" 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": "Calculate the accuracy on the test data using the method score:\n" 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 20, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/plain": "0.8333333333333334" 334 | }, 335 | "execution_count": 20, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": "logreg_cv.score(X_test, Y_test)" 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": "Let's look at the confusion matrix:\n" 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 21, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWgAAAEWCAYAAABLzQ1kAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAfzklEQVR4nO3dd5xdVbnG8d8zkwABktA7mKCAAlKkSJEmFnoRlepVQANXmuWCoFwiYOMqXPGKJSICIUSKBKQIQSAGECSFEDooNSSQAAKhJzPv/WOvgcMwM6fM2efsk3m+fPZnztllrTczh3fWrL3W2ooIzMyseNqaHYCZmfXMCdrMrKCcoM3MCsoJ2sysoJygzcwKygnazKygnKCt3yQNkXS1pJclXdaPcg6WNLGesTWDpL9I+nKz47DW5wQ9gEg6SNJUSa9KmpMSySfqUPTngZWB5SPiC7UWEhHjIuIzdYjnPSTtKCkkXdFt/8Zp/6QKy/m+pIvKnRcRu0bEBTWGa/YOJ+gBQtK3gJ8DPyJLpmsBvwL2rkPxHwAeiYiFdSgrL/OAbSQtX7Lvy8Aj9apAGf8/ZXXjD9MAIGk4cBpwVERcERGvRcSCiLg6Io5P5ywu6eeSZqft55IWT8d2lDRL0rclzU2t70PTsVOBU4D9U8v88O4tTUkjUkt1UHr/FUmPSZov6XFJB5fsv63kum0kTUldJ1MkbVNybJKk0yXdnsqZKGmFPr4NbwNXAgek69uBLwLjun2vzpb0tKRXJE2TtF3avwvw3ZJ/5z0lcfxQ0u3A68Daad9X0/FfS7q8pPwzJN0kSZX+/GzgcoIeGLYGlgAm9HHO94CtgE2AjYEtgZNLjq8CDAdWBw4HzpG0bESMJmuVXxIRS0fE7/sKRNJSwC+AXSNiKLANMKOH85YDrk3nLg+cBVzbrQV8EHAosBKwGPBffdUNXAj8R3r9WeB+YHa3c6aQfQ+WAy4GLpO0RERc3+3fuXHJNV8CRgFDgSe7lfdtYKP0y2c7su/dl8NrLFgFnKAHhuWB58t0QRwMnBYRcyNiHnAqWeLpsiAdXxAR1wGvAuvVGE8nsKGkIRExJyLu7+Gc3YFHI2JsRCyMiPHAQ8CeJef8ISIeiYg3gEvJEmuvIuLvwHKS1iNL1Bf2cM5FEfFCqvNMYHHK/zvPj4j70zULupX3OnAI2S+Yi4BjImJWmfLMACfogeIFYIWuLoZerMZ7W39Ppn3vlNEtwb8OLF1tIBHxGrA/cCQwR9K1kj5cQTxdMa1e8v7ZGuIZCxwN7EQPf1GkbpwHU7fKS2R/NfTVdQLwdF8HI+Iu4DFAZL9IzCriBD0w3AG8CezTxzmzyW72dVmL9//5X6nXgCVL3q9SejAiboiITwOrkrWKf1dBPF0xPVNjTF3GAl8Hrkut23ekLojvkPVNLxsRywAvkyVWgN66JfrsrpB0FFlLfDZwQs2R24DjBD0ARMTLZDfyzpG0j6QlJQ2WtKuk/0mnjQdOlrRiutl2Ctmf5LWYAWwvaa10g/KkrgOSVpa0V+qLfousq6SjhzKuA9ZNQwMHSdofWB+4psaYAIiIx4EdyPrcuxsKLCQb8TFI0inAsJLjzwEjqhmpIWld4Adk3RxfAk6QtElt0dtA4wQ9QETEWcC3yG78zSP7s/xospENkCWRqcBM4F5getpXS103Apeksqbx3qTaRnbjbDbwIlmy/HoPZbwA7JHOfYGs5blHRDxfS0zdyr4tInr66+AG4C9kQ++eJPuro7T7omsSzguSpperJ3UpXQScERH3RMSjZCNBxnaNkDHri3wz2cysmNyCNjMrKCdoM7M6k3RemtR1X8m+n0p6SNJMSRMkLVOuHCdoM7P6Ox/Ypdu+G4ENI2IjsvscJ3W/qDsnaDOzOouIyWQ3wUv3TSyZS3AnsEa5cvqauNBUh434vO9emllFznvi8n6vbbLg+ccqzjmLrfjBI8im93cZExFjqqjuMLKRTn0qbII2MyuqlIyrScj
vkPQ9svH248qd6wRtZgbQ2dN8qfpS9iCHPYCdK1kwywnazAygI9/lzNOStd8Bdui+zEBvnKDNzICIzrqVJWk8sCPZImWzgNFkozYWB25My4HfGRFH9lWOE7SZGUBn/RJ0RBzYw+4+10rviRO0mRlAHVvQ9eIEbWYGDblJWC0naDMzcAvazKyoIudRHLVwgjYzg7reJKwXJ2gzM3AXh5lZYfkmoZlZQbkFbWZWUL5JaGZWUL5JaGZWTBHugzYzKyb3QZuZFZS7OMzMCsotaDOzgupY0OwI3scJ2swM3MVhZlZY7uIwMysot6DNzArKCdrMrJjCNwnNzArKfdBmZgXlLg4zs4JyC9rMrKDcgjYzKyi3oM3MCmqhF+w3MyumArag25odgJlZIXR2Vr6VIek8SXMl3VeybzlJN0p6NH1dtlw5TtBmZpC1oCvdyjsf2KXbvhOBmyJiHeCm9L5PTtBmZlDXFnRETAZe7LZ7b+CC9PoCYJ9y5bgP2swMGtEHvXJEzAGIiDmSVip3gRO0mRlUNYpD0ihgVMmuMRExpt4hOUGbmQFEVHFqjAGqTcjPSVo1tZ5XBeaWu8B90GZmUNc+6F78Gfhyev1l4KpyF7gFbWYGdZ3qLWk8sCOwgqRZwGjgJ8Clkg4HngK+UK4cJ2gzM6jrTcKIOLCXQztXU44TtJkZQEdHsyN4HydoMzPwanZmZoXlBG1mVlAFXCzJCdrMDIjOysdBN4oTtJkZuIvDzKywPIrDzKyg3II2MysoJ2irxqDFB3PiJacxePHBtLW3M/Uvd3DV/17a7LCsyfy5yEkViyU1ihN0gS18awE/PehU3nr9TdoHtXPS5T/g3kl389jdjzY7NGsify5yMhBa0JLmA73+KoqIYfWuc1H21utvAtA+qJ32Qe19fGdtIPHnIgcDYZhdRAwFkHQa8CwwFhBwMDC03vUt6tTWxuhrzmClD6zCzWNv4LEZbiWZPxe5KOAojjzXg/5sRPwqIuZHxCsR8Wtgv74ukDRK0lRJUx+e/1iOobWO6Ozk+7sdz7e3PoKRG3+I1ddds9khWQH4c1F/0dlZ8dYoeSboDkkHS2qX1CbpYKDPX1ERMSYiNo+IzdcbunaOobWeN155nYfvvJ8Nd9i02aFYgfhzUUedUfnWIHkm6IOALwLPpe0LaZ9VaOhywxgybEkABi++GOtvuxHP/uuZJkdlzebPRU6is/KtQXIbxRERT5A9ZtxqNHylZTn8zKNpa2tDbWLKtX/nnpunNTssazJ/LnIyEG4SdpG0IvA1YERpPRFxWF51LmpmPfQkp+5+fLPDsILx5yInC4t3kzDPcdBXAbcCf6VM37OZWdMNsOVGl4yI7+RYvplZ/RSwiyPPm4TXSNotx/LNzOqmiMPs8mxBHwd8V9JbwAKyySrhmYRmVkgFbEHnOYrDswbNrHUMpAQNIGlZYB1gia59ETE5zzrNzGpSwKneeQ6z+ypZN8cawAxgK+AO4JN51WlmVqsiPpMwz5uExwFbAE9GxE7ApsC8HOszM6tdAad659nF8WZEvCkJSYtHxEOS1suxPjOz2g2E9aBLzJK0DHAlcKOkfwOzc6zPzKx2BeziyHMUx77p5fcl3QIMB67Pqz4zs36pY4KW9E3gq2SPUrgXODQi3qy2nDyeqLJcD7vvTV+XBl6sd51mZv0VHfXp4pC0OnAssH5EvCHpUuAA4Pxqy8qjBT2N7LeGSvZ1vQ/ACz2bWfHUt4tjEDBE0gJgSWrs3s3jkVcj612mmVneqhlmJ2kUMKpk15iIGAMQEc9I+hnwFPAGMDEiJtYSk5/qbWYGVbWgUzIe09OxNEFvb2Ak8BJwmaRDIuKiakPKcxy0mVnr6Kxi69ungMcjYl5ELACuALapJSS3oM3MgFhYt3HQTwFbSVqSrItjZ2BqLQXl1oKWNLaSfWZmhVCnFnRE/AO4HJhONoKtjV66Q8rJswW9QekbSe3AZjnWZ2ZWs3quxRERo4HR/S2n7i1
oSSdJmg9sJOkVSfPT+7lkj8EyMyue+vVB103dE3RE/DitBf3TiBgWEUPTtnxEnFTv+szM6iE6o+KtUfKc6n2SpL2A7dOuSRFxTV71mZn1S/HWSsp1PegfA1sC49Ku4yRt61a0mRVRLGx2BO+X503C3YFNIrJnmUu6ALgbcII2s8KJAragq+qDlrSspI2quGSZktfDq6nLzKyhCniTsGwLWtIkYK907gxgnqS/RcS3ylz6Y+DutNSoyPqi3Xo2s0IqYgu6ki6O4RHxSnrG4B8iYrSkmeUuiojxKblvQZagvxMRz/YvXDOzfBQxQVfSxTFI0qrAF4FqR2G0Ac8D/wbWlbR9mfPNzJoiOlTx1iiVtKBPA24AbouIKZLWBh4td5GkM4D9gft5t9cmgMk1xmpmlpsitqDLJuiIuAy4rOT9Y8B+FZS9D7BeRLxVc3RmZg0SnY1rGVeq1wQt6f/IWrw9iohjy5T9GDAYcII2s8JrtRZ0TcvjlXgdmCHpJkqSdAWJ3cys4SJaqAUdEReUvpe0VES8VkXZf06bmVnhtVoLGgBJWwO/J3si91qSNgaOiIiv93Vd9wRvZlZknQ0cnVGpSobZ/Rz4LPACQETcw7sLIJmZLRKiUxVvjVLRWhwR8bT0nqA68gnHzKw5WmoUR4mnJW0DhKTFgGOBB/MNy8yssaJxyzxXrJIEfSRwNrA68AzZpJWjejtZ0tX0PTxvrypjNDPLXUu2oCPieeDgKsr8Wfr6OWAV4KL0/kDgiWqCMzNrlJYaZtclTe0+G9iKrGV8B/DNNKPwfSLib+m60yOi9Gbi1ZI8zdvMCqmjRUdxXAxcCqwKrEY27Xt8BdetmJI7AJJGAivWEqSZWd4iVPHWKJX0QSsixpa8v0jS0RVc901gkqSulvYI4Igq4zMza4iW6oOWtFx6eYukE4E/knVx7A9cW67giLhe0jrAh9Ouh7xwkpkVVauN4phGlpC7fq2Utn4DOL2niyR9MiJulvS5boc+KImIuKLmaM3MctJSLeiIGFljmTsANwN79lQs4ARtZoXT0VnVI1oboqKZhJI2BNYHlujaFxEX9nRuRIxOXw+tR4BmZo3Qal0cAEgaDexIlqCvA3YFbgN6TNCS+nyYbEScVXWUZmY566zj6AxJywDnAhuS9RwcFhF3VFtOJS3ozwMbA3dHxKGSVk4V92Zo+roe2QNju5Yc3RM/7srMCqrOw+fOBq6PiM+nJTKWrKWQShL0GxHRKWmhpGHAXGDt3k6OiFMBJE0EPhYR89P771Py6CwzsyKpVxdHypPbA1/Jyo23gbdrKauSBD01Ndd/Rzay41XgrgquW6tbUG+TjYWuyIWzq/5rwAaAN2bf2uwQbBFVTReHpFHAqJJdYyJiTHq9NjAP+ENaP38acFyVDzwBKluLo2th/t9Iuh4YFhEzKyh7LHCXpAlkfTD7Al7E38wKqZpRHCkZj+nl8CDgY8AxEfEPSWcDJwL/XW1MfU1U+VhfxyJiel8FR8QPJf0F2C7tOjQi7q42QDOzRqjjII5ZwKyI+Ed6fzlZgq5aXy3oM/s4FsAnyxWeknifidzMrAjqNYojIp6V9LSk9SLiYWBn4IFayuprospOtQZoZtZq6jyK4xhgXBrB8RhQ07yQiiaqmJkt6ur5UO+ImAFs3t9ynKDNzICghdbiMDMbSBYW8IkqZceVKHOIpFPS+7UkbZl/aGZmjROo4q1RKhn49ytga7JnCgLMB87JLSIzsyborGJrlEq6OD4eER+TdDdARPw73Zk0M1tktGof9AJJ7aRx3JJWpLG/RMzMclfEpFZJgv4FMAFYSdIPyVa3OznXqMzMGqyjFVvQETFO0jSy2TAC9omIB3OPzMysgQr4xKuKFuxfC3gduLp0X0Q8lWdgZmaN1NmKLWiyJ3h3PTx2CWAk8DCwQY5xmZk1VAGfeFVRF8dHS9+nVe6O6OV0M7OW1Ko3Cd8jIqZL2iKPYMzMmqVTLdjF0e0hsG1kC1H
Pyy0iM7Mm6Gh2AD2opAU9tOT1QrI+6T/lE46ZWXO03CiONEFl6Yg4vkHxmJk1RUuN4pA0KCIW9vXoKzOzRUWrjeK4i6y/eYakPwOXAe88lTYirsg5NjOzhmm5Lo5kOeAFsmcQdo2HDsAJ2swWGa02zG6lNILjPt5NzF2K+NeAmVnNOlqsBd0OLA099pw7QZvZIqXVWtBzIuK0hkViZtZErZagC9jgNzPLRwEfSdhngt65YVGYmTVZS7WgI+LFRgZiZtZMrTrV28xskdeq46DNzBZ5LdXFYWY2kBQxQbc1OwAzsyKIKrZKSGqXdLeka2qNyS1oMzNy6YM+DngQGFZrAW5Bm5mRjeKodCtH0hrA7sC5/YnJCdrMDOgkKt4kjZI0tWQb1a24nwMn0M+ubXdxmJlRXSaNiDHAmJ6OSdoDmBsR0yTt2J+YnKDNzKjrCnDbAntJ2g1YAhgm6aKIOKTagtzFYWZG1oKudOtLRJwUEWtExAjgAODmWpIzuAVtZgbAQhVvFWUnaDMz8lnkPiImAZNqvd4J2syMYs4kdII2MyMbZlc0TtBmZhTzOX5O0GZmuIvDzKywOgrYhnaCNjPDLWgzs8IKt6DNzIqpiC1oT/UuuM9+Zkfuv28yDz1wGyccf1Szw7EmOflHZ7H97gewzyFHvrPvZ788lz0P/Br7/sd/cuxJp/HK/FebGGHrq2Y1u0Zxgi6wtrY2fnH2D9ljz0P46MY7sf/++/CRj6zT7LCsCfbZ7dP85qwfvGff1ltsyoSxv2HChb9mxJqrc+7YS5oU3aKh3k9UqQcn6ALbcotN+de/nuDxx59iwYIFXHrpVey152ebHZY1weabfJThw4a+Z9+2H9+MQYPaAdhogw/z3NznmxHaImMhUfHWKE7QBbba6qvw9KzZ77yf9cwcVlttlSZGZEU14dqJfGLrLZodRkuLKv5rlFxuEkr6XF/HI+KKXq4bBYwCUPtw2tqWyiG61iG9/yFpEcW702zN9dsLxtPe3s4en9mp2aG0tCLeJMxrFMee6etKwDbAzen9TmQrO/WYoEufUjBosdUHfCZ6ZtYc1lxjtXfer7H6qsyZ81wTI7Kiueq6G5l8+12c+4sf9/gL3SpXxGF2uXRxRMShEXEoWX/6+hGxX0TsB2yQR32LqilTZ/ChD41kxIg1GTx4MF/84t5cfc3EZodlBXHbnVP5/bjL+L8zRjNkiSWaHU7Lq9eC/fWU9zjoERExp+T9c8C6Ode5yOjo6OC4b5zMdddeTHtbG+dfcAkPPPBIs8OyJjh+9E+YcvdMXnrpFXbe5xC+fviXOHfsJby9YAFf+8b3gOxG4egTjmlypK2ro4Ddh8qzT1PSL4F1gPFkrekDgH9GRNlPkbs4rCdvzL612SFYAQ1eYe1+9+8c9IF9K845Fz85oSH9Sbm2oCPiaEn7AtunXWMiYkKedZqZ1aKIfdCNmOo9HZgfEX+VtKSkoRExvwH1mplVrIijOHIdBy3pa8DlwG/TrtWBK/Os08ysFgNxqvdRwLbAKwAR8SjZ0Dszs0IZMBNVSrwVEW93jc+UNIhiPlnGzAa4Io7iyDtB/03Sd4Ehkj4NfB24Ouc6zcyqVsSHxubdxXEiMA+4FzgCuA44Oec6zcyqNuAmqkREJ/C7tJmZFdaAGWYn6V766GuOiI3yqNfMrFZF7OLIqwW9R/ra9QiQsenrwcDrOdVpZlazIq4UmUuCjognASRtGxHblhw6UdLtwGl51GtmVquOOrWgJa0JXAisQtZlPSYizq6lrLxvEi4l6RNdbyRtAwzsRZ7NrJDqOFFlIfDtiPgIsBVwlKT1a4kp72F2hwPnSRqe3r8EHJZznWZmVatXF0dawXNOej1f0oNks6gfqLasvEdxTAM2ljSMbOW8l/Osz8ysVnncJJQ0AtgU+Ect1+eaoCUtDuwHjAAGdc0ojAj3QZtZoVQzzK708XzJmPREqNJzlgb+BHwjIl6pJaa8uziuAl4GpgFv5Vy
XmVnNqpnqXfp4vp5IGkyWnMf19gzWSuSdoNeIiF1yrsPMrN/q1cWhrKvg98CDEXFWf8rKexTH3yV9NOc6zMz6rY6jOLYFvgR8UtKMtO1WS0x5t6A/AXxF0uNkXRwCwjMJzaxo6jiK4zayXNdveSfoXXMu38ysLgbSVG/gPTMKVwL8XHgzK6wBs1hSF0l7AWcCqwFzgQ8ADwIb5FmvmVm1OqJ4TyXM+ybh6WRTHR+JiJHAzsDtOddpZla1iKh4a5S8E/SCiHgBaJPUFhG3AJvkXKeZWdWK+NDYvG8SvpRm00wGxkmaS7aQiJlZoRSxDzrvFvTewBvAN4HrgX8Be+Zcp5lZ1TojKt4aJe9RHK+VvL0gz7rMzPqjiC3ovB55NZ+eH3nVNVFlWB71mpnVqoijOPJ6osrQPMo1M8tLI7suKpX3TUIzs5YwYLo4zMxajVvQZmYF5Ra0mVlBdURHs0N4HydoMzPqt9xoPTlBm5kxAJcbNTNrFW5Bm5kVlEdxmJkVlEdxmJkV1ICZ6m1m1mrcB21mVlDugzYzKyi3oM3MCsrjoM3MCsotaDOzgvIoDjOzgvJNQjOzgipiF0feT/U2M2sJUcV/5UjaRdLDkv4p6cRaY3IL2syM+rWgJbUD5wCfBmYBUyT9OSIeqLYsJ2gzM+raB70l8M+IeAxA0h+BvYFFJ0EvfPsZNTuGopA0KiLGNDsOKxZ/LuqrmpwjaRQwqmTXmJKfxerA0yXHZgEfryUm90G3hlHlT7EByJ+LJomIMRGxeclW+ouyp0RfU/PcCdrMrL5mAWuWvF8DmF1LQU7QZmb1NQVYR9JISYsBBwB/rqWgwvZB23u4n9F64s9FAUXEQklHAzcA7cB5EXF/LWWpiIOzzczMXRxmZoXlBG1mVlBO0P0g6fuS/iu9Pk3Sp3o4Z0dJ19Spvu/2cewJSSvUqZ5X61GO1aZe339JIyTdV4+yrDmcoOskIk6JiL/mXE2vCdrMFj1O0FWS9L20CMpfgfVK9p8v6fPp9S6SHpJ0G/C5Xsr5iqQrJF0v6VFJ/1Ny7EBJ90q6T9IZad9PgCGSZkgaVybGKyVNk3R/mvHUtf9VST+UdI+kOyWtnPaPlHSHpCmSTu/Ht8fqSNLSkm6SND19HvZO+0dIelDS79LPeKKkIenYZunnewdwVFP/AdZvTtBVkLQZ2ZjGTckS7xY9nLME8DtgT2A7YJU+itwE2B/4KLC/pDUlrQacAXwyHd9C0j4RcSLwRkRsEhEHlwn1sIjYDNgcOFbS8mn/UsCdEbExMBn4Wtp/NvDriNgCeLZM2dY4bwL7RsTHgJ2AMyV1zVJbBzgnIjYAXgL2S/v/ABwbEVs3OlirPyfo6mwHTIiI1yPiFXoefP5h4PGIeDSyMYwX9VHeTRHxckS8SbaQygfIkv6kiJgXEQuBccD2VcZ5rKR7gDvJZjStk/a/DXT1h08DRqTX2wLj0+uxVdZl+RHwI0kzgb+SrfGwcjr2eETMSK+nASMkDQeWiYi/pf3+WbY4T1SpXiUDxysdXP5WyesOsp9HvxaJkrQj8Clg64h4XdIkYIl0eEG8O/C9q74uHhBfPAcDKwKbRcQCSU/w7s+y+2dnCNlnxz/HRYhb0NWZDOwraYikoWTdGN09BIyU9MH0/sAq6/gHsIOkFdK6sgcCXS2iBZIGl7l+OPDvlJw/DGxVQZ23k3XdQJYUrBiGA3NTct6J7C+sXkXES8DLkj6Rdvln2eKcoKsQEdOBS4AZwJ+AW3s4502yVcauTTcJn6yyjjnAScAtwD3A9Ii4Kh0eA8wsc5PwemBQ+rP4dLJujnKOA46SNIUsKVgxjAM2lzSVLNk+VME1hwLnpJuEb+QZnOXPU73NzArKLWgzs4JygjYzKygnaDOzgnKCNjMrKCdoM7OCcoK295HUkdb8uE/SZZKW7EdZpWuUnCtp/T7O3VHSNjXU0eNKfpWs8FftynGlKxia5c0J2nrStebHhmTTw48sPZgm0FQ
tIr4aEQ/0ccqOQNUJ2mxR5QRt5dwKfCi1bm+RdDFwr6R2ST9NK+DNlHQEgDK/lPSApGuBlboKkjRJ0ubp9S5plbZ70optI8h+EXwztd63k7SipD+lOqZI2jZdu3xawe1uSb+lgunxva3wl46dmWK5SdKKad8Hla00OE3SrWlWZvcyj03/zpmS/ljj99esV16Lw3olaRCwK9nsRIAtgQ0j4vGU5F6OiC0kLQ7cLmki2Up/65Gt0Lcy2SJQ53Urd0WyFf+2T2UtFxEvSvoN8GpE/CyddzHwvxFxm6S1yB7C+RFgNHBbRJwmaXeymZvlHJbqGAJMkfSniHiBbIW/6RHxbUmnpLKPJpu1eWREPCrp48CvyFYYLHUiMDIi3pK0TCXfU7NqOEFbT4ZImpFe3wr8nqzr4a6IeDzt/wywUVf/MtkU8XXIVt4bHxEdwGxJN/dQ/lbA5K6yIuLFXuL4FLD+uytsMiytgbI9aZ3tiLhW0r8r+DcdK2nf9Lprhb8XgE6y6fuQrTx4haSl07/3spK6F++hzJnAOElXAldWEINZVZygrSdvRMQmpTtSonqtdBdwTETc0O283Si/olqlq661ka3K9541JVIsFa9RUGaFv+4i1ftS9+9BD3Yn+2WxF/DfkjZIS8Sa1YX7oK1WNwD/2bW6nqR1JS1FtuLfAamPelWyhea7u4Nsxb6R6drl0v75wNCS8yaSdTeQztskvZxMWqlN0q7AsmVi7WuFvzag66+Ag8i6Tl4BHpf0hVSHJG1cWqCkNmDNiLgFOAFYBli6TBxmVXEL2mp1LtmC/9OVNWnnAfsAE8j6au8FHuHdpVLfERHzUh/2FSnRzQU+DVwNXK7s0U7HAMeSrcw2k+yzOpnsRuKpwHhJ01P5T5WJ9XrgyFTOw7x3hb/XgA0kTQNeJnvCDWS/AH4t6WRgMPBHstUFu7QDFylbJF9kfeUvlYnDrCpezc7MrKDcxWFmVlBO0GZmBeUEbWZWUE7QZmYF5QRtZlZQTtBmZgXlBG1mVlD/Dx8p7cSxtyKOAAAAAElFTkSuQmCC\n", 355 | "text/plain": "
" 356 | }, 357 | "metadata": { 358 | "needs_background": "light" 359 | }, 360 | "output_type": "display_data" 361 | } 362 | ], 363 | "source": "yhat=logreg_cv.predict(X_test)\nplot_confusion_matrix(Y_test,yhat)" 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": "Examining the confusion matrix, we see that logistic regression can distinguish between the different classes. We see that the major problem is false positives.\n" 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": "## TASK 6\n" 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": "Create a support vector machine object, then create a GridSearchCV object svm_cv with cv = 10. Fit the object to find the best parameters from the dictionary parameters.\n" 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 29, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": "parameters = {'kernel':('linear', 'rbf','poly','rbf', 'sigmoid'),\n 'C': np.logspace(-3, 3, 5),\n 'gamma':np.logspace(-3, 3, 5)}\nsvm = SVC()" 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 30, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "text/plain": "GridSearchCV(cv=10, estimator=SVC(),\n param_grid={'C': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,\n 1.00000000e+03]),\n 'gamma': array([1.00000000e-03, 3.16227766e-02, 1.00000000e+00, 3.16227766e+01,\n 1.00000000e+03]),\n 'kernel': ('linear', 'rbf', 'poly', 'rbf', 'sigmoid')})" 395 | }, 396 | "execution_count": 30, 397 | "metadata": {}, 398 | "output_type": "execute_result" 399 | } 400 | ], 401 | "source": "# Instantiate the GridSearchCV object: svm_cv\nsvm_cv = GridSearchCV(svm, parameters, cv=10)\n\n# Fit it to the data\nsvm_cv.fit(X_train, Y_train)" 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 31, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "name": "stdout", 410 | 
"output_type": "stream", 411 | "text": "tuned hpyerparameters :(best parameters) {'C': 1.0, 'gamma': 0.03162277660168379, 'kernel': 'sigmoid'}\naccuracy : 0.8482142857142856\n" 412 | } 413 | ], 414 | "source": "print(\"tuned hpyerparameters :(best parameters) \",svm_cv.best_params_)\nprint(\"accuracy :\",svm_cv.best_score_)" 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": "## TASK 7\n" 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": {}, 424 | "source": "Calculate the accuracy on the test data using the method score:\n" 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 32, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": "0.8333333333333334" 434 | }, 435 | "execution_count": 32, 436 | "metadata": {}, 437 | "output_type": "execute_result" 438 | } 439 | ], 440 | "source": "svm_cv.score(X_test, Y_test)" 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": "We can plot the confusion matrix\n" 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 33, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "image/png": "\n", 455 | "text/plain": "
" 456 | }, 457 | "metadata": { 458 | "needs_background": "light" 459 | }, 460 | "output_type": "display_data" 461 | } 462 | ], 463 | "source": "yhat=svm_cv.predict(X_test)\nplot_confusion_matrix(Y_test,yhat)" 464 | }, 465 | { 466 | "cell_type": "markdown", 467 | "metadata": {}, 468 | "source": "## TASK 8\n" 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": "Create a decision tree classifier object then create a GridSearchCV object tree_cv with cv = 10. Fit the object to find the best parameters from the dictionary parameters.\n" 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 34, 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": "parameters = {'criterion': ['gini', 'entropy'],\n 'splitter': ['best', 'random'],\n 'max_depth': [2*n for n in range(1,10)],\n 'max_features': ['auto', 'sqrt'],\n 'min_samples_leaf': [1, 2, 4],\n 'min_samples_split': [2, 5, 10]}\n\ntree = DecisionTreeClassifier()" 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 35, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": "GridSearchCV(cv=10, estimator=DecisionTreeClassifier(),\n param_grid={'criterion': ['gini', 'entropy'],\n 'max_depth': [2, 4, 6, 8, 10, 12, 14, 16, 18],\n 'max_features': ['auto', 'sqrt'],\n 'min_samples_leaf': [1, 2, 4],\n 'min_samples_split': [2, 5, 10],\n 'splitter': ['best', 'random']})" 490 | }, 491 | "execution_count": 35, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": "# Instantiate the GridSearchCV object: tree_cv\ntree_cv = GridSearchCV(tree, parameters, cv=10)\n\n# Fit it to the data\ntree_cv.fit(X_train, Y_train)\n" 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 36, 501 | "metadata": {}, 502 | "outputs": [ 503 | { 504 | "name": "stdout", 505 | "output_type": "stream", 506 | "text": "tuned hpyerparameters :(best parameters) {'criterion': 'entropy', 'max_depth': 8, 'max_features': 
'auto', 'min_samples_leaf': 4, 'min_samples_split': 10, 'splitter': 'random'}\naccuracy : 0.8857142857142856\n" 507 | } 508 | ], 509 | "source": "print(\"tuned hpyerparameters :(best parameters) \",tree_cv.best_params_)\nprint(\"accuracy :\",tree_cv.best_score_)" 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": "## TASK 9\n" 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": "Calculate the accuracy of tree_cv on the test data using the method score:\n" 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 38, 524 | "metadata": {}, 525 | "outputs": [ 526 | { 527 | "data": { 528 | "text/plain": "0.6666666666666666" 529 | }, 530 | "execution_count": 38, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": "tree_cv.score(X_test, Y_test)" 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": "We can plot the confusion matrix\n" 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 39, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "data": { 549 | "image/png": "\n", 550 | "text/plain": "
" 551 | }, 552 | "metadata": { 553 | "needs_background": "light" 554 | }, 555 | "output_type": "display_data" 556 | } 557 | ], 558 | "source": "yhat = svm_cv.predict(X_test)\nplot_confusion_matrix(Y_test,yhat)" 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": "## TASK 10\n" 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": "Create a k nearest neighbors object then create a GridSearchCV object knn_cv with cv = 10. Fit the object to find the best parameters from the dictionary parameters.\n" 569 | }, 570 | { 571 | "cell_type": "code", 572 | "execution_count": 40, 573 | "metadata": {}, 574 | "outputs": [], 575 | "source": "parameters = {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],\n 'p': [1,2]}\n\nKNN = KNeighborsClassifier()" 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 41, 580 | "metadata": {}, 581 | "outputs": [ 582 | { 583 | "data": { 584 | "text/plain": "GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],\n 'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n 'p': [1, 2]})" 585 | }, 586 | "execution_count": 41, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": "# Instantiate the GridSearchCV object: knn_cv\nknn_cv = GridSearchCV(KNN, parameters, cv=10)\n\n# Fit it to the data\nknn_cv.fit(X_train, Y_train)" 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 42, 596 | "metadata": {}, 597 | "outputs": [ 598 | { 599 | "name": "stdout", 600 | "output_type": "stream", 601 | "text": "tuned hpyerparameters :(best parameters) {'algorithm': 'auto', 'n_neighbors': 10, 'p': 1}\naccuracy : 0.8482142857142858\n" 602 | } 603 | ], 604 | "source": "print(\"tuned hpyerparameters :(best parameters) \",knn_cv.best_params_)\nprint(\"accuracy :\",knn_cv.best_score_)" 605 | }, 606 | { 607 | "cell_type": 
"markdown", 608 | "metadata": {}, 609 | "source": "## TASK 11\n" 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": "Calculate the accuracy of knn_cv on the test data using the method score:\n" 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 43, 619 | "metadata": {}, 620 | "outputs": [ 621 | { 622 | "data": { 623 | "text/plain": "0.8333333333333334" 624 | }, 625 | "execution_count": 43, 626 | "metadata": {}, 627 | "output_type": "execute_result" 628 | } 629 | ], 630 | "source": "knn_cv.score(X_test, Y_test)" 631 | }, 632 | { 633 | "cell_type": "markdown", 634 | "metadata": {}, 635 | "source": "We can plot the confusion matrix\n" 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 44, 640 | "metadata": {}, 641 | "outputs": [ 642 | { 643 | "data": { 644 | "image/png": "\n", 645 | "text/plain": "
" 646 | }, 647 | "metadata": { 648 | "needs_background": "light" 649 | }, 650 | "output_type": "display_data" 651 | } 652 | ], 653 | "source": "yhat = knn_cv.predict(X_test)\nplot_confusion_matrix(Y_test,yhat)" 654 | }, 655 | { 656 | "cell_type": "markdown", 657 | "metadata": {}, 658 | "source": "## TASK 12\n" 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": "Find the method that performs best:\n" 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 45, 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": "0.8333333333333334" 673 | }, 674 | "execution_count": 45, 675 | "metadata": {}, 676 | "output_type": "execute_result" 677 | } 678 | ], 679 | "source": "predictors = [knn_cv, svm_cv, logreg_cv, tree_cv]\nbest_predictor = \"\"\nbest_result = 0\nfor predictor in predictors:\n \n predictor.score(X_test, Y_test)\n" 680 | }, 681 | { 682 | "cell_type": "markdown", 683 | "metadata": {}, 684 | "source": "## Authors\n" 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": {}, 689 | "source": "Joseph Santarcangelo has a PhD in Electrical Engineering, his research focused on using machine learning, signal processing, and computer vision to determine how videos impact human cognition. Joseph has been working for IBM since he completed his PhD.\n" 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": {}, 694 | "source": "## Change Log\n" 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": "| Date (YYYY-MM-DD) | Version | Changed By | Change Description |\n| ----------------- | ------- | ---------- | ----------------------- |\n| 2020-09-20 | 1.0 | Joseph | Modified Multiple Areas |\n" 700 | }, 701 | { 702 | "cell_type": "markdown", 703 | "metadata": {}, 704 | "source": "Copyright \u00a9 2020 IBM Corporation. 
All rights reserved.\n" 705 | } 706 | ], 707 | "metadata": { 708 | "kernelspec": { 709 | "display_name": "Python 3.8", 710 | "language": "python", 711 | "name": "python3" 712 | }, 713 | "language_info": { 714 | "codemirror_mode": { 715 | "name": "ipython", 716 | "version": 3 717 | }, 718 | "file_extension": ".py", 719 | "mimetype": "text/x-python", 720 | "name": "python", 721 | "nbconvert_exporter": "python", 722 | "pygments_lexer": "ipython3", 723 | "version": "3.8.10" 724 | } 725 | }, 726 | "nbformat": 4, 727 | "nbformat_minor": 4 728 | } --------------------------------------------------------------------------------