├── Amazon Web Scraper Project.ipynb ├── Automate API Extraction + Appending Data + Extra -- Project.ipynb ├── COVID Portfolio Project - Data Exploration.sql ├── CovidDeaths.xlsx ├── CovidVaccinations.xlsx ├── Data Cleaning Portfolio Project Queries.sql ├── Global YouTube Statistics.csv ├── Movie Portfolio Project.ipynb ├── Nashville Housing Data for Data Cleaning (reuploaded).xlsx ├── Nashville Housing Data for Data Cleaning.xlsx ├── README.md ├── Tableau Joins File.xlsx ├── Tableau Portfolio Project SQL Queries.sql ├── app.py ├── app_starting_code.py └── us-housing-app.zip /Amazon Web Scraper Project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "f236cbb9", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# import libraries \n", 11 | "\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "import requests\n", 14 | "import time\n", 15 | "import datetime\n", 16 | "\n", 17 | "import smtplib\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 16, 23 | "id": "9b531b61", 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "\n", 31 | " Funny Got Data MIS Data Systems Business Analyst T-Shirt\n", 32 | " \n", 33 | "\n", 34 | " $16.99\n", 35 | " \n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "# Connect to Website and pull in data\n", 41 | "\n", 42 | "URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'\n", 43 | "\n", 44 | "headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\", \"Accept-Encoding\":\"gzip, deflate\", \"Accept\":\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\", \"DNT\":\"1\",\"Connection\":\"close\", \"Upgrade-Insecure-Requests\":\"1\"}\n", 45 | "\n", 46 | "page = requests.get(URL, headers=headers)\n", 47 | "\n", 48 | "soup1 = BeautifulSoup(page.content, \"html.parser\")\n", 49 | "\n", 50 | "soup2 = BeautifulSoup(soup1.prettify(), \"html.parser\")\n", 51 | "\n", 52 | "title = soup2.find(id='productTitle').get_text()\n", 53 | "\n", 54 | "price = soup2.find(id='priceblock_ourprice').get_text()\n", 55 | "\n", 56 | "\n", 57 | "print(title)\n", 58 | "print(price)\n", 59 | "\n", 60 | "\n" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 17, 66 | "id": "b6f7d66e", 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "name": "stdout", 71 | "output_type": "stream", 72 | "text": [ 73 | "Funny Got Data MIS Data Systems Business Analyst T-Shirt\n", 74 | "16.99\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "# Clean up the data a little bit\n", 80 | "\n", 81 | "price = price.strip()[1:]\n", 82 | "title = title.strip()\n", 83 | "\n", 84 | "print(title)\n", 85 | "print(price)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 21, 91 | "id": "4f021c23", 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "2021-08-21\n" 99 | ] 100 | } 101 | ], 102 | "source": [ 103 | "# Create a Timestamp for your output to track when data was collected\n", 104 | "\n", 105 | "import datetime\n", 106 | "\n", 107 | "today = datetime.date.today()\n", 108 | "\n", 109 | "print(today)\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | 
"execution_count": 22, 115 | "id": "14d703ca", 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "# Create CSV and write headers and data into the file\n", 120 | "\n", 121 | "import csv \n", 122 | "\n", 123 | "header = ['Title', 'Price', 'Date']\n", 124 | "data = [title, price, today]\n", 125 | "\n", 126 | "\n", 127 | "with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as f:\n", 128 | " writer = csv.writer(f)\n", 129 | " writer.writerow(header)\n", 130 | " writer.writerow(data)\n", 131 | " \n" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "d07eeb86", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "import pandas as pd\n", 142 | "\n", 143 | "df = pd.read_csv(r'C:\\Users\\alexf\\AmazonWebScraperDataset.csv')\n", 144 | "\n", 145 | "print(df)\n", 146 | "\n" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 29, 152 | "id": "6b05c1eb", 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "#Now we are appending data to the csv\n", 157 | "\n", 158 | "with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n", 159 | " writer = csv.writer(f)\n", 160 | " writer.writerow(data)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 31, 166 | "id": "8e95b9e0", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "#Combine all of the above code into one function\n", 171 | "\n", 172 | "\n", 173 | "def check_price():\n", 174 | " URL = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data%2Banalyst%2Btshirt&qid=1626655184&sr=8-3&customId=B0752XJYNL&th=1'\n", 175 | "\n", 176 | " headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\", \"Accept-Encoding\":\"gzip, deflate\", \"Accept\":\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\", \"DNT\":\"1\",\"Connection\":\"close\", \"Upgrade-Insecure-Requests\":\"1\"}\n", 177 | "\n", 178 | " page = requests.get(URL, headers=headers)\n", 179 | "\n", 180 | " soup1 = BeautifulSoup(page.content, \"html.parser\")\n", 181 | "\n", 182 | " soup2 = BeautifulSoup(soup1.prettify(), \"html.parser\")\n", 183 | "\n", 184 | " title = soup2.find(id='productTitle').get_text()\n", 185 | "\n", 186 | " price = soup2.find(id='priceblock_ourprice').get_text()\n", 187 | "\n", 188 | " price = price.strip()[1:]\n", 189 | " title = title.strip()\n", 190 | "\n", 191 | " import datetime\n", 192 | "\n", 193 | " today = datetime.date.today()\n", 194 | " \n", 195 | " import csv \n", 196 | "\n", 197 | " header = ['Title', 'Price', 'Date']\n", 198 | " data = [title, price, today]\n", 199 | "\n", 200 | " with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n", 201 | " writer = csv.writer(f)\n", 202 | " writer.writerow(data)\n", 203 | " \n", 204 | " " 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "c72f2c4e", 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "# Runs check_price after a set time and inputs data into your CSV\n", 215 | "\n", 216 | "while(True):\n", 217 | " check_price()\n", 218 | " time.sleep(86400)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "id": "00af7126", 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "import pandas as pd\n", 229 | "\n", 230 | "df = 
pd.read_csv(r'C:\\Users\\alexf\\AmazonWebScraperDataset.csv')\n", 231 | "\n", 232 | "print(df)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "id": "d14fce5f", 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "# If uou want to try sending yourself an email (just for fun) when a price hits below a certain level you can try it\n", 243 | "# out with this script\n", 244 | "\n", 245 | "def send_mail():\n", 246 | " server = smtplib.SMTP_SSL('smtp.gmail.com',465)\n", 247 | " server.ehlo()\n", 248 | " #server.starttls()\n", 249 | " server.ehlo()\n", 250 | " server.login('AlexTheAnalyst95@gmail.com','xxxxxxxxxxxxxx')\n", 251 | " \n", 252 | " subject = \"The Shirt you want is below $15! Now is your chance to buy!\"\n", 253 | " body = \"Alex, This is the moment we have been waiting for. Now is your chance to pick up the shirt of your dreams. Don't mess it up! Link here: https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data+analyst+tshirt&qid=1626655184&sr=8-3\"\n", 254 | " \n", 255 | " msg = f\"Subject: {subject}\\n\\n{body}\"\n", 256 | " \n", 257 | " server.sendmail(\n", 258 | " 'AlexTheAnalyst95@gmail.com',\n", 259 | " msg\n", 260 | " \n", 261 | " )" 262 | ] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.8.8" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 5 286 | } 287 | -------------------------------------------------------------------------------- /Automate API Extraction + Appending Data + Extra -- Project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "id": "70cbe983", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from requests import Request, Session\n", 11 | "from requests.exceptions import ConnectionError, Timeout, TooManyRedirects\n", 12 | "import json\n", 13 | "\n", 14 | "url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest' \n", 15 | "#Original Sandbox Environment: 'https://sandbox-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'\n", 16 | "parameters = {\n", 17 | " 'start':'1',\n", 18 | " 'limit':'15',\n", 19 | " 'convert':'USD'\n", 20 | "}\n", 21 | "headers = {\n", 22 | " 'Accepts': 'application/json',\n", 23 | " 'X-CMC_PRO_API_KEY': '0ad53085-1cb2-4eb8-ad9e-3ffbd7e56509',\n", 24 | "}\n", 25 | "\n", 26 | "session = Session()\n", 27 | "session.headers.update(headers)\n", 28 | "\n", 29 | "try:\n", 30 | " response = session.get(url, params=parameters)\n", 31 | " data = json.loads(response.text)\n", 32 | " #print(data)\n", 33 | "except (ConnectionError, Timeout, TooManyRedirects) as e:\n", 34 | " print(e)\n", 35 | "\n", 36 | "#NOTE:\n", 37 | "# I had to go in and put \"jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10\"\n", 38 | "# Into the Anaconda Prompt to change this to allow to pull data\n", 39 | "\n", 40 | "# If that didn't work try using the local host URL as shown in the video" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 63, 46 | "id": "31bdff98", 47 | "metadata": {}, 
48 | "outputs": [], 49 | "source": [ 50 | "type(data)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 18, 56 | "id": "4cbf82ee", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import pandas as pd\n", 61 | "\n", 62 | "\n", 63 | "#This allows you to see all the columns, not just like 15\n", 64 | "pd.set_option('display.max_columns', None)\n", 65 | "pd.set_option('display.max_rows', None)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 64, 71 | "id": "48c3b340", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "#This normalizes the data and makes it all pretty in a dataframe\n", 76 | "\n", 77 | "df = pd.json_normalize(data['data'])\n", 78 | "df['timestamp'] = pd.to_datetime('now')\n", 79 | "df" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 60, 85 | "id": "d792e388", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "\n", 90 | "def api_runner():\n", 91 | " global df\n", 92 | " url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest' \n", 93 | " #Original Sandbox Environment: 'https://sandbox-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'\n", 94 | " parameters = {\n", 95 | " 'start':'1',\n", 96 | " 'limit':'15',\n", 97 | " 'convert':'USD'\n", 98 | " }\n", 99 | " headers = {\n", 100 | " 'Accepts': 'application/json',\n", 101 | " 'X-CMC_PRO_API_KEY': '0ad53085-1cb2-4eb8-ad9e-3ffbd7e56509',\n", 102 | " }\n", 103 | "\n", 104 | " session = Session()\n", 105 | " session.headers.update(headers)\n", 106 | "\n", 107 | " try:\n", 108 | " response = session.get(url, params=parameters)\n", 109 | " data = json.loads(response.text)\n", 110 | " #print(data)\n", 111 | " except (ConnectionError, Timeout, TooManyRedirects) as e:\n", 112 | " print(e)\n", 113 | "\n", 114 | "#NOTE:\n", 115 | "# I had to go in and put \"jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10\"\n", 116 | "# Into the Anaconda Prompt to change this to allow to pull data\n", 117 | " \n", 118 | " # Use this if you just want to keep it in a dataframe\n", 119 | " df2 = pd.json_normalize(data['data'])\n", 120 | " df2['Timestamp'] = pd.to_datetime('now')\n", 121 | " df = df.append(df2)\n", 122 | "\n", 123 | "\n", 124 | " # Use this if you want to create a csv and append data to it\n", 125 | " #df = pd.json_normalize(data['data'])\n", 126 | " #df['timestamp'] = pd.to_datetime('now')\n", 127 | " #df\n", 128 | "\n", 129 | " #if not os.path.isfile(r'C:\\Users\\alexf\\OneDrive\\Documents\\Python Scripts\\API.csv'):\n", 130 | " #df.to_csv(r'C:\\Users\\alexf\\OneDrive\\Documents\\Python Scripts\\API.csv', header='column_names')\n", 131 | " #else:\n", 132 | " #df.to_csv(r'C:\\Users\\alexf\\OneDrive\\Documents\\Python Scripts\\API.csv', mode='a', header=False)\n", 133 | " \n", 134 | " #Then to read in the file: df = pd.read_csv(r'C:\\Users\\alexf\\OneDrive\\Documents\\Python Scripts\\API.csv')\n", 135 | "\n", 136 | "# If that didn't work try using the local host URL as shown in the video" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 67, 142 | "id": "9e272cea", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "import os \n", 147 | "from time import time\n", 148 | "from time import sleep\n", 149 | "\n", 150 | "for i in range(333):\n", 151 | " api_runner()\n", 152 | " print('API Runner completed')\n", 153 | " sleep(60) #sleep for 1 minute\n", 154 | "exit()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 68, 160 | "id": 
"bf9a55d9", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "df72 = pd.read_csv(r'C:\\Users\\alexf\\OneDrive\\Documents\\Python Scripts\\API.csv')\n", 165 | "df72" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 69, 171 | "id": "8902053b", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "df" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 22, 181 | "id": "b7c56101", 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "# One thing I noticed was the scientific notation. I like it, but I want to be able to see the numbers in this case\n", 186 | "\n", 187 | "pd.set_option('display.float_format', lambda x: '%.5f' % x)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 70, 193 | "id": "56b5a577", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "df" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 71, 203 | "id": "e4227c53", 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# Now let's look at the coin trends over time\n", 208 | "\n", 209 | "df3 = df.groupby('name', sort=False)[['quote.USD.percent_change_1h','quote.USD.percent_change_24h','quote.USD.percent_change_7d','quote.USD.percent_change_30d','quote.USD.percent_change_60d','quote.USD.percent_change_90d']].mean()\n", 210 | "df3" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 72, 216 | "id": "f3e2d1db", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "df4 = df3.stack()\n", 221 | "df4" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 73, 227 | "id": "2ef8ee34", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "type(df4)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 74, 237 | "id": "4b7b94bf", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "df5 = df4.to_frame(name='values')\n", 242 | "df5" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 75, 248 | "id": "b8125368", 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "df5.count()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 76, 258 | "id": "fc6ade71", 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "#Because of how it's structured above we need to set an index. I don't want to pass a column as an index for this dataframe\n", 263 | "#So I'm going to create a range and pass that as the dataframe. 
You can make this more dynamic, but I'm just going to hard code it\n", 264 | "\n", 265 | "\n", 266 | "index = pd.Index(range(90))\n", 267 | "\n", 268 | "# Set the above DataFrame index object as the index\n", 269 | "# using set_index() function\n", 270 | "df6 = df5.set_index(index)\n", 271 | "df6\n", 272 | "\n", 273 | "# If it only has the index and values try doing reset_index like \"df5.reset_index()\"" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 77, 279 | "id": "7d13cd4d", 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# Change the column name\n", 284 | "\n", 285 | "df7 = df6.rename(columns={'level_1': 'percent_change'})\n", 286 | "df7" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 78, 292 | "id": "a72480a3", 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "df7['percent_change'] = df7['percent_change'].replace(['quote.USD.percent_change_1h','quote.USD.percent_change_24h','quote.USD.percent_change_7d','quote.USD.percent_change_30d','quote.USD.percent_change_60d','quote.USD.percent_change_90d'],['1h','24h','7d','30d','60d','90d'])\n", 297 | "df7" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 47, 303 | "id": "16a3121f", 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "import seaborn as sns\n", 308 | "import matplotlib.pyplot as plt" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 79, 314 | "id": "c287a308", 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "sns.catplot(x='percent_change', y='values', hue='name', data=df7, kind='point')" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 80, 324 | "id": "2915d494", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "# Now to do something much simpler\n", 329 | "# we are going to create a dataframe with the columns we want\n", 330 | "\n", 331 | "df10 = df[['name','quote.USD.price','timestamp']]\n", 332 | "df10 = df10.query(\"name == 'Bitcoin'\")\n", 333 | "df10" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 81, 339 | "id": "ae8459af", 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "sns.set_theme(style=\"darkgrid\")\n", 344 | "\n", 345 | "sns.lineplot(x='timestamp', y='quote.USD.price', data = df10)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "id": "db10f9de", 352 | "metadata": {}, 353 | "outputs": [], 354 | "source": [] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 3", 360 | "language": "python", 361 | "name": "python3" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 3 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython3", 373 | "version": "3.8.8" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 5 378 | } 379 | -------------------------------------------------------------------------------- /COVID Portfolio Project - Data Exploration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Covid 19 Data Exploration 3 | 4 | Skills used: Joins, CTEs, Temp Tables, Window Functions, Aggregate Functions, Creating Views, Converting Data Types 5 | 6 | */ 7 | 8 | Select * 9 | From PortfolioProject..CovidDeaths 10 | Where continent is not null 11 | order by 3,4 12 | 13 | 14 | -- Select 
Data that we are going to be starting with 15 | 16 | Select Location, date, total_cases, new_cases, total_deaths, population 17 | From PortfolioProject..CovidDeaths 18 | Where continent is not null 19 | order by 1,2 20 | 21 | 22 | -- Total Cases vs Total Deaths 23 | -- Shows likelihood of dying if you contract covid in your country 24 | 25 | Select Location, date, total_cases,total_deaths, (total_deaths/total_cases)*100 as DeathPercentage 26 | From PortfolioProject..CovidDeaths 27 | Where location like '%states%' 28 | and continent is not null 29 | order by 1,2 30 | 31 | 32 | -- Total Cases vs Population 33 | -- Shows what percentage of the population was infected with Covid 34 | 35 | Select Location, date, Population, total_cases, (total_cases/population)*100 as PercentPopulationInfected 36 | From PortfolioProject..CovidDeaths 37 | --Where location like '%states%' 38 | order by 1,2 39 | 40 | 41 | -- Countries with Highest Infection Rate compared to Population 42 | 43 | Select Location, Population, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected 44 | From PortfolioProject..CovidDeaths 45 | --Where location like '%states%' 46 | Group by Location, Population 47 | order by PercentPopulationInfected desc 48 | 49 | 50 | -- Countries with Highest Death Count per Population 51 | 52 | Select Location, MAX(cast(Total_deaths as int)) as TotalDeathCount 53 | From PortfolioProject..CovidDeaths 54 | --Where location like '%states%' 55 | Where continent is not null 56 | Group by Location 57 | order by TotalDeathCount desc 58 | 59 | 60 | 61 | -- BREAKING THINGS DOWN BY CONTINENT 62 | 63 | -- Showing continents with the highest death count per population 64 | 65 | Select continent, MAX(cast(Total_deaths as int)) as TotalDeathCount 66 | From PortfolioProject..CovidDeaths 67 | --Where location like '%states%' 68 | Where continent is not null 69 | Group by continent 70 | order by TotalDeathCount desc 71 | 72 | 73 | 74 | -- GLOBAL NUMBERS 75 | 76 | Select SUM(new_cases) as total_cases, SUM(cast(new_deaths as int)) as total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage 77 | From PortfolioProject..CovidDeaths 78 | --Where location like '%states%' 79 | where continent is not null 80 | --Group By date 81 | order by 1,2 82 | 83 | 84 | 85 | -- Total Population vs Vaccinations 86 | -- Shows Percentage of Population that has received at least one Covid Vaccine 87 | 88 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 89 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 90 | --, (RollingPeopleVaccinated/population)*100 91 | From PortfolioProject..CovidDeaths dea 92 | Join PortfolioProject..CovidVaccinations vac 93 | On dea.location = vac.location 94 | and dea.date = vac.date 95 | where dea.continent is not null 96 | order by 2,3 97 | 98 | 99 | -- Using CTE to perform Calculation on Partition By in previous query 100 | 101 | With PopvsVac (Continent, Location, Date, Population, New_Vaccinations, RollingPeopleVaccinated) 102 | as 103 | ( 104 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 105 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 106 | --, (RollingPeopleVaccinated/population)*100 107 | From PortfolioProject..CovidDeaths dea 108 | Join PortfolioProject..CovidVaccinations vac 109 | On 
dea.location = vac.location 110 | and dea.date = vac.date 111 | where dea.continent is not null 112 | --order by 2,3 113 | ) 114 | Select *, (RollingPeopleVaccinated/Population)*100 115 | From PopvsVac 116 | 117 | 118 | 119 | -- Using Temp Table to perform Calculation on Partition By in previous query 120 | 121 | DROP Table if exists #PercentPopulationVaccinated 122 | Create Table #PercentPopulationVaccinated 123 | ( 124 | Continent nvarchar(255), 125 | Location nvarchar(255), 126 | Date datetime, 127 | Population numeric, 128 | New_vaccinations numeric, 129 | RollingPeopleVaccinated numeric 130 | ) 131 | 132 | Insert into #PercentPopulationVaccinated 133 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 134 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 135 | --, (RollingPeopleVaccinated/population)*100 136 | From PortfolioProject..CovidDeaths dea 137 | Join PortfolioProject..CovidVaccinations vac 138 | On dea.location = vac.location 139 | and dea.date = vac.date 140 | --where dea.continent is not null 141 | --order by 2,3 142 | 143 | Select *, (RollingPeopleVaccinated/Population)*100 144 | From #PercentPopulationVaccinated 145 | 146 | 147 | 148 | 149 | -- Creating View to store data for later visualizations 150 | 151 | Create View PercentPopulationVaccinated as 152 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 153 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 154 | --, (RollingPeopleVaccinated/population)*100 155 | From PortfolioProject..CovidDeaths dea 156 | Join PortfolioProject..CovidVaccinations vac 157 | On dea.location = vac.location 158 | and dea.date = vac.date 159 | where dea.continent is not null 160 | 161 | 162 | -------------------------------------------------------------------------------- /CovidDeaths.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/CovidDeaths.xlsx -------------------------------------------------------------------------------- /CovidVaccinations.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/CovidVaccinations.xlsx -------------------------------------------------------------------------------- /Data Cleaning Portfolio Project Queries.sql: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Cleaning Data in SQL Queries 4 | 5 | */ 6 | 7 | 8 | Select * 9 | From PortfolioProject.dbo.NashvilleHousing 10 | 11 | -------------------------------------------------------------------------------------------------------------------------- 12 | 13 | -- Standardize Date Format 14 | 15 | 16 | Select saleDateConverted, CONVERT(Date,SaleDate) 17 | From PortfolioProject.dbo.NashvilleHousing 18 | 19 | 20 | Update NashvilleHousing 21 | SET SaleDate = CONVERT(Date,SaleDate) 22 | 23 | -- If it doesn't Update properly 24 | 25 | ALTER TABLE NashvilleHousing 26 | Add SaleDateConverted Date; 27 | 28 | Update NashvilleHousing 29 | SET SaleDateConverted = CONVERT(Date,SaleDate) 30 | 31 | 32 | 
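-- Added note, as a sketch: the plain UPDATE above can appear to do nothing because
-- SaleDate's column type is still datetime, so SQL Server casts the converted value
-- right back on write. Adding a separate Date column, as done above, avoids that;
-- altering the column type itself is the other option, assuming nothing else
-- depends on SaleDate:

--ALTER TABLE NashvilleHousing
--ALTER COLUMN SaleDate Date;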
-------------------------------------------------------------------------------------------------------------------------- 33 | 34 | -- Populate Property Address data 35 | 36 | Select * 37 | From PortfolioProject.dbo.NashvilleHousing 38 | --Where PropertyAddress is null 39 | order by ParcelID 40 | 41 | 42 | 43 | Select a.ParcelID, a.PropertyAddress, b.ParcelID, b.PropertyAddress, ISNULL(a.PropertyAddress,b.PropertyAddress) 44 | From PortfolioProject.dbo.NashvilleHousing a 45 | JOIN PortfolioProject.dbo.NashvilleHousing b 46 | on a.ParcelID = b.ParcelID 47 | AND a.[UniqueID ] <> b.[UniqueID ] 48 | Where a.PropertyAddress is null 49 | 50 | 51 | Update a 52 | SET PropertyAddress = ISNULL(a.PropertyAddress,b.PropertyAddress) 53 | From PortfolioProject.dbo.NashvilleHousing a 54 | JOIN PortfolioProject.dbo.NashvilleHousing b 55 | on a.ParcelID = b.ParcelID 56 | AND a.[UniqueID ] <> b.[UniqueID ] 57 | Where a.PropertyAddress is null 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------------------------------------------------- 63 | 64 | -- Breaking out Address into Individual Columns (Address, City, State) 65 | 66 | 67 | Select PropertyAddress 68 | From PortfolioProject.dbo.NashvilleHousing 69 | --Where PropertyAddress is null 70 | --order by ParcelID 71 | 72 | SELECT 73 | SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress) -1 ) as Address 74 | , SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1 , LEN(PropertyAddress)) as City 75 | 76 | From PortfolioProject.dbo.NashvilleHousing 77 | 78 | 79 | ALTER TABLE NashvilleHousing 80 | Add PropertySplitAddress Nvarchar(255); 81 | 82 | Update NashvilleHousing 83 | SET PropertySplitAddress = SUBSTRING(PropertyAddress, 1, CHARINDEX(',', PropertyAddress) -1 ) 84 | 85 | 86 | ALTER TABLE NashvilleHousing 87 | Add PropertySplitCity Nvarchar(255); 88 | 89 | Update NashvilleHousing 90 | SET PropertySplitCity = SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1 , LEN(PropertyAddress)) 91 | 92 | 93 | 94 | 95 | Select * 96 | From PortfolioProject.dbo.NashvilleHousing 97 | 98 | 99 | 100 | 101 | 102 | Select OwnerAddress 103 | From PortfolioProject.dbo.NashvilleHousing 104 | 105 | 106 | Select 107 | PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3) 108 | ,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2) 109 | ,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1) 110 | From PortfolioProject.dbo.NashvilleHousing 111 | 112 | 113 | 114 | ALTER TABLE NashvilleHousing 115 | Add OwnerSplitAddress Nvarchar(255); 116 | 117 | Update NashvilleHousing 118 | SET OwnerSplitAddress = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3) 119 | 120 | 121 | ALTER TABLE NashvilleHousing 122 | Add OwnerSplitCity Nvarchar(255); 123 | 124 | Update NashvilleHousing 125 | SET OwnerSplitCity = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2) 126 | 127 | 128 | 129 | ALTER TABLE NashvilleHousing 130 | Add OwnerSplitState Nvarchar(255); 131 | 132 | Update NashvilleHousing 133 | SET OwnerSplitState = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1) 134 | 135 | 136 | 137 | Select * 138 | From PortfolioProject.dbo.NashvilleHousing 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------------------------------------------------- 144 | 145 | 146 | -- Change Y and N to Yes and No in "Sold as Vacant" field 147 | 148 | 149 | Select Distinct(SoldAsVacant), Count(SoldAsVacant) 150 | From PortfolioProject.dbo.NashvilleHousing 151 | Group by SoldAsVacant 152 | order by 2 153 | 154 | 155 | 
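-- The distinct count above typically surfaces a mix of 'Y', 'N', 'Yes', and 'No';
-- the CASE expression below maps the one-letter values onto the full words,
-- first as a preview Select and then as the in-place Update.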
156 | 157 | Select SoldAsVacant 158 | , CASE When SoldAsVacant = 'Y' THEN 'Yes' 159 | When SoldAsVacant = 'N' THEN 'No' 160 | ELSE SoldAsVacant 161 | END 162 | From PortfolioProject.dbo.NashvilleHousing 163 | 164 | 165 | Update NashvilleHousing 166 | SET SoldAsVacant = CASE When SoldAsVacant = 'Y' THEN 'Yes' 167 | When SoldAsVacant = 'N' THEN 'No' 168 | ELSE SoldAsVacant 169 | END 170 | 171 | 172 | 173 | 174 | 175 | 176 | ----------------------------------------------------------------------------------------------------------------------------------------------------------- 177 | 178 | -- Remove Duplicates (the CTE below flags duplicate rows with row_num > 1; running a Delete against the CTE instead of the final Select would remove them) 179 | 180 | WITH RowNumCTE AS( 181 | Select *, 182 | ROW_NUMBER() OVER ( 183 | PARTITION BY ParcelID, 184 | PropertyAddress, 185 | SalePrice, 186 | SaleDate, 187 | LegalReference 188 | ORDER BY 189 | UniqueID 190 | ) row_num 191 | 192 | From PortfolioProject.dbo.NashvilleHousing 193 | --order by ParcelID 194 | ) 195 | Select * 196 | From RowNumCTE 197 | Where row_num > 1 198 | Order by PropertyAddress 199 | 200 | 201 | 202 | Select * 203 | From PortfolioProject.dbo.NashvilleHousing 204 | 205 | 206 | 207 | 208 | --------------------------------------------------------------------------------------------------------- 209 | 210 | -- Delete Unused Columns 211 | 212 | 213 | 214 | Select * 215 | From PortfolioProject.dbo.NashvilleHousing 216 | 217 | 218 | ALTER TABLE PortfolioProject.dbo.NashvilleHousing 219 | DROP COLUMN OwnerAddress, TaxDistrict, PropertyAddress, SaleDate 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | ----------------------------------------------------------------------------------------------- 236 | ----------------------------------------------------------------------------------------------- 237 | 238 | --- Importing Data using OPENROWSET and BULK INSERT 239 | 240 | -- More advanced and looks cooler, but you have to configure the server appropriately to do it correctly 241 | -- Wanted to provide this in case you wanted to try it 242 | 243 | 244 | --sp_configure 'show advanced options', 1; 245 | --RECONFIGURE; 246 | --GO 247 | --sp_configure 'Ad Hoc Distributed Queries', 1; 248 | --RECONFIGURE; 249 | --GO 250 | 251 | 252 | --USE PortfolioProject 253 | 254 | --GO 255 | 256 | --EXEC master.dbo.sp_MSset_oledb_prop N'Microsoft.ACE.OLEDB.12.0', N'AllowInProcess', 1 257 | 258 | --GO 259 | 260 | --EXEC master.dbo.sp_MSset_oledb_prop N'Microsoft.ACE.OLEDB.12.0', N'DynamicParameters', 1 261 | 262 | --GO 263 | 264 | 265 | ---- Using BULK INSERT 266 | 267 | --USE PortfolioProject; 268 | --GO 269 | --BULK INSERT nashvilleHousing FROM 'C:\Temp\SQL Server Management Studio\Nashville Housing Data for Data Cleaning Project.csv' 270 | -- WITH ( 271 | -- FIELDTERMINATOR = ',', 272 | -- ROWTERMINATOR = '\n' 273 | --); 274 | --GO 275 | 276 | 277 | ---- Using OPENROWSET 278 | --USE PortfolioProject; 279 | --GO 280 | --SELECT * INTO nashvilleHousing 281 | --FROM OPENROWSET('Microsoft.ACE.OLEDB.12.0', 282 | -- 'Excel 12.0; Database=C:\Users\alexf\OneDrive\Documents\SQL Server Management Studio\Nashville Housing Data for Data Cleaning Project.csv', [Sheet1$]); 283 | --GO 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | -------------------------------------------------------------------------------- /Nashville Housing Data for Data Cleaning (reuploaded).xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/Nashville Housing Data for Data Cleaning (reuploaded).xlsx -------------------------------------------------------------------------------- /Nashville Housing Data for Data Cleaning.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/Nashville Housing Data for Data Cleaning.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data Analyst Portfolio Project Repository 2 | 3 | This Repository will hold all of the code and queries from the Portfolio Projects we create. 4 | 5 | Please feel free to take these and run with them. Make them your own and find your own insights 6 | 7 | I really do hope this is helpful and helps you land that dream job! :D 8 | -------------------------------------------------------------------------------- /Tableau Joins File.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/Tableau Joins File.xlsx -------------------------------------------------------------------------------- /Tableau Portfolio Project SQL Queries.sql: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Queries used for Tableau Project 4 | 5 | */ 6 | 7 | 8 | 9 | -- 1. 10 | 11 | Select SUM(new_cases) as total_cases, SUM(cast(new_deaths as int)) as total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage 12 | From PortfolioProject..CovidDeaths 13 | --Where location like '%states%' 14 | where continent is not null 15 | --Group By date 16 | order by 1,2 17 | 18 | -- Just a double check based on the data provided 19 | -- numbers are extremely close so we will keep them - the second includes the "International" Location 20 | 21 | 22 | --Select SUM(new_cases) as total_cases, SUM(cast(new_deaths as int)) as total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage 23 | --From PortfolioProject..CovidDeaths 24 | ----Where location like '%states%' 25 | --where location = 'World' 26 | ----Group By date 27 | --order by 1,2 28 | 29 | 30 | -- 2. 31 | 32 | -- We take these out as they are not included in the above queries and we want to stay consistent 33 | -- European Union is part of Europe 34 | 35 | Select location, SUM(cast(new_deaths as int)) as TotalDeathCount 36 | From PortfolioProject..CovidDeaths 37 | --Where location like '%states%' 38 | Where continent is null 39 | and location not in ('World', 'European Union', 'International') 40 | Group by location 41 | order by TotalDeathCount desc 42 | 43 | 44 | -- 3. 45 | 46 | Select Location, Population, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected 47 | From PortfolioProject..CovidDeaths 48 | --Where location like '%states%' 49 | Group by Location, Population 50 | order by PercentPopulationInfected desc 51 | 52 | 53 | -- 4. 
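-- (Same as query 3 above, but grouped by date as well, so the infection rate can be tracked over time)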
54 | 55 | 56 | Select Location, Population,date, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected 57 | From PortfolioProject..CovidDeaths 58 | --Where location like '%states%' 59 | Group by Location, Population, date 60 | order by PercentPopulationInfected desc 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -- Queries I originally had, but excluded some because they made the video too long 74 | -- Here only in case you want to check them out 75 | 76 | 77 | -- 1. 78 | 79 | Select dea.continent, dea.location, dea.date, dea.population 80 | , MAX(vac.total_vaccinations) as RollingPeopleVaccinated 81 | --, (RollingPeopleVaccinated/population)*100 82 | From PortfolioProject..CovidDeaths dea 83 | Join PortfolioProject..CovidVaccinations vac 84 | On dea.location = vac.location 85 | and dea.date = vac.date 86 | where dea.continent is not null 87 | group by dea.continent, dea.location, dea.date, dea.population 88 | order by 1,2,3 89 | 90 | 91 | 92 | 93 | -- 2. 94 | Select SUM(new_cases) as total_cases, SUM(cast(new_deaths as int)) as total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage 95 | From PortfolioProject..CovidDeaths 96 | --Where location like '%states%' 97 | where continent is not null 98 | --Group By date 99 | order by 1,2 100 | 101 | 102 | -- Just a double check based on the data provided 103 | -- numbers are extremely close so we will keep them - the second includes the "International" Location 104 | 105 | 106 | --Select SUM(new_cases) as total_cases, SUM(cast(new_deaths as int)) as total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage 107 | --From PortfolioProject..CovidDeaths 108 | ----Where location like '%states%' 109 | --where location = 'World' 110 | ----Group By date 111 | --order by 1,2 112 | 113 | 114 | -- 3. 115 | 116 | -- We take these out as they are not included in the above queries and we want to stay consistent 117 | -- European Union is part of Europe 118 | 119 | Select location, SUM(cast(new_deaths as int)) as TotalDeathCount 120 | From PortfolioProject..CovidDeaths 121 | --Where location like '%states%' 122 | Where continent is null 123 | and location not in ('World', 'European Union', 'International') 124 | Group by location 125 | order by TotalDeathCount desc 126 | 127 | 128 | 129 | -- 4. 130 | 131 | Select Location, Population, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected 132 | From PortfolioProject..CovidDeaths 133 | --Where location like '%states%' 134 | Group by Location, Population 135 | order by PercentPopulationInfected desc 136 | 137 | 138 | 139 | -- 5. 140 | 141 | --Select Location, date, total_cases,total_deaths, (total_deaths/total_cases)*100 as DeathPercentage 142 | --From PortfolioProject..CovidDeaths 143 | ----Where location like '%states%' 144 | --where continent is not null 145 | --order by 1,2 146 | 147 | -- took the above query and added population 148 | Select Location, date, population, total_cases, total_deaths 149 | From PortfolioProject..CovidDeaths 150 | --Where location like '%states%' 151 | where continent is not null 152 | order by 1,2 153 | 154 | 155 | -- 6. 
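-- (The rolling vaccination CTE from the exploration script, reused here with the calculation surfaced as PercentPeopleVaccinated)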
156 | 157 | 158 | With PopvsVac (Continent, Location, Date, Population, New_Vaccinations, RollingPeopleVaccinated) 159 | as 160 | ( 161 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 162 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 163 | --, (RollingPeopleVaccinated/population)*100 164 | From PortfolioProject..CovidDeaths dea 165 | Join PortfolioProject..CovidVaccinations vac 166 | On dea.location = vac.location 167 | and dea.date = vac.date 168 | where dea.continent is not null 169 | --order by 2,3 170 | ) 171 | Select *, (RollingPeopleVaccinated/Population)*100 as PercentPeopleVaccinated 172 | From PopvsVac 173 | 174 | 175 | -- 7. 176 | 177 | Select Location, Population,date, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected 178 | From PortfolioProject..CovidDeaths 179 | --Where location like '%states%' 180 | Group by Location, Population, date 181 | order by PercentPopulationInfected desc 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | import plotly.express as px 6 | from faicons import icon_svg 7 | from shinywidgets import render_plotly 8 | from state_choices import STATE_CHOICES 9 | 10 | from shiny import reactive 11 | from shiny.express import input, render, ui 12 | 13 | # --------------------------------------------------------------------- 14 | # Reading in Files 15 | # --------------------------------------------------------------------- 16 | new_listings_df = pd.read_csv(Path(__file__).parent / "Metro_new_listings_uc_sfrcondo_sm_month.csv") 17 | median_listing_price_df = pd.read_csv(Path(__file__).parent / "Metro_mlp_uc_sfrcondo_sm_month.csv") 18 | for_sale_inventory_df = pd.read_csv(Path(__file__).parent / "Metro_invt_fs_uc_sfrcondo_sm_month.csv") 19 | 20 | 21 | # --------------------------------------------------------------------- 22 | # Helper functions - converting to DateTime 23 | # --------------------------------------------------------------------- 24 | def string_to_date(date_str): 25 | return datetime.strptime(date_str, "%Y-%m-%d").date() 26 | 27 | 28 | def filter_by_date(df: pd.DataFrame,date_range: tuple): 29 | rng = sorted(date_range) 30 | dates = pd.to_datetime(df["Date"], format="%Y-%m-%d").dt.date 31 | return df[(dates >= rng[0]) & (dates <= rng[1])] 32 | 33 | 34 | # --------------------------------------------------------------------- 35 | # Visualizations 36 | # --------------------------------------------------------------------- 37 | 38 | #for_sale_inventory_df2 = for_sale_inventory_df["StateName"].fillna("United States") 39 | #for_sale_inventory_df2 = for_sale_inventory_df["StateName"].drop_duplicates() 40 | #for_sale_inventory_df2 = for_sale_inventory_df2.sort_values().tolist() 41 | 42 | 43 | ui.page_opts(title= "US Housing App") 44 | 45 | 46 | 47 | 48 | 49 | with ui.sidebar(): 50 | ui.input_select("state","Filter by State", choices=STATE_CHOICES), 51 | ui.input_slider("date_range","Filter by Date Range", 52 | min = string_to_date("2018-3-31"), 53 | max = string_to_date("2024-4-30"), 54 | value = [string_to_date(x) for x in ["2018-3-31","2024-4-30"]]) 55 | 56 | with ui.layout_column_wrap(): 57 | with ui.value_box(showcase = 
icon_svg("dollar-sign")): 58 | "Current Median List Price" 59 | 60 | @render.ui 61 | def price(): 62 | date_columns = median_listing_price_df.columns[6:] 63 | states = median_listing_price_df.groupby("StateName").mean(numeric_only=True) 64 | dates = states[date_columns].reset_index() 65 | states = dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 66 | country = median_listing_price_df[median_listing_price_df["RegionType"] == "country"] 67 | country_dates = country[date_columns].reset_index() 68 | country_dates["StateName"] = "United States" 69 | country = country_dates.melt( 70 | id_vars=["StateName"], var_name="Date", value_name="Value" 71 | ) 72 | 73 | res = pd.concat([states, country]) 74 | 75 | res = res[res["Date"] != "index"] 76 | 77 | df = res[res["StateName"] == input.state()] 78 | 79 | last_value = df.iloc[-1,-1] 80 | return f"${last_value:,.0f}" 81 | 82 | with ui.value_box(showcase = icon_svg("house")): 83 | "Home Inventory % Change" 84 | @render.ui 85 | def change(): 86 | date_columns = median_listing_price_df.columns[6:] 87 | states = median_listing_price_df.groupby("StateName").mean(numeric_only=True) 88 | dates = states[date_columns].reset_index() 89 | states = dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 90 | country = median_listing_price_df[median_listing_price_df["RegionType"] == "country"] 91 | country_dates = country[date_columns].reset_index() 92 | country_dates["StateName"] = "United States" 93 | country = country_dates.melt( 94 | id_vars=["StateName"], var_name="Date", value_name="Value" 95 | ) 96 | 97 | res = pd.concat([states, country]) 98 | 99 | res = res[res["Date"] != "index"] 100 | 101 | df = res[res["StateName"] == input.state()] 102 | 103 | last_value = df.iloc[-1,-1] 104 | second_last_value = df.iloc[-2,-1] 105 | 106 | percentage_change = ((last_value - second_last_value)/second_last_value *100) 107 | sign = "+" if percentage_change > 0 else "-" 108 | return f"{sign}{percentage_change:.2f}%" 109 | 110 | # Plotly visualization of Median Home Price Per State 111 | 112 | with ui.navset_card_underline(title = "Median List Price"): 113 | 114 | with ui.nav_panel("Plot", icon = icon_svg("chart-line")): 115 | 116 | @render_plotly 117 | def list_price_plot(): 118 | # Grouping by State Name and specifying the Date Columns 119 | price_grouped = median_listing_price_df.groupby('StateName').mean(numeric_only=True) 120 | date_columns = median_listing_price_df.columns[6:] 121 | price_grouped_dates = price_grouped[date_columns].reset_index() 122 | price_df_for_viz = price_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 123 | 124 | price_df_for_viz = filter_by_date(price_df_for_viz, input.date_range()) 125 | 126 | if input.state() == "United States": 127 | df = price_df_for_viz 128 | else: 129 | df = price_df_for_viz[price_df_for_viz["StateName"] == input.state()] 130 | 131 | 132 | # Creating Visualization using Ployly 133 | fig = px.line(df, x="Date", y="Value", color="StateName") 134 | fig.update_xaxes(title_text="") 135 | fig.update_yaxes(title_text="") 136 | return fig 137 | with ui.nav_panel("Table", icon = icon_svg("table")): 138 | @render.data_frame 139 | def list_price_data(): 140 | if input.state() == "United States": 141 | df = median_listing_price_df 142 | else: 143 | df = median_listing_price_df[median_listing_price_df["StateName"] == input.state()] 144 | return render.DataGrid(df) 145 | 146 | # Plotly visualization of Homes For Sale Per State 147 | 148 | with 
ui.navset_card_underline(title = "Home Inventory"): 149 | 150 | with ui.nav_panel("Plot", icon = icon_svg("chart-line")): 151 | @render_plotly 152 | def for_sale_plot(): 153 | # Grouping by State Name and specifying the Date Columns 154 | for_sale_grouped = for_sale_inventory_df.groupby('StateName').sum(numeric_only=True) 155 | date_columns = for_sale_inventory_df.columns[6:] 156 | for_sale_grouped_grouped_dates = for_sale_grouped[date_columns].reset_index() 157 | for_sale_df_for_viz = for_sale_grouped_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 158 | 159 | 160 | for_sale_df_for_viz = filter_by_date(for_sale_df_for_viz, input.date_range()) 161 | 162 | if input.state() == "United States": 163 | df = for_sale_df_for_viz 164 | else: 165 | df = for_sale_df_for_viz[for_sale_df_for_viz["StateName"] == input.state()] 166 | 167 | # Creating Visualization using Plotly 168 | fig = px.line(df, x="Date", y="Value", color="StateName") 169 | fig.update_xaxes(title_text="") 170 | fig.update_yaxes(title_text="") 171 | return fig 172 | with ui.nav_panel("Table", icon = icon_svg("table")): 173 | @render.data_frame 174 | def for_sale_data(): 175 | if input.state() == "United States": 176 | df = for_sale_inventory_df 177 | else: 178 | df = for_sale_inventory_df[for_sale_inventory_df["StateName"] == input.state()] 179 | return render.DataGrid(df) 180 | 181 | # Plotly visualization of Listings Per State 182 | 183 | with ui.navset_card_underline(title = "New Listings"): 184 | 185 | with ui.nav_panel("Plot", icon = icon_svg("chart-line")): 186 | 187 | @render_plotly 188 | def listings_plot(): 189 | # Grouping by State Name and specifying the Date Columns 190 | new_listings_grouped = new_listings_df.groupby('StateName').sum(numeric_only=True) 191 | date_columns = new_listings_df.columns[6:] 192 | new_listings_grouped_dates = new_listings_grouped[date_columns].reset_index() 193 | new_listings_df_for_viz = new_listings_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 194 | 195 | new_listings_df_for_viz = filter_by_date(new_listings_df_for_viz, input.date_range()) 196 | 197 | if input.state() == "United States": 198 | df = new_listings_df_for_viz 199 | else: 200 | df = new_listings_df_for_viz[new_listings_df_for_viz["StateName"] == input.state()] 201 | 202 | 203 | # Creating Visualization using Plotly 204 | fig = px.line(df, x="Date", y="Value", color="StateName") 205 | fig.update_xaxes(title_text="") 206 | fig.update_yaxes(title_text="") 207 | return fig 208 | 209 | with ui.nav_panel("Table", icon = icon_svg("table")): 210 | @render.data_frame 211 | def listings_data(): 212 | if input.state() == "United States": 213 | df = new_listings_df 214 | else: 215 | df = new_listings_df[new_listings_df["StateName"] == input.state()] 216 | return render.DataGrid(df) 217 | -------------------------------------------------------------------------------- /app_starting_code.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | import plotly.express as px 6 | from faicons import icon_svg 7 | from shinywidgets import render_plotly 8 | from state_choices import STATE_CHOICES 9 | 10 | from shiny import reactive 11 | from shiny.express import input, render, ui 12 | 13 | # --------------------------------------------------------------------- 14 | # Reading in Files 15 | # --------------------------------------------------------------------- 16 | 
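# Note: the three CSVs below follow Zillow Research's metro-level monthly naming
# (new listings, median list price, and for-sale inventory) and are assumed to sit
# in the same folder as this script.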
new_listings_df = pd.read_csv(Path(__file__).parent / "Metro_new_listings_uc_sfrcondo_sm_month.csv") 17 | median_listing_price_df = pd.read_csv(Path(__file__).parent / "Metro_mlp_uc_sfrcondo_sm_month.csv") 18 | for_sale_inventory_df = pd.read_csv(Path(__file__).parent / "Metro_invt_fs_uc_sfrcondo_sm_month.csv") 19 | 20 | 21 | # --------------------------------------------------------------------- 22 | # Helper functions - converting to DateTime 23 | # --------------------------------------------------------------------- 24 | def string_to_date(date_str): 25 | return datetime.strptime(date_str, "%Y-%m-%d").date() 26 | 27 | 28 | def filter_by_date(df: pd.DataFrame, date_range: tuple): 29 | rng = sorted(date_range) 30 | dates = pd.to_datetime(df["Date"], format="%Y-%m-%d").dt.date 31 | return df[(dates >= rng[0]) & (dates <= rng[1])] 32 | 33 | 34 | # --------------------------------------------------------------------- 35 | # Visualizations 36 | # --------------------------------------------------------------------- 37 | 38 | # Plotly visualization of Median Home Price Per State 39 | def list_price_plot(): 40 | # Grouping by State Name and specifying the Date Columns 41 | price_grouped = median_listing_price_df.groupby('StateName').mean(numeric_only=True) 42 | date_columns = median_listing_price_df.columns[6:] 43 | price_grouped_dates = price_grouped[date_columns].reset_index() 44 | price_df_for_viz = price_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 45 | # Creating Visualization using Plotly 46 | fig = px.line(price_df_for_viz, x="Date", y="Value", color="StateName") 47 | fig.update_xaxes(title_text="") 48 | fig.update_yaxes(title_text="") 49 | return fig 50 | 51 | 52 | # Plotly visualization of Homes For Sale Per State 53 | def for_sale_plot(): 54 | # Grouping by State Name and specifying the Date Columns 55 | df2_grouped = for_sale_inventory_df.groupby('StateName').sum(numeric_only=True) 56 | date_columns = for_sale_inventory_df.columns[6:] 57 | df2_grouped_dates = df2_grouped[date_columns].reset_index() 58 | df2_melted = df2_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 59 | # Creating Visualization using Plotly 60 | df = df2_melted # for_sale_filtered() is not defined in this starter file; plot the melted frame directly 61 | fig = px.line(df, x="Date", y="Value", color="StateName") 62 | fig.update_xaxes(title_text="") 63 | fig.update_yaxes(title_text="") 64 | return fig 65 | 66 | # Plotly visualization of Listings Per State 67 | def listings_plot(): 68 | # Grouping by State Name and specifying the Date Columns 69 | df3_grouped = new_listings_df.groupby('StateName').sum(numeric_only=True) 70 | date_columns = new_listings_df.columns[6:] 71 | df3_grouped_dates = df3_grouped[date_columns].reset_index() 72 | df3_melted = df3_grouped_dates.melt(id_vars=["StateName"], var_name="Date", value_name="Value") 73 | # Creating Visualization using Plotly 74 | df = df3_melted # listings_filtered() is not defined in this starter file; plot the melted frame directly 75 | fig = px.line(df, x="Date", y="Value", color="StateName") 76 | fig.update_xaxes(title_text="") 77 | fig.update_yaxes(title_text="") 78 | return fig 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /us-housing-app.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlexTheAnalyst/PortfolioProjects/39c541bae76eb109652e8d834b0fa2aa3f15fd8b/us-housing-app.zip --------------------------------------------------------------------------------