├── 01-TMDB-Dataset-Analysis
├── README.md
├── TMDB-dataset-analysis.ipynb
└── tmdb-movies.csv
├── 02-Auto-MPG-Dataset-Analysis
├── README.md
├── auto-mpg.csv
└── mpg-dataset-analysis.ipynb
├── 03-Medical-Appointment-No-Show
├── README.md
├── medical-appointment-dataset-analysis.ipynb
└── noshowappointments-kagglev2-may-2016.csv
├── 04-9000+-Movies-Dataset-Analysis
├── 9000-movies-dataset-analysis.ipynb
├── README.md
└── mymoviedb.csv
├── 05-Wine-Quality-Dataset
├── README.md
├── wine-quality-analysis.ipynb
├── wineQualityReds.csv
├── wineQualityWhites.csv
└── wine_full.csv
├── 06-Query-a-Digital-Music-Store-Database
├── Chinook-SQL-Project-Report.pdf
├── Chinook-SQL-Queries.sql
├── README.md
└── img
│ ├── q1.png
│ ├── q2.png
│ ├── q3.png
│ └── q4.png
├── 07-Create-a-Data-Model-for-Seven-Sages-Brewing-Company
├── README.md
├── SSBC-Data-Model.png
├── SSBC-Project.pbix
├── SSBC-Report-Tab1.jpg
├── SSBC-Report-Tab2.jpg
├── SSBC-Report.pdf
└── Source Files
│ ├── CFO Metrics Tracker.xlsx
│ ├── Customer List (as of FY2021).txt
│ ├── Monthly Sales Logs
│ ├── SSBC - Apr 2021 Sales.xlsx
│ ├── SSBC - Aug 2021 Sales.xlsx
│ ├── SSBC - Dec 2020 Sales.xlsx
│ ├── SSBC - Feb 2021 Sales.xlsx
│ ├── SSBC - Jan 2021 Sales.xlsx
│ ├── SSBC - Jul 2021 Sales.xlsx
│ ├── SSBC - Jun 2021 Sales.xlsx
│ ├── SSBC - Mar 2021 Sales.xlsx
│ ├── SSBC - May 2021 Sales.xlsx
│ ├── SSBC - Nov 2020 Sales.xlsx
│ ├── SSBC - Oct 2020 Sales.xlsx
│ └── SSBC - Sep 2021 Sales.xlsx
│ ├── SSBC Product Offerings.pdf
│ └── USD-CAD Exchange Rates.csv
├── 08-Building-Power-BI-Report-for-Waggle
├── README.md
├── Waggle-Project.pbix
├── Waggle-Theme.json
├── Waggle-color-palette.png
├── Waggle-dashboard
│ ├── Waggle-Project.pdf
│ ├── Waggle-tab1.jfif
│ ├── Waggle-tab2.jfif
│ └── Waggle-tab3.jfif
├── Waggle-data-model.png
├── Waggle-datasets.xlsx
└── marketing_collateral
│ ├── cat_face_icon_blue.png
│ ├── cat_face_icon_gray.png
│ ├── cat_face_icon_green.png
│ ├── cat_face_icon_pink.png
│ ├── cat_face_icon_teal.png
│ ├── cat_face_icon_violet.png
│ ├── cat_face_icon_yellow.png
│ ├── color_palette.png
│ ├── dog_face_icon_blue.png
│ ├── dog_face_icon_gray.png
│ ├── dog_face_icon_green.png
│ ├── dog_face_icon_pink.png
│ ├── dog_face_icon_teal.png
│ ├── dog_face_icon_violet.png
│ ├── dog_face_icon_yellow.png
│ ├── lapcat_logo_blue_background.png
│ ├── lapcat_logo_green_background.png
│ ├── lapcat_logo_pink_background.png
│ ├── lapcat_logo_teal_background.png
│ ├── lapcat_logo_transparent_blue.png
│ ├── lapcat_logo_transparent_darker_gray.png
│ ├── lapcat_logo_transparent_gray.png
│ ├── lapcat_logo_transparent_green.png
│ ├── lapcat_logo_transparent_pink.png
│ ├── lapcat_logo_transparent_teal.png
│ ├── lapcat_logo_transparent_violet.png
│ ├── lapcat_logo_transparent_yellow.png
│ ├── lapcat_logo_violet_background.png
│ ├── lapcat_logo_white_transparent_blue.png
│ ├── lapcat_logo_white_transparent_green.png
│ ├── lapcat_logo_white_transparent_pink.png
│ ├── lapcat_logo_white_transparent_teal.png
│ ├── lapcat_logo_white_transparent_violet.png
│ ├── lapcat_logo_white_transparent_wine.png
│ ├── lapcat_logo_white_transparent_yellow.png
│ ├── lapcat_logo_yellow_background.png
│ ├── lapdog_logo_blue_background.png
│ ├── lapdog_logo_green_background.png
│ ├── lapdog_logo_pink_background.png
│ ├── lapdog_logo_teal_background.png
│ ├── lapdog_logo_transparent_blue.png
│ ├── lapdog_logo_transparent_darker_gray.png
│ ├── lapdog_logo_transparent_gray.png
│ ├── lapdog_logo_transparent_green.png
│ ├── lapdog_logo_transparent_pink.png
│ ├── lapdog_logo_transparent_teal.png
│ ├── lapdog_logo_transparent_violet.png
│ ├── lapdog_logo_transparent_yellow.png
│ ├── lapdog_logo_violet_background.png
│ ├── lapdog_logo_white_transparent_blue.png
│ ├── lapdog_logo_white_transparent_green.png
│ ├── lapdog_logo_white_transparent_pink.png
│ ├── lapdog_logo_white_transparent_teal.png
│ ├── lapdog_logo_white_transparent_violet.png
│ ├── lapdog_logo_white_transparent_yellow.png
│ ├── lapdog_logo_yellow_background.png
│ ├── waggle_logo_black.png
│ ├── waggle_logo_blue.png
│ ├── waggle_logo_green.png
│ ├── waggle_logo_pink.png
│ ├── waggle_logo_red.png
│ ├── waggle_logo_teal.png
│ ├── waggle_logo_violet.png
│ ├── waggle_logo_white.png
│ ├── waggle_logo_wine.png
│ └── waggle_logo_yellow.png
├── 09-Market-Analysis-Report-for-National-Clothing-Chain
├── Data-Source
│ ├── census-data.xlsx
│ ├── customer-list.xlsx
│ ├── purchase-list.xlsx
│ └── state-list.xlsx
├── National-Clothing-Chain-Data-Model.png
├── National-Clothing-Chain-Project.pbix
├── National-Clothing-Chain-Report.pdf
├── National-Clothing-Chain-Summary.doc
├── README.md
└── img
│ ├── avg-income-by-state.png
│ ├── customer-return-rate.png
│ ├── customers-by-income.png
│ ├── predicted-income-by-state.png
│ ├── product-by-price.png
│ ├── product-instock.png
│ ├── product-recomm.png
│ └── sales-income-corr.png
├── 10-Coursera-Sales-Analysis-in-Power-BI-Guided-Project
└── README.md
├── 11-dyslexia-and-music-notes-paper-analysis
└── README.md
└── README.md
/01-TMDB-Dataset-Analysis/README.md:
--------------------------------------------------------------------------------
1 | # TMDB Movies Dataset Analysis
2 | ### Udacity Become a Data Analyst Nanodegree | Project 2
3 |
4 | | Contents |
5 | | -------- |
6 | | [Dataset Description](#Dataset-Description) |
7 | | [Columns Description](#Columns-Description) |
8 | | [Questions for Analysis](#Questions-for-Analysis) |
9 | | [Data Wrangling](#Data-Wrangling) |
10 | | [Data Cleaning](#Data-Cleaning) |
11 | | [Exploratory Data Analysis](#Exploratory-Data-Analysis) |
12 | | [Built with](#Built-with) |
13 |
14 | ## Dataset Description:
15 | This data set contains information about 10,000 movies extracted from [TMDB](https://www.themoviedb.org/). The dataset contains movies from 1960 to 2015, including user ratings and revenue. Original data from [Kaggle](https://www.kaggle.com/tmdb/tmdb-movie-metadata)
16 |
17 | ## Columns Description:
18 | - `id, imdb_id`: unique id or imdb id for each movie on TMDB
19 | - `popularity`: a metric used to measure the popularity of the movie.
20 | - `budget`: the total budget of the movie in USD.
21 | - `revenue`:the total revenue of the movie in USD.
22 | - `original_title`: the original title of the movie.
23 | - `cast`:the names of the cast of the movie separated by "|".
24 | - `homepage`: the website of the movie (if it existed).
25 | - `director`: name(s) of the director(s) of the movie (separated by "|" if there is more than one director).
26 | - `tagline`:a catchphrase describing the movie.
27 | - `keywords`: keywords related to the movie.
28 | - `overview`:summary of the plot of the movie.
29 | - `runtime`:total runtime of the movie in minutes.
30 | - `genres`: genres of the movie separated by "|".
31 | - `production_companies`:production compan(y/ies) of the movie.
32 | - `release_date`:release date of the movie.
33 | - `vote_count`: number of voters of the movie.
34 | - `vote_average`:the average user rating of the movie
35 | - `release_year`:release year of the movie (from 1960 to 2015)
36 | - `budget_adj`: the total budget of the movie in USD in terms of 2010 dollars, accounting for inflation over time.
37 | - `revenue_adj`: the total revenue of the movie in USD in terms of 2010 dollars, accounting for inflation over time.
38 |
39 | ## Questions for Analysis:
40 | - Do movies with high popularity achieve high revenue?
41 | - What are the most filmed genres in this whole dataset?
42 | - Is there a correlation between a movie budget and its revenue?
43 |
44 | ## Data Wrangling:
45 | Our data can be found in the `tmdb-movies.csv` file provided in this repository. It is an edited version of the original Kaggle [TMDB 5000 Movie Dataset](https://www.kaggle.com/tmdb/tmdb-movie-metadata) provided by Udacity in the Become a Data Analyst Nanodegree Program.
46 |
47 | ## Data Cleaning:
48 | **Main Observations:**
49 | 1. Our dataset consisted of a total of 10866 rows and 21 columns.
50 | 2. We had only 1 duplicated row which had been dropped.
51 | 3. Some columns won't be useful in answering our questions, so they were dropped.
52 | 4. A few columns had many missing values that needed to be handled.
53 | 5. Columns `cast`, `director` and `genres` had values separated with a '|'.
54 | 6. `release_date`'s data type needed to be cast.
55 | 7. We could append a column for the movie `profit` using the formula: $profit = revenue - budget$.
56 | 8. `vote_average` is better presented as a categorical variable that groups multiple rating values.
57 | 9. We might also categorize the `profit` column for better EDA.
58 |
59 | ## Exploratory Data Analysis:
60 | After finishing our dataset cleaning, we ended up with a total of 10840 records and 10 columns. The dataset now has neither duplicates nor null values, and the data types are consistent, with suitable categorical variables to address our questions.
61 | We then performed some analytics and created some visualizations to answer our targeted questions.
62 | ### Q1: Do movies with high popularity achieve high revenue?
63 | > More popular movies receive far more revenue than less popular movies.
64 |
65 | ### Q2: What are the most filmed genres in this whole dataset?
66 | > - `Drama`, `Comedy` and `Action` are the three most filmed genres out of a total of 10839 movies in our dataset.
67 | > - The `Drama` genre alone is filmed 22.6% of the time in our dataset.
68 |
69 | ### Q3: Is there a correlation between a movie budget and its revenue?
70 | > There is a positive correlation between `budget` and `revenue`, indicating a relation between them, with few outliers found.
71 |
72 | ## Built with:
73 | - JupyterLab
74 | - Python3
75 | - Pandas
76 | - Numpy
77 |
--------------------------------------------------------------------------------
/01-TMDB-Dataset-Analysis/TMDB-dataset-analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | ""
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Project: Investigate a Dataset - [TMDB movie data]\n",
15 | "\n",
16 | "## Table of Contents\n",
17 | "
\n", 125 | " | id | \n", 126 | "imdb_id | \n", 127 | "popularity | \n", 128 | "budget | \n", 129 | "revenue | \n", 130 | "original_title | \n", 131 | "cast | \n", 132 | "homepage | \n", 133 | "director | \n", 134 | "tagline | \n", 135 | "... | \n", 136 | "overview | \n", 137 | "runtime | \n", 138 | "genres | \n", 139 | "production_companies | \n", 140 | "release_date | \n", 141 | "vote_count | \n", 142 | "vote_average | \n", 143 | "release_year | \n", 144 | "budget_adj | \n", 145 | "revenue_adj | \n", 146 | "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 151 | "135397 | \n", 152 | "tt0369610 | \n", 153 | "32.985763 | \n", 154 | "150000000 | \n", 155 | "1513528810 | \n", 156 | "Jurassic World | \n", 157 | "Chris Pratt|Bryce Dallas Howard|Irrfan Khan|Vi... | \n", 158 | "http://www.jurassicworld.com/ | \n", 159 | "Colin Trevorrow | \n", 160 | "The park is open. | \n", 161 | "... | \n", 162 | "Twenty-two years after the events of Jurassic ... | \n", 163 | "124 | \n", 164 | "Action|Adventure|Science Fiction|Thriller | \n", 165 | "Universal Studios|Amblin Entertainment|Legenda... | \n", 166 | "6/9/15 | \n", 167 | "5562 | \n", 168 | "6.5 | \n", 169 | "2015 | \n", 170 | "1.379999e+08 | \n", 171 | "1.392446e+09 | \n", 172 | "
1 | \n", 175 | "76341 | \n", 176 | "tt1392190 | \n", 177 | "28.419936 | \n", 178 | "150000000 | \n", 179 | "378436354 | \n", 180 | "Mad Max: Fury Road | \n", 181 | "Tom Hardy|Charlize Theron|Hugh Keays-Byrne|Nic... | \n", 182 | "http://www.madmaxmovie.com/ | \n", 183 | "George Miller | \n", 184 | "What a Lovely Day. | \n", 185 | "... | \n", 186 | "An apocalyptic story set in the furthest reach... | \n", 187 | "120 | \n", 188 | "Action|Adventure|Science Fiction|Thriller | \n", 189 | "Village Roadshow Pictures|Kennedy Miller Produ... | \n", 190 | "5/13/15 | \n", 191 | "6185 | \n", 192 | "7.1 | \n", 193 | "2015 | \n", 194 | "1.379999e+08 | \n", 195 | "3.481613e+08 | \n", 196 | "
2 | \n", 199 | "262500 | \n", 200 | "tt2908446 | \n", 201 | "13.112507 | \n", 202 | "110000000 | \n", 203 | "295238201 | \n", 204 | "Insurgent | \n", 205 | "Shailene Woodley|Theo James|Kate Winslet|Ansel... | \n", 206 | "http://www.thedivergentseries.movie/#insurgent | \n", 207 | "Robert Schwentke | \n", 208 | "One Choice Can Destroy You | \n", 209 | "... | \n", 210 | "Beatrice Prior must confront her inner demons ... | \n", 211 | "119 | \n", 212 | "Adventure|Science Fiction|Thriller | \n", 213 | "Summit Entertainment|Mandeville Films|Red Wago... | \n", 214 | "3/18/15 | \n", 215 | "2480 | \n", 216 | "6.3 | \n", 217 | "2015 | \n", 218 | "1.012000e+08 | \n", 219 | "2.716190e+08 | \n", 220 | "
3 | \n", 223 | "140607 | \n", 224 | "tt2488496 | \n", 225 | "11.173104 | \n", 226 | "200000000 | \n", 227 | "2068178225 | \n", 228 | "Star Wars: The Force Awakens | \n", 229 | "Harrison Ford|Mark Hamill|Carrie Fisher|Adam D... | \n", 230 | "http://www.starwars.com/films/star-wars-episod... | \n", 231 | "J.J. Abrams | \n", 232 | "Every generation has a story. | \n", 233 | "... | \n", 234 | "Thirty years after defeating the Galactic Empi... | \n", 235 | "136 | \n", 236 | "Action|Adventure|Science Fiction|Fantasy | \n", 237 | "Lucasfilm|Truenorth Productions|Bad Robot | \n", 238 | "12/15/15 | \n", 239 | "5292 | \n", 240 | "7.5 | \n", 241 | "2015 | \n", 242 | "1.839999e+08 | \n", 243 | "1.902723e+09 | \n", 244 | "
4 | \n", 247 | "168259 | \n", 248 | "tt2820852 | \n", 249 | "9.335014 | \n", 250 | "190000000 | \n", 251 | "1506249360 | \n", 252 | "Furious 7 | \n", 253 | "Vin Diesel|Paul Walker|Jason Statham|Michelle ... | \n", 254 | "http://www.furious7.com/ | \n", 255 | "James Wan | \n", 256 | "Vengeance Hits Home | \n", 257 | "... | \n", 258 | "Deckard Shaw seeks revenge against Dominic Tor... | \n", 259 | "137 | \n", 260 | "Action|Crime|Thriller | \n", 261 | "Universal Pictures|Original Film|Media Rights ... | \n", 262 | "4/1/15 | \n", 263 | "2947 | \n", 264 | "7.3 | \n", 265 | "2015 | \n", 266 | "1.747999e+08 | \n", 267 | "1.385749e+09 | \n", 268 | "
5 rows × 21 columns
\n", 272 | "\n", 730 | " | popularity | \n", 731 | "budget | \n", 732 | "revenue | \n", 733 | "original_title | \n", 734 | "runtime | \n", 735 | "genres | \n", 736 | "vote_count | \n", 737 | "vote_average | \n", 738 | "release_year | \n", 739 | "
---|---|---|---|---|---|---|---|---|---|
0 | \n", 744 | "32.985763 | \n", 745 | "150000000 | \n", 746 | "1513528810 | \n", 747 | "Jurassic World | \n", 748 | "124 | \n", 749 | "Action | \n", 750 | "5562 | \n", 751 | "6.5 | \n", 752 | "2015 | \n", 753 | "
1 | \n", 756 | "28.419936 | \n", 757 | "150000000 | \n", 758 | "378436354 | \n", 759 | "Mad Max: Fury Road | \n", 760 | "120 | \n", 761 | "Action | \n", 762 | "6185 | \n", 763 | "7.1 | \n", 764 | "2015 | \n", 765 | "
2 | \n", 768 | "13.112507 | \n", 769 | "110000000 | \n", 770 | "295238201 | \n", 771 | "Insurgent | \n", 772 | "119 | \n", 773 | "Adventure | \n", 774 | "2480 | \n", 775 | "6.3 | \n", 776 | "2015 | \n", 777 | "
3 | \n", 780 | "11.173104 | \n", 781 | "200000000 | \n", 782 | "2068178225 | \n", 783 | "Star Wars: The Force Awakens | \n", 784 | "136 | \n", 785 | "Action | \n", 786 | "5292 | \n", 787 | "7.5 | \n", 788 | "2015 | \n", 789 | "
4 | \n", 792 | "9.335014 | \n", 793 | "190000000 | \n", 794 | "1506249360 | \n", 795 | "Furious 7 | \n", 796 | "137 | \n", 797 | "Action | \n", 798 | "2947 | \n", 799 | "7.3 | \n", 800 | "2015 | \n", 801 | "
\n", 867 | " | popularity | \n", 868 | "budget | \n", 869 | "revenue | \n", 870 | "original_title | \n", 871 | "runtime | \n", 872 | "genres | \n", 873 | "vote_count | \n", 874 | "vote_average | \n", 875 | "release_year | \n", 876 | "profit | \n", 877 | "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 882 | "32.985763 | \n", 883 | "150000000 | \n", 884 | "1513528810 | \n", 885 | "Jurassic World | \n", 886 | "124 | \n", 887 | "Action | \n", 888 | "5562 | \n", 889 | "6.5 | \n", 890 | "2015 | \n", 891 | "1363528810 | \n", 892 | "
1 | \n", 895 | "28.419936 | \n", 896 | "150000000 | \n", 897 | "378436354 | \n", 898 | "Mad Max: Fury Road | \n", 899 | "120 | \n", 900 | "Action | \n", 901 | "6185 | \n", 902 | "7.1 | \n", 903 | "2015 | \n", 904 | "228436354 | \n", 905 | "
2 | \n", 908 | "13.112507 | \n", 909 | "110000000 | \n", 910 | "295238201 | \n", 911 | "Insurgent | \n", 912 | "119 | \n", 913 | "Adventure | \n", 914 | "2480 | \n", 915 | "6.3 | \n", 916 | "2015 | \n", 917 | "185238201 | \n", 918 | "
3 | \n", 921 | "11.173104 | \n", 922 | "200000000 | \n", 923 | "2068178225 | \n", 924 | "Star Wars: The Force Awakens | \n", 925 | "136 | \n", 926 | "Action | \n", 927 | "5292 | \n", 928 | "7.5 | \n", 929 | "2015 | \n", 930 | "1868178225 | \n", 931 | "
4 | \n", 934 | "9.335014 | \n", 935 | "190000000 | \n", 936 | "1506249360 | \n", 937 | "Furious 7 | \n", 938 | "137 | \n", 939 | "Action | \n", 940 | "2947 | \n", 941 | "7.3 | \n", 942 | "2015 | \n", 943 | "1316249360 | \n", 944 | "
\n", 1228 | " | popularity | \n", 1229 | "budget | \n", 1230 | "revenue | \n", 1231 | "original_title | \n", 1232 | "runtime | \n", 1233 | "genres | \n", 1234 | "vote_count | \n", 1235 | "vote_average | \n", 1236 | "release_year | \n", 1237 | "profit | \n", 1238 | "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", 1243 | "32.985763 | \n", 1244 | "150000000 | \n", 1245 | "1513528810 | \n", 1246 | "Jurassic World | \n", 1247 | "124 | \n", 1248 | "Action | \n", 1249 | "5562 | \n", 1250 | "average | \n", 1251 | "2015 | \n", 1252 | "high | \n", 1253 | "
1 | \n", 1256 | "28.419936 | \n", 1257 | "150000000 | \n", 1258 | "378436354 | \n", 1259 | "Mad Max: Fury Road | \n", 1260 | "120 | \n", 1261 | "Action | \n", 1262 | "6185 | \n", 1263 | "popular | \n", 1264 | "2015 | \n", 1265 | "high | \n", 1266 | "
2 | \n", 1269 | "13.112507 | \n", 1270 | "110000000 | \n", 1271 | "295238201 | \n", 1272 | "Insurgent | \n", 1273 | "119 | \n", 1274 | "Adventure | \n", 1275 | "2480 | \n", 1276 | "average | \n", 1277 | "2015 | \n", 1278 | "high | \n", 1279 | "
3 | \n", 1282 | "11.173104 | \n", 1283 | "200000000 | \n", 1284 | "2068178225 | \n", 1285 | "Star Wars: The Force Awakens | \n", 1286 | "136 | \n", 1287 | "Action | \n", 1288 | "5292 | \n", 1289 | "popular | \n", 1290 | "2015 | \n", 1291 | "high | \n", 1292 | "
4 | \n", 1295 | "9.335014 | \n", 1296 | "190000000 | \n", 1297 | "1506249360 | \n", 1298 | "Furious 7 | \n", 1299 | "137 | \n", 1300 | "Action | \n", 1301 | "2947 | \n", 1302 | "popular | \n", 1303 | "2015 | \n", 1304 | "high | \n", 1305 | "