├── Excel - LOOKUP, INDEX, MATCH, SUMIFS.xlsx
├── Excel - Pivot Tables, Pivot Chart, Slicers.xlsx
├── Excel - Sales Performance Dashboard.xlsx
├── Excel - Scenario Manager, Solver (Data Modeling).xlsx
├── GroverDataAnalystDashboard.png
├── Hadoop(Hive) - NYC Yellow Taxi Case Study.txt
├── Instagram Clone SQL - Database & Inserting Data.sql
├── Instagram Clone SQL - Exploratory Data Analysis.sql
├── MunicipalityDataAnalysisDashboard.png
├── PostgreSQL-BI-CHALLENGE
├── Python - Movie Industry EDA Project.ipynb
├── README.md
├── SQL - Data Cleaning.sql
├── SQL - Data Exploration.sql
└── visuals
├── BottomUpDashboard.png
├── E-commerceRetail.png
├── GeoChart.png
├── GroverDataAnalystDashboard.png
├── InstagramCloneDashboard.png
├── KPIDashboard.png
├── LondonBusSafety.png
├── MunicipalityDataAnalysisDashboard.png
├── Q&ADashboard.png
├── RetailPricingAnalytics.png
├── TopDownDashboard.png
├── WorkFromHome.png
├── WorldBankCO2Emission.png
└── excel
├── Dashboards.png
├── DataModeling.png
├── INDEX.png
├── LOOKUP.png
├── PivotReports.png
├── ScenarioManager.png
└── Solver.png
/Excel - LOOKUP, INDEX, MATCH, SUMIFS.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/Excel - LOOKUP, INDEX, MATCH, SUMIFS.xlsx
--------------------------------------------------------------------------------
/Excel - Pivot Tables, Pivot Chart, Slicers.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/Excel - Pivot Tables, Pivot Chart, Slicers.xlsx
--------------------------------------------------------------------------------
/Excel - Sales Performance Dashboard.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/Excel - Sales Performance Dashboard.xlsx
--------------------------------------------------------------------------------
/Excel - Scenario Manager, Solver (Data Modeling).xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/Excel - Scenario Manager, Solver (Data Modeling).xlsx
--------------------------------------------------------------------------------
/GroverDataAnalystDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/GroverDataAnalystDashboard.png
--------------------------------------------------------------------------------
/Hadoop(Hive) - NYC Yellow Taxi Case Study.txt:
--------------------------------------------------------------------------------
1 | NYC Yellow Taxi Case Study using HiveQL
2 |
3 | Tasks:
4 |
5 | 1. Create a table named taxidata.
6 |
7 | Create database hive;
8 | Use hive;
9 |
10 | CREATE TABLE IF NOT EXISTS taxidata
11 | (vendor_id string, pickup_datetime string, dropoff_datetime string, passenger_count
12 | int, trip_distance DECIMAL(9,6), pickup_longitude DECIMAL(9,6), pickup_latitude
13 | DECIMAL(9,6), rate_code int, store_and_fwd_flag string, dropoff_longitude
14 | DECIMAL(9,6), dropoff_latitude DECIMAL(9,6), payment_type string, fare_amount
15 | DECIMAL(9,6), extra DECIMAL(9,6), mta_tax DECIMAL(9,6), tip_amount
16 | DECIMAL(9,6), tolls_amount DECIMAL(9,6), total_amount DECIMAL(9,6),
17 | trip_time_in_secs int )
18 | ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
19 | STORED as TEXTFILE TBLPROPERTIES ("skip.header.line.count"="1");
20 |
21 | 2. Load data from the csv file. (yellow_tripdata_2015-01-
22 | 06.csv)
23 |
24 | LOAD DATA INPATH '/user/hive/yellow_tripdata_2015-01-06.csv' OVERWRITE INTO TABLE taxidata;
25 |
26 | 3. Run some basic queries to check the data is loaded
27 | properly.
28 |
29 | Select * from taxidata;
30 |
31 | 4. Run the queries required to answer the following
32 | questions.
33 |
34 | Problem Statement:
35 |
36 | * What is the total Number of trips (equal to number of rows)?
37 |
38 | Select count (*) from taxidata;
39 |
40 | * What is the total revenue generated by all the trips?
41 |
42 | Select sum(total_amount) as total_revenue from taxidata;
43 |
44 | * What fraction of the total is paid for tolls?
45 |
46 | Select sum(tolls_amount)/sum(total_amount) as toll_pct from taxidata;
47 |
48 | * What fraction of it is driver tips?
49 |
50 | Select sum(tip_amount)/sum(total_amount) as tip_pct from taxidata;
51 |
52 | * What is the average trip amount?
53 |
54 | Select avg(total_amount) as avg_tripamount from taxidata;
55 |
56 | * For each payment type, display the following details:
57 |
58 | i. Average fare generated
59 | ii. Average tip
60 | iii. Average tax
61 |
62 |
63 | select payment_type,
64 | avg(fare_amount) as average_fare,
65 | avg(tip_amount) as average_tip,
66 | avg(mta_tax) as average_tax
67 | from taxidata
68 | group by payment_type;
69 |
70 |
71 | * On an average which hour of the day generates the highest revenue?
72 |
73 | select h24 as hour,
74 | avg(total_amount) as avg_revenue
75 | from (select hour(pickup_datetime) as h24,
76 | total_amount
77 | from taxidata) ff
78 | group by h24
79 | order by avg_revenue desc;
80 |
81 |
82 | * What is the average distance of the trips?
83 |
84 | select
85 | avg(trip_distance) as avg_distance
86 | from taxidata;
87 |
88 | * How many different payment types are used?
89 |
90 | select count(distinct payment_type) as payment_type_count from taxidata;
91 |
92 |
--------------------------------------------------------------------------------
/Instagram Clone SQL - Exploratory Data Analysis.sql:
--------------------------------------------------------------------------------
1 | /*INSTAGRAM CLONE EXPLORATORY DATA ANALYSIS USING SQL*/
2 |
3 | /*SQL SKILLS: joins, date manipulation, regular expressions, views, stored procedures, aggregate functions, string manipulation*/
4 |
5 | -- --------------------------------------------------------------------------------------------------------------
6 |
7 | /*Ques.1 The first 10 users on the platform*/
8 |
9 | SELECT
10 | *
11 | FROM
12 | ig_clone.users
13 | ORDER BY created_at asc
14 | LIMIT 10;
15 | -- --------------------------------------------------------------------------------------------------------------
16 |
17 | /*Ques.2 Total number of registrations*/
18 |
19 | SELECT
20 | COUNT(*) AS 'Total Registration'
21 | FROM
22 | ig_clone.users;
23 | -- --------------------------------------------------------------------------------------------------------------
24 |
25 | /*Ques.3 The day of the week most users register on*/
26 |
27 | CREATE VIEW vwtotalregistrations AS
28 | SELECT
29 | DATE_FORMAT(created_at, '%W') AS 'day of the week',
30 | COUNT(*) AS 'total number of registration'
31 | FROM
32 | ig_clone.users
33 | GROUP BY 1
34 | ORDER BY 2 DESC;
35 |
36 | SELECT
37 | *
38 | FROM
39 | vwtotalregistrations;
40 |
41 | /*Version 2*/
42 |
43 | SELECT
44 | DAYNAME(created_at) AS 'Day of the Week',
45 | COUNT(*) AS 'Total Registration'
46 | FROM
47 | ig_clone.users
48 | GROUP BY 1
49 | ORDER BY 2 DESC;
50 | -- --------------------------------------------------------------------------------------------------------------
51 |
52 | /*Ques.4 The users who have never posted a photo*/
53 |
54 | SELECT
55 | u.username
56 | FROM
57 | ig_clone.users u
58 | LEFT JOIN
59 | ig_clone.photos p ON p.user_id = u.id
60 | WHERE
61 | p.id IS NULL;
62 | -- --------------------------------------------------------------------------------------------------------------
63 |
64 | /*Ques.5 The most likes on a single photo*/
65 |
66 | SELECT
67 | u.username, p.image_url, COUNT(*) AS total
68 | FROM
69 | ig_clone.photos p
70 | INNER JOIN
71 | ig_clone.likes l ON l.photo_id = p.id
72 | INNER JOIN
73 | ig_clone.users u ON p.user_id = u.id
74 | GROUP BY p.id
75 | ORDER BY total DESC
76 | LIMIT 1;
77 |
78 | /*Average number of posts per user*/
79 |
80 | SELECT
81 | ROUND((SELECT
82 | COUNT(*)
83 | FROM
84 | ig_clone.photos) / (SELECT
85 | COUNT(*)
86 | FROM
87 | ig_clone.users),
88 | 2) AS 'Average Posts by Users';
89 | -- --------------------------------------------------------------------------------------------------------------
90 |
91 | /*Ques.6 The number of photos posted by most active users*/
92 |
93 | SELECT
94 | u.username AS 'Username',
95 | COUNT(p.image_url) AS 'Number of Posts'
96 | FROM
97 | ig_clone.users u
98 | JOIN
99 | ig_clone.photos p ON u.id = p.user_id
100 | GROUP BY u.id
101 | ORDER BY 2 DESC
102 | LIMIT 5;
103 | -- --------------------------------------------------------------------------------------------------------------
104 |
105 | /*Ques.7 The total number of posts*/
106 |
107 | SELECT
108 | SUM(user_posts.total_posts_per_user) AS 'Total Posts by Users'
109 | FROM
110 | (SELECT
111 | u.username, COUNT(p.image_url) AS total_posts_per_user
112 | FROM
113 | ig_clone.users u
114 | JOIN ig_clone.photos p ON u.id = p.user_id
115 | GROUP BY u.id) AS user_posts;
116 | -- --------------------------------------------------------------------------------------------------------------
117 |
118 | /*Ques.8 The total number of users with posts*/
119 |
120 | SELECT
121 | COUNT(DISTINCT (u.id)) AS total_number_of_users_with_posts
122 | FROM
123 | ig_clone.users u
124 | JOIN
125 | ig_clone.photos p ON u.id = p.user_id;
126 | -- --------------------------------------------------------------------------------------------------------------
127 |
128 | /*Ques.9 The usernames with numbers as ending*/
129 |
130 | SELECT
131 | id, username
132 | FROM
133 | ig_clone.users
134 | WHERE
135 | username REGEXP '[0-9]$';
136 | -- --------------------------------------------------------------------------------------------------------------
137 |
138 | /*Ques.10 The usernames with character as ending*/
139 |
140 | SELECT
141 | id, username
142 | FROM
143 | ig_clone.users
144 | WHERE
145 | username NOT REGEXP '[0-9]$';
146 | -- --------------------------------------------------------------------------------------------------------------
147 |
148 | /*Ques.11 The number of usernames that start with A*/
149 |
150 | SELECT
151 | count(id)
152 | FROM
153 | ig_clone.users
154 | WHERE
155 | username REGEXP '^[A]';
156 | -- --------------------------------------------------------------------------------------------------------------
157 |
158 | /*Ques.12 The most popular tag names by usage*/
159 |
160 | SELECT
161 | t.tag_name, COUNT(tag_name) AS seen_used
162 | FROM
163 | ig_clone.tags t
164 | JOIN
165 | ig_clone.photo_tags pt ON t.id = pt.tag_id
166 | GROUP BY t.id
167 | ORDER BY seen_used DESC
168 | LIMIT 10;
169 | -- --------------------------------------------------------------------------------------------------------------
170 |
171 | /*Ques.13 The most popular tag names by likes*/
172 |
173 | SELECT
174 | t.tag_name AS 'Tag Name',
175 | COUNT(l.photo_id) AS 'Number of Likes'
176 | FROM
177 | ig_clone.photo_tags pt
178 | JOIN
179 | ig_clone.likes l ON l.photo_id = pt.photo_id
180 | JOIN
181 | ig_clone.tags t ON pt.tag_id = t.id
182 | GROUP BY 1
183 | ORDER BY 2 DESC
184 | LIMIT 10;
185 | -- --------------------------------------------------------------------------------------------------------------
186 |
187 | /*Ques.14 The users who have liked every single photo on the site*/
188 |
189 | SELECT
190 | u.id, u.username, COUNT(l.user_id) AS total_likes_by_user
191 | FROM
192 | ig_clone.users u
193 | JOIN
194 | ig_clone.likes l ON u.id = l.user_id
195 | GROUP BY u.id
196 | HAVING total_likes_by_user = (SELECT
197 | COUNT(*)
198 | FROM
199 | ig_clone.photos);
200 | -- --------------------------------------------------------------------------------------------------------------
201 |
202 | /*Ques.15 Total number of users without comments*/
203 |
204 | SELECT
205 | COUNT(*) AS total_number_of_users_without_comments
206 | FROM
207 | (SELECT
208 | u.username, c.comment_text
209 | FROM
210 | ig_clone.users u
211 | LEFT JOIN ig_clone.comments c ON u.id = c.user_id
212 | GROUP BY u.id , c.comment_text
213 | HAVING comment_text IS NULL) AS users;
214 | -- --------------------------------------------------------------------------------------------------------------
215 |
216 | /*Ques.16 The percentage of users who have either never commented on a photo or likes every photo*/
217 |
218 | SELECT
219 | tableA.total_A AS 'Number Of Users who never commented',
220 | (tableA.total_A / (SELECT
221 | COUNT(*)
222 | FROM
223 | ig_clone.users u)) * 100 AS '%',
224 | tableB.total_B AS 'Number of Users who likes every photos',
225 | (tableB.total_B / (SELECT
226 | COUNT(*)
227 | FROM
228 | ig_clone.users u)) * 100 AS '%'
229 | FROM
230 | (SELECT
231 | COUNT(*) AS total_A
232 | FROM
233 | (SELECT
234 | u.username, c.comment_text
235 | FROM
236 | ig_clone.users u
237 | LEFT JOIN ig_clone.comments c ON u.id = c.user_id
238 | GROUP BY u.id , c.comment_text
239 | HAVING comment_text IS NULL) AS total_number_of_users_without_comments) AS tableA
240 | JOIN
241 | (SELECT
242 | COUNT(*) AS total_B
243 | FROM
244 | (SELECT
245 | u.id, u.username, COUNT(u.id) AS total_likes_by_user
246 | FROM
247 | ig_clone.users u
248 | JOIN ig_clone.likes l ON u.id = l.user_id
249 | GROUP BY u.id , u.username
250 | HAVING total_likes_by_user = (SELECT
251 | COUNT(*)
252 | FROM
253 | ig_clone.photos p)) AS total_number_users_likes_every_photos) AS tableB;
254 | -- --------------------------------------------------------------------------------------------------------------
255 |
256 | /*Ques.17 Clean URLs of photos posted on the platform*/
257 |
258 | SELECT
259 | SUBSTRING(image_url,
260 | LOCATE('/', image_url) + 2,
261 | LENGTH(image_url) - LOCATE('/', image_url)) AS IMAGE_URL
262 | FROM
263 | ig_clone.photos;
264 | -- --------------------------------------------------------------------------------------------------------------
265 |
266 | /*Ques.18 The average time on the platform */
267 |
268 | SELECT
269 | ROUND(AVG(DATEDIFF(CURRENT_TIMESTAMP, created_at)/365), 2) as Total_Years_on_Platform
270 | FROM
271 | ig_clone.users;
272 | -- --------------------------------------------------------------------------------------------------------------
273 |
274 | /*CREATING STORED PROCEDURES */
275 |
276 | /*Ques.1 Popular hashtags list*/
277 |
278 | CREATE PROCEDURE `spPopularTags`()
279 | BEGIN
280 | SELECT
281 | t.tag_name, COUNT(tag_name) AS 'HashtagCounts'
282 | FROM
283 | ig_clone.tags t
284 | JOIN
285 | ig_clone.photo_tags pt ON t.id = pt.tag_id
286 | GROUP BY t.id
287 | ORDER BY 2 DESC;
288 | END //
289 |
290 | CALL `ig_clone`.`spPopularTags`();
291 | -- --------------------------------------------------------------------------------------------------------------
292 |
293 | /*Ques.2 Users who have engaged atleast one time on the platform*/
294 |
295 | CREATE PROCEDURE `spEngagedUser`()
296 | BEGIN
297 | SELECT DISTINCT
298 | username
299 | FROM
300 | ig_clone.users u
301 | INNER JOIN
302 | ig_clone.photos p ON p.user_id = u.id
303 | INNER JOIN
304 | ig_clone.likes l ON l.user_id = p.user_id
305 | WHERE
306 | p.id IS NOT NULL
307 | OR l.user_id IS NOT NULL;
308 | END //
309 |
310 | CALL `ig_clone`.`spEngagedUser`();
311 | -- --------------------------------------------------------------------------------------------------------------
312 |
313 | /*Ques.3 Total number of comments by the users on the platform */
314 |
315 | CREATE PROCEDURE `spUserComments`()
316 | BEGIN
317 | SELECT
318 | COUNT(*) as 'Total Number of Comments'
319 | FROM (
320 | SELECT
321 | c.user_id, u.username
322 | FROM ig_clone.users u
323 | JOIN ig_clone.comments c ON u.id = c.user_id
324 | WHERE
325 | c.comment_text IS NOT NULL
326 | GROUP BY u.username , c.user_id) as Table1;
327 | END //
328 |
329 | CALL `ig_clone`.`spUserComments`();
330 | -- --------------------------------------------------------------------------------------------------------------
331 |
332 | /*Ques.4 The username, image posted, tags used and comments made by a specific user*/
333 |
334 | CREATE PROCEDURE `spUserInfo`(IN userid INT(11))
335 | BEGIN
336 | SELECT
337 | u.id, u.username, p.image_url, c.comment_text, t.tag_name
338 | FROM
339 | ig_clone.users u
340 | INNER JOIN
341 | ig_clone.photos p ON p.user_id = u.id
342 | INNER JOIN
343 | ig_clone.comments c ON c.user_id = u.id
344 | INNER JOIN
345 | ig_clone.photo_tags pt ON pt.photo_id = p.id
346 | INNER JOIN
347 | ig_clone.tags t ON t.id = pt.tag_id
348 | where u.id = userid;
349 | END //
350 |
351 | CALL `ig_clone`.`spUserInfo`(2);
352 | -- --------------------------------------------------------------------------------------------------------------
353 |
--------------------------------------------------------------------------------
/MunicipalityDataAnalysisDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/MunicipalityDataAnalysisDashboard.png
--------------------------------------------------------------------------------
/PostgreSQL-BI-CHALLENGE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/PostgreSQL-BI-CHALLENGE
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Portfolio Projects
2 | ## Following are my projects in SQL, Python, Tableau & Excel:
3 | #### *You can take a look at my personal website : [www.priyankajha24.wixsite.com](https://www.priyankajha24.wixsite.com/aboutme)*
4 | #### *You can also take a look at my Linkedin : [PriyankaJhaTheAnalyst](https://www.linkedin.com/in/priyankajhatheanalyst/)*
5 |
6 |
7 |
8 | - [x] **SQL & Tableau** -
9 | - Instagram Clone Data Analysis Project
10 | *See on Tableau Public:* **[Dashboard](https://public.tableau.com/views/InstagramCloneDataAnalysisDashboard/InstagramCloneDataAnalysisDashboard?:language=en-US&:display_count=n&:origin=viz_share_link)**
11 | *Review the Data Insertion SQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/Instagram%20Clone%20SQL%20-%20Database%20%26%20Inserting%20Data.sql)**
12 | *Review the Data Exploration SQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/Instagram%20Clone%20SQL%20-%20Exploratory%20Data%20Analysis.sql)**
13 |
14 | 
15 |
16 |
17 |
18 | - [x] **Hadoop (Hive)** -
19 | - NYC Yellow Taxi Records: Data Analysis
20 | *Review the HiveQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/Hadoop(Hive)%20-%20NYC%20Yellow%20Taxi%20Case%20Study.txt)**
21 |
22 |
23 |
24 | - [x] **SQL** -
25 | - Nashville Housing Dataset: Data Cleaning
26 | *Review the SQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/SQL%20-%20Data%20Cleaning.sql)**
27 |
28 | - COVID-19 Dataset: Data Exploration
29 | *Review the SQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/SQL%20-%20Data%20Exploration.sql)**
30 |
31 |
32 |
33 | - [x] **PostgreSQL** -
34 | - Business Intelligence Challenge
35 | *Review the PostgreSQL Script:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/DataAnalystPortfolioProjects/blob/main/PostgreSQL-BI-CHALLENGE)**
36 | *Review the Google Slides Deck to see the Data Visualizations:* **[HERE](https://drive.google.com/file/d/1JIDnsaLXAx2qnWM86yfrRKLWF5B_ofHU/view?usp=sharing)**
37 |
38 |
39 |
40 | - [x] **Python** -
41 | - Movies Industry Dataset: Exploratory Data Analysis Project
42 | *Read the complete Project Analysis in Markdown:* **[HERE](https://github.com/PriyankaJhaTheAnalyst/Python_MoviesIndustry_EDA/blob/main/README.md)**
43 |
44 |
45 |
46 | - [x] **Tableau** -
47 |
48 | *To view these Tableau Dashboards in Tableau Public, click on the hypertext link.*
49 |
50 | - #MakeoverMonday 2020 Week 32 | Benefits of Working from Home: https://public.tableau.com/views/MakeoverMonday2020Week32BenefitsofWorkingfromHome/WORKFROMHOME?:language=en-GB&:display_count=n&:origin=viz_share_link
51 |
52 | 
53 |
54 | - Municipality Data Analysis Dashboard: https://public.tableau.com/views/MunicipalityDashboard/Municipality?:language=en-GB&:display_count=n&:origin=viz_share_link
55 |
56 | 
57 |
58 | - GROVER Junior Data Analyst Case Study Dashboard: https://public.tableau.com/views/GROVERJuniorDataAnalystCaseStudy/GroverDataAnalystDashboard?:language=en-GB&:display_count=n&:origin=viz_share_link
59 |
60 | 
61 |
62 |
63 | - Retail Pricing Analytics Dashboard: https://public.tableau.com/app/profile/priyankajhatheanalyst/viz/RetailPriceAnalyticsDashboard/RetailPricingAnalytics#2
64 |
65 | 
66 |
67 |
68 | - E-commerce Sales Dashboard: https://public.tableau.com/app/profile/priyankajhatheanalyst/viz/eCommerceSales_16310934566250/ExploratoryVisualAnalysis
69 |
70 | 
71 |
72 |
73 | - Sales SuperStore Deep Data Analysis (5 Dashboards): https://public.tableau.com/app/profile/priyankajhatheanalyst/viz/SuperStoreDataset5Dashboards/1KPIDashboard
74 |
75 | 1 KPI Dashboard
76 |
77 | 
78 |
79 | 2 Top-Down Dashboard
80 |
81 | 
82 |
83 | 3 Q&A Dashboard
84 |
85 | 
86 |
87 | 4 Bottom-Up Dashboard
88 |
89 | 
90 |
91 | 5 Geo Chart
92 |
93 | 
94 |
95 |
96 |
97 | - World Bank CO2 Emissions Dashboard: https://public.tableau.com/views/WorldBankCo2Emissions_16149182681650/Dashboard1?:language=en-GB&:display_count=n&:origin=viz_share_link
98 |
99 | 
100 |
101 |
102 | - London Bus Safety Dashboard: https://public.tableau.com/views/TableauCertificationCourse2/ChartsDashboard?:language=en-GB&:display_count=n&:origin=viz_share_link
103 |
104 | 
105 |
106 |
107 |
108 | - [x] **Excel** -
109 |
110 | *Kindly download these Excel files from this repository and view them in Microsoft Excel.*
111 |
112 |
113 | - Sales Superstore Sample: Sales Performance Dashboard
114 |
115 | 
116 |
117 |
118 | - NetTRON Network Infrastructure Data : LOOKUP, INDEX, MATCH, SUMIFS
119 |
120 | 
121 |
122 | 
123 |
124 |
125 | - Shipping Data: Pivot Tables, Pivot Chart, Slicers
126 |
127 | 
128 |
129 |
130 | - Project Costing Model Data: Scenario Manager, Solver (Data Modeling)
131 |
132 | 
133 |
134 | 
135 |
136 | 
137 |
138 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
139 |
--------------------------------------------------------------------------------
/SQL - Data Cleaning.sql:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | Cleaning Data in SQL
4 | Skills used : CREATE, UPDATE, SELECT, CTE, JOINS, OREDR BY, GROUP BY
5 | */
6 |
7 |
8 | select *
9 | from nashville_housing
10 |
11 | --------------------------------------------------------------------------------------------------------------------------
12 |
13 |
14 | /*Standardize Date Format*/
15 |
16 |
17 | select `Sale Date`
18 | from nashville_housing
19 |
20 |
21 | select `Sale Date`, convert(`Sale Date`, date)
22 | from nashville_housing
23 |
24 |
25 | update nashville_housing
26 | set `Sale Date`= convert(`Sale Date`, date)
27 |
28 | --------------------------------------------------------------------------------------------------------------------------
29 |
30 |
31 | /*Populate Property Address Data*/
32 |
33 |
34 | select *
35 | from nashville_housing
36 | where `Property Address` is null
37 | order by `Parcel ID`
38 |
39 |
40 | select a.`Parcel ID` , b.`Property Address`, b.`Parcel ID` ,
41 | b.`Property Address` ,
42 | ifnull(a.`Property Address`, b.`Property Address`) as `Address To Be Filled`
43 | from nashville_housing a
44 | join nashville_housing b
45 | on a.`Parcel ID` = b.`Parcel ID`
46 | and a.Column1 != b.Column1
47 | where a.`Property Address` is null
48 |
49 |
50 | update nashville_housing a
51 | join nashville_housing b
52 | on a.`Parcel ID` = b.`Parcel ID`
53 | and a.Column1 != b.Column1
54 | set a.`Property Address` =
55 | ifnull(a.`Property Address`, b.`Property Address`)
56 | where a.`Property Address` is null
57 |
58 | --------------------------------------------------------------------------------------------------------------------------
59 |
60 |
61 | /*Breaking out Address into Individual Columns (Address, City, State)*/
62 |
63 |
64 | select `Property Address`
65 | from nashville_housing
66 |
67 |
68 | select
69 | substring(`Property Address` , 1, locate(',', `Property Address`) - 1) as Address ,
70 | substring(`Property Address` , locate(',', `Property Address`) + 1 , length(`Property Address`)) as City
71 | from nashville_housing
72 |
73 |
74 | alter table nashville_housing
75 | add column `Property Split Address` varchar(255);
76 |
77 |
78 | update nashville_housing
79 | set `Property Split Address` = substring(`Property Address` , 1, locate(',', `Property Address`) - 1)
80 |
81 |
82 | alter table nashville_housing
83 | add column `Property City` varchar(255);
84 |
85 |
86 | update nashville_housing
87 | set `Property City` = substring(`Property Address` , locate(',', `Property Address`) + 1 , length(`Property Address`))
88 |
89 |
90 | select `Owner Address`
91 | from nashville_housing
92 |
93 | --------------------------------------------------------------------------------------------------------------------------
94 |
95 |
96 | /*CREATING A SPLIT STRING FUNCTION TO SPLIT THE OWNER ADDRESS*/
97 |
98 |
99 | create function SPLIT_STR(
100 | x varchar(255),
101 | delim varchar(12),
102 | pos int
103 | )
104 | returns varchar(255)
105 | return replace(substring(substring_index(x, delim, pos),
106 | length(substring_index(x, delim, pos -1)) + 1),
107 | delim, '');
108 |
109 |
110 |
111 | select
112 | SPLIT_STR(`Owner Address`, ',', 1),
113 | SPLIT_STR(`Owner Address`, ',', 2),
114 | SPLIT_STR(`Owner Address`, ',', 3)
115 | from nashville_housing
116 |
117 |
118 | alter table nashville_housing
119 | add column `Address` varchar(255);
120 |
121 |
122 | update nashville_housing
123 | set `Address` = SPLIT_STR(`Owner Address`, ',', 1)
124 |
125 |
126 | alter table nashville_housing
127 | add column `City` varchar(255);
128 |
129 |
130 | update nashville_housing
131 | set `City` = SPLIT_STR(`Owner Address`, ',', 2)
132 |
133 |
134 | alter table nashville_housing
135 | add column `State` varchar(255);
136 |
137 |
138 | update nashville_housing
139 | set `State` = SPLIT_STR(`Owner Address`, ',', 3)
140 |
141 | --------------------------------------------------------------------------------------------------------------------------
142 |
143 |
144 | /*Change Y and N to Yes and No in `Sold as Vacant` */
145 |
146 |
147 | select distinct (`Sold As Vacant`), count(`Sold As Vacant`)
148 | from nashville_housing
149 | group by `Sold As Vacant`
150 | order by `Sold As Vacant`
151 |
152 |
153 | select `Sold As Vacant` ,
154 | case
155 | when `Sold As Vacant` = 'Y' then 'Yes'
156 | when `Sold As Vacant` = 'N' then 'No'
157 | end as `Sold As Vacant`
158 | from nashville_housing
159 |
160 |
161 | update nashville_housing
162 | set `Sold As Vacant` = case
163 | when `Sold As Vacant` = 'Y' then 'Yes'
164 | when `Sold As Vacant` = 'N' then 'No'
165 | else `Sold As Vacant`
166 | end
167 | --------------------------------------------------------------------------------------------------------------------------
168 |
169 |
170 | /*Remove Duplicate*/
171 |
172 |
173 | delete nh
174 | from nashville_housing nh
175 | join (
176 | select Column1,
177 | row_number() OVER(
178 | partition by
179 | `Parcel ID`,
180 | `Property Address`,
181 | `Sale Price`,
182 | `Sale Date`,
183 | `Legal Reference`
184 | order by
185 | Column1
186 | ) row_num
187 | from nashville_housing
188 | ) dupes
189 | on nh.Column1 = dupes.Column1
190 | where dupes.row_num > 1;
191 |
192 |
193 | --------------------------------------------------------------------------------------------------------------------------
194 |
195 |
196 | /*Delete Unused Column*/
197 |
198 |
199 | alter table nashville_housing
200 | drop column `Owner Address`, drop column `Tax District`, drop column `Property Address`, drop column `Sale Date`;
201 |
202 | --------------------------------------------------------------------------------------------------------------------------
203 |
204 |
205 |
--------------------------------------------------------------------------------
/SQL - Data Exploration.sql:
--------------------------------------------------------------------------------
1 | /*
2 | Covid 19 Data Exploration
3 | Skills used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
4 | */
5 |
6 | --------------------------------------------------------------------------------------------------------------------------
7 |
8 |
9 | select *
10 | from coviddeaths_csv
11 | where continent is not null
12 | order by 3, 4
13 |
14 | select location , total_cases , new_cases , total_deaths , population
15 | from coviddeaths_csv
16 | order by 1 ,2
17 |
18 | --------------------------------------------------------------------------------------------------------------------------
19 |
20 |
21 | /*Looking at Total Cases vs Total Deaths*/
22 |
23 | select location , `date` ,total_cases , total_deaths ,
24 | (total_deaths/total_cases)*100 as death_percentage
25 | from coviddeaths_csv
26 | where location = 'Afghanistan'
27 | order by 1 ,2
28 |
29 | --------------------------------------------------------------------------------------------------------------------------
30 |
31 |
32 | /*Looking at Total Cases vs Population*/
33 |
34 | select location , `date` , population ,total_cases , total_deaths , (total_cases/population)*100 as percent_population_infected
35 | from coviddeaths_csv
36 | where location = 'Africa'
37 | order by 1 ,2
38 |
39 | --------------------------------------------------------------------------------------------------------------------------
40 |
41 |
42 | /*Looking at Countries with Highest Infection rate compared to Population*/
43 |
44 | select location , population , max(total_cases) ,
45 | max(total_cases/population)*100 as percent_population_infected
46 | from coviddeaths_csv
47 | group by location, population
48 | order by percent_population_infected desc
49 |
50 | --------------------------------------------------------------------------------------------------------------------------
51 |
52 |
53 | /*Showing Countries with Highest Death Count per Population*/
54 |
55 | select location , sum(total_deaths) as total_death_count
56 | from coviddeaths_csv
57 | where continent is not null
58 | group by location
59 | order by total_death_count desc
60 |
61 | --------------------------------------------------------------------------------------------------------------------------
62 |
63 |
64 | /*BREAKING THINGS DOWN BY CONTINENT
65 |
66 | Showing contintents with the highest death count per population*/
67 |
68 | Select continent, MAX(cast(Total_deaths as int)) as total_death_count
69 | From coviddeaths_csv
70 | Where continent is not null
71 | Group by continent
72 | order by total_death_count desc
73 |
74 | --------------------------------------------------------------------------------------------------------------------------
75 |
76 |
77 | /*GLOBAL NUMBERS*/
78 |
79 | select `date` , sum(new_cases) as total_cases , sum(new_deaths) total_deaths ,
80 | sum(new_deaths)/sum(new_cases)*100 as new_death_percentage
81 | from coviddeaths_csv
82 | where continent is not null
83 | group by `date`
84 |
85 | --------------------------------------------------------------------------------------------------------------------------
86 |
87 |
/*Looking at Total Population vs Vaccination*/

-- Pair each country/date death row with that day's new vaccinations;
-- rows with no vaccination figure are dropped.
select
    cd.continent,
    cd.location,
    cd.`date`,
    cd.population,
    cv.new_vaccinations
from coviddeaths_csv cd
inner join covidvacinations_csv cv
    on cd.location = cv.location
    and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
96 |
97 | --------------------------------------------------------------------------------------------------------------------------
98 |
99 |
/*Using CTE*/

-- Fixes vs. original:
--  * the CTE column list declared 5 names but the SELECT produced 6
--    expressions — column counts must match;
--  * the 6th expression referenced the rolling_people_vaccinated alias
--    inside the same SELECT, which is invalid — the percentage is computed
--    only in the outer query;
--  * population was never selected, yet the outer query divides by it —
--    it is now carried through the CTE;
--  * cast(... as int) is not valid MySQL — SIGNED is used instead;
--  * ordering the window by cd.location inside a partition on cd.location
--    was redundant and is dropped.
with pops_vs_vacc (continent, location, `date`, population, new_vaccinations, rolling_people_vaccinated)
as
(
    select
        cd.continent,
        cd.location,
        cd.`date`,
        cd.population,
        cv.new_vaccinations,
        sum(cast(cv.new_vaccinations as signed)) over
            (partition by cd.location order by cd.`date`)
            as rolling_people_vaccinated
    from coviddeaths_csv cd
    inner join covidvacinations_csv cv
        on cd.location = cv.location
        and cd.`date` = cv.`date`
    where cv.new_vaccinations is not null
)

select
    *,
    (rolling_people_vaccinated/population)*100 as vaccination_percentage
from pops_vs_vacc
119 |
120 | --------------------------------------------------------------------------------------------------------------------------
121 |
122 |
/*TEMP TABLE*/

-- Fixes vs. original:
--  * DROP TEMPORARY TABLE so a permanent object of the same name (e.g. the
--    view created below) can never be dropped by accident;
--  * the INSERT ... SELECT supplied only 5 values for a 6-column table —
--    cd.population is now included;
--  * the window ordered by date DESC, producing a reverse-cumulative sum;
--    ascending order gives the intended running total;
--  * statement terminators (;) added so the script runs as a batch.
drop temporary table if exists percentage_population_vaccinated;

create temporary table percentage_population_vaccinated
(
    continent varchar(255),
    location varchar(255),
    `date` datetime,
    population numeric,
    new_vaccinations numeric,
    rolling_people_vaccinated numeric
);

insert into percentage_population_vaccinated
select
    cd.continent,
    cd.location,
    cd.`date`,
    cd.population,
    cv.new_vaccinations,
    sum(cv.new_vaccinations) over
        (partition by cd.location order by cd.`date`)
        as rolling_people_vaccinated
from coviddeaths_csv cd
inner join covidvacinations_csv cv
    on cd.location = cv.location
    and cd.`date` = cv.`date`
where cv.new_vaccinations is not null;

select
    *,
    (rolling_people_vaccinated/population)*100 as vaccination_percentage
from percentage_population_vaccinated;
148 |
149 | --------------------------------------------------------------------------------------------------------------------------
150 |
151 |
/*Creating View to store data for later visualization*/

-- Fixes vs. original:
--  * cd.population is included so the vaccination percentage can actually be
--    derived from the view (its stated purpose);
--  * the window orders ascending by date — a running total, not a
--    reverse-cumulative sum;
--  * CREATE OR REPLACE lets the script be rerun without a "table exists"
--    error.
create or replace view percentage_population_vaccinated as
select
    cd.continent,
    cd.location,
    cd.`date`,
    cd.population,
    cv.new_vaccinations,
    sum(cv.new_vaccinations) over
        (partition by cd.location order by cd.`date`)
        as rolling_people_vaccinated
from coviddeaths_csv cd
inner join covidvacinations_csv cv
    on cd.location = cv.location
    and cd.`date` = cv.`date`
where cv.new_vaccinations is not null
163 |
164 | --------------------------------------------------------------------------------------------------------------------------
165 |
166 |
--------------------------------------------------------------------------------
/visuals/BottomUpDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/BottomUpDashboard.png
--------------------------------------------------------------------------------
/visuals/E-commerceRetail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/E-commerceRetail.png
--------------------------------------------------------------------------------
/visuals/GeoChart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/GeoChart.png
--------------------------------------------------------------------------------
/visuals/GroverDataAnalystDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/GroverDataAnalystDashboard.png
--------------------------------------------------------------------------------
/visuals/InstagramCloneDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/InstagramCloneDashboard.png
--------------------------------------------------------------------------------
/visuals/KPIDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/KPIDashboard.png
--------------------------------------------------------------------------------
/visuals/LondonBusSafety.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/LondonBusSafety.png
--------------------------------------------------------------------------------
/visuals/MunicipalityDataAnalysisDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/MunicipalityDataAnalysisDashboard.png
--------------------------------------------------------------------------------
/visuals/Q&ADashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/Q&ADashboard.png
--------------------------------------------------------------------------------
/visuals/RetailPricingAnalytics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/RetailPricingAnalytics.png
--------------------------------------------------------------------------------
/visuals/TopDownDashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/TopDownDashboard.png
--------------------------------------------------------------------------------
/visuals/WorkFromHome.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/WorkFromHome.png
--------------------------------------------------------------------------------
/visuals/WorldBankCO2Emission.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/WorldBankCO2Emission.png
--------------------------------------------------------------------------------
/visuals/excel/Dashboards.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/Dashboards.png
--------------------------------------------------------------------------------
/visuals/excel/DataModeling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/DataModeling.png
--------------------------------------------------------------------------------
/visuals/excel/INDEX.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/INDEX.png
--------------------------------------------------------------------------------
/visuals/excel/LOOKUP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/LOOKUP.png
--------------------------------------------------------------------------------
/visuals/excel/PivotReports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/PivotReports.png
--------------------------------------------------------------------------------
/visuals/excel/ScenarioManager.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/ScenarioManager.png
--------------------------------------------------------------------------------
/visuals/excel/Solver.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PriyankaJhaTheDeveloper/DataAnalystPortfolioProjects/7e69f79079f2655878387deb6a15e86d85d60d39/visuals/excel/Solver.png
--------------------------------------------------------------------------------