├── logo.png
├── Schemas.sql
├── Business Problems Netflix.sql
├── Solutions of 15 business problems.sql
└── README.md

/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/najirh/netflix_sql_project/HEAD/logo.png
--------------------------------------------------------------------------------
/Schemas.sql:
--------------------------------------------------------------------------------
1 | -- SCHEMAS of Netflix
2 | -- Single flat table used by every report query; dropped and re-created on each run.
3 | DROP TABLE IF EXISTS netflix;
4 | CREATE TABLE netflix
5 | (
6 |     show_id      VARCHAR(5),     -- counted as the per-row id by the report queries; no key constraint is declared
7 |     type         VARCHAR(10),    -- the report queries compare this against 'Movie' and 'TV Show'
8 |     title        VARCHAR(250),
9 |     director     VARCHAR(550),   -- may hold several comma-separated names (queries split on ','); NULL is read as "no director"
10 |     casts        VARCHAR(1050),  -- comma-separated actor names (queries split on ',')
11 |     country      VARCHAR(550),   -- comma-separated country names (queries split on ',')
12 |     date_added   VARCHAR(55),    -- kept as text; queries parse it with TO_DATE(date_added, 'Month DD, YYYY')
13 |     release_year INT,
14 |     rating       VARCHAR(15),
15 |     duration     VARCHAR(15),    -- text; queries cast its first space-delimited token to INT
16 |     listed_in    VARCHAR(250),   -- comma-separated genre names (queries split on ',')
17 |     description  VARCHAR(550)    -- free text; searched for keywords with ILIKE
18 | );
19 |
20 | SELECT * FROM netflix;  -- quick check of the table's contents
--------------------------------------------------------------------------------
/Business Problems Netflix.sql:
--------------------------------------------------------------------------------
1 | -- 15 Business Problems & Solutions
2 |
3 | 1. Count the number of Movies vs TV Shows
4 | 2. Find the most common rating for movies and TV shows
5 | 3. List all movies released in a specific year (e.g., 2020)
6 | 4. Find the top 5 countries with the most content on Netflix
7 | 5. Identify the longest movie
8 | 6. Find content added in the last 5 years
9 | 7. Find all the movies/TV shows by director 'Rajiv Chilaka'!
10 | 8. List all TV shows with more than 5 seasons
11 | 9. Count the number of content items in each genre
12 | 10.Find each year and the average numbers of content release in India on netflix.
13 | return top 5 year with highest avg content release!
14 | 11. List all movies that are documentaries
15 | 12. Find all content without a director
16 | 13. Find how many movies actor 'Salman Khan' appeared in last 10 years!
17 | 14.
Find the top 10 actors who have appeared in the highest number of movies produced in India.
18 | 15.
19 | Categorize the content based on the presence of the keywords 'kill' and 'violence' in
20 | the description field. Label content containing these keywords as 'Bad' and all other
21 | content as 'Good'. Count how many items fall into each category.
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/Solutions of 15 business problems.sql:
--------------------------------------------------------------------------------
1 | -- Netflix Data Analysis using SQL
2 | -- Solutions of 15 business problems
3 | -- 1. Count the number of Movies vs TV Shows
4 | -- One output row per content type with its number of titles.
5 | SELECT
6 |     type,
7 |     COUNT(*) AS total_content
8 | FROM netflix
9 | GROUP BY type;
10 |
11 | -- 2. Find the most common rating for movies and TV shows
12 | -- RANK() keeps every rating that ties for first place within a type.
13 | WITH RatingCounts AS (
14 |     SELECT
15 |         type,
16 |         rating,
17 |         COUNT(*) AS rating_count
18 |     FROM netflix
19 |     GROUP BY type, rating
20 | ),
21 | RankedRatings AS (
22 |     SELECT
23 |         type,
24 |         rating,
25 |         rating_count,
26 |         RANK() OVER (PARTITION BY type ORDER BY rating_count DESC) AS rank
27 |     FROM RatingCounts
28 | )
29 | SELECT
30 |     type,
31 |     rating AS most_frequent_rating
32 | FROM RankedRatings
33 | WHERE rank = 1;
34 |
35 |
36 | -- 3. List all movies released in a specific year (e.g., 2020)
37 | -- Restricted to type = 'Movie' so that TV shows released in the same year are left out.
38 | SELECT *
39 | FROM netflix
40 | WHERE type = 'Movie'
41 |     AND release_year = 2020;
42 |
43 | -- 4. Find the top 5 countries with the most content on Netflix
44 | -- country is a comma-separated list. Each entry is trimmed so that
45 | -- ' India' and 'India' are counted as the same country; blank entries are skipped.
46 | SELECT
47 |     TRIM(c.country_name) AS country,
48 |     COUNT(*) AS total_content
49 | FROM netflix AS n
50 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.country, ',')) AS c (country_name)
51 | WHERE TRIM(c.country_name) <> ''
52 | GROUP BY TRIM(c.country_name)
53 | ORDER BY total_content DESC
54 | LIMIT 5;
55 |
56 |
57 |
58 |
59 |
60 | -- 5. Identify the longest movie
61 | -- The leading number of duration is compared as an integer. NULLS LAST keeps
62 | -- rows without a duration from sorting ahead of real values in DESC order.
63 | SELECT *
64 | FROM netflix
65 | WHERE type = 'Movie'
66 | ORDER BY SPLIT_PART(duration, ' ', 1)::INT DESC NULLS LAST
67 | LIMIT 1;
68 |
69 | -- 6.
Find content added in the last 5 years
70 | -- date_added is text, so it is parsed with TO_DATE before the date comparison.
71 | SELECT *
72 | FROM netflix
73 | WHERE TO_DATE(date_added, 'Month DD, YYYY') >= CURRENT_DATE - INTERVAL '5 years';
74 |
75 |
76 | -- 7. Find all the movies/TV shows by director 'Rajiv Chilaka'!
77 | -- director may list several comma-separated names; each name is trimmed
78 | -- before matching so a co-director listed after a comma is found too.
79 | SELECT
80 |     n.*,
81 |     TRIM(d.director_name) AS director_name
82 | FROM netflix AS n
83 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.director, ',')) AS d (director_name)
84 | WHERE TRIM(d.director_name) = 'Rajiv Chilaka';
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | -- 8. List all TV shows with more than 5 seasons
94 | -- For TV shows the leading number of duration is read as the season count.
95 | SELECT *
96 | FROM netflix
97 | WHERE type = 'TV Show'
98 |     AND SPLIT_PART(duration, ' ', 1)::INT > 5;
99 |
100 |
101 |
102 |
103 | -- 9. Count the number of content items in each genre
104 | -- listed_in is a comma-separated list; entries are trimmed so ' Dramas' and 'Dramas' group together.
105 | SELECT
106 |     TRIM(g.genre_name) AS genre,
107 |     COUNT(*) AS total_content
108 | FROM netflix AS n
109 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.listed_in, ',')) AS g (genre_name)
110 | GROUP BY TRIM(g.genre_name);
111 |
112 | -- 10. Find each year and the average numbers of content release by India on netflix.
113 | -- return top 5 year with highest avg content release !
114 | -- avg_release is each year's share, in percent, of all titles whose country is exactly 'India'.
115 |
116 | SELECT
117 |     country,
118 |     release_year,
119 |     COUNT(show_id) AS total_release,
120 |     ROUND(
121 |         COUNT(show_id)::numeric
122 |         / (SELECT COUNT(show_id) FROM netflix WHERE country = 'India')::numeric * 100,
123 |         2
124 |     ) AS avg_release
125 | FROM netflix
126 | WHERE country = 'India'
127 | GROUP BY country, release_year
128 | ORDER BY avg_release DESC
129 | LIMIT 5;
130 |
131 |
132 |
133 | -- 11. List all movies that are documentaries
134 | -- Wildcards on both sides: 'Documentaries' is not always the last genre in listed_in.
135 | SELECT *
136 | FROM netflix
137 | WHERE listed_in LIKE '%Documentaries%';
138 |
139 | -- 12. Find all content without a director
140 | SELECT *
141 | FROM netflix
142 | WHERE director IS NULL;
143 |
144 | -- 13. Find how many movies actor 'Salman Khan' appeared in last 10 years!
145 | -- Returns a single count, restricted to movies, as the question asks.
146 | SELECT COUNT(*) AS movie_count
147 | FROM netflix
148 | WHERE type = 'Movie'
149 |     AND casts LIKE '%Salman Khan%'
150 |     AND release_year > EXTRACT(YEAR FROM CURRENT_DATE) - 10;
151 |
152 |
153 | -- 14.
Find the top 10 actors who have appeared in the highest number of movies produced in India.
154 | -- casts is a comma-separated list; names are trimmed so ' Name' and 'Name' count as one actor.
155 | SELECT
156 |     TRIM(a.actor_name) AS actor,
157 |     COUNT(*) AS total_movies
158 | FROM netflix AS n
159 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.casts, ',')) AS a (actor_name)
160 | WHERE n.type = 'Movie'
161 |     AND n.country = 'India'
162 | GROUP BY TRIM(a.actor_name)
163 | ORDER BY total_movies DESC
164 | LIMIT 10;
165 |
166 | /*
167 | Question 15:
168 | Categorize the content based on the presence of the keywords 'kill' and 'violence' in
169 | the description field. Label content containing these keywords as 'Bad' and all other
170 | content as 'Good'. Count how many items fall into each category.
171 | */
172 | -- The extra type column splits each category into movies and TV shows.
173 |
174 | SELECT
175 |     category,
176 |     type,
177 |     COUNT(*) AS content_count
178 | FROM (
179 |     SELECT
180 |         type,
181 |         CASE
182 |             WHEN description ILIKE '%kill%' OR description ILIKE '%violence%' THEN 'Bad'
183 |             ELSE 'Good'
184 |         END AS category
185 |     FROM netflix
186 | ) AS categorized_content
187 | GROUP BY category, type
188 | ORDER BY type, category;
189 |
190 |
191 |
192 |
193 | -- End of reports
194 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Netflix Movies and TV Shows Data Analysis using SQL
2 |
3 | ![](https://github.com/najirh/netflix_sql_project/blob/main/logo.png)
4 |
5 | ## Overview
6 | This project involves a comprehensive analysis of Netflix's movies and TV shows data using SQL. The goal is to extract valuable insights and answer various business questions based on the dataset. The following README provides a detailed account of the project's objectives, business problems, solutions, findings, and conclusions.
7 |
8 | ## Objectives
9 |
10 | - Analyze the distribution of content types (movies vs TV shows).
11 | - Identify the most common ratings for movies and TV shows.
12 | - List and analyze content based on release years, countries, and durations.
13 | - Explore and categorize content based on specific criteria and keywords.
14 |
15 | ## Dataset
16 |
17 | The data for this project is sourced from the Kaggle dataset:
18 |
19 | - **Dataset Link:** [Movies Dataset](https://www.kaggle.com/datasets/shivamb/netflix-shows?resource=download)
20 |
21 | ## Schema
22 |
23 | ```sql
24 | DROP TABLE IF EXISTS netflix;
25 | CREATE TABLE netflix
26 | (
27 |     show_id      VARCHAR(5),
28 |     type         VARCHAR(10),
29 |     title        VARCHAR(250),
30 |     director     VARCHAR(550),
31 |     casts        VARCHAR(1050),
32 |     country      VARCHAR(550),
33 |     date_added   VARCHAR(55),
34 |     release_year INT,
35 |     rating       VARCHAR(15),
36 |     duration     VARCHAR(15),
37 |     listed_in    VARCHAR(250),
38 |     description  VARCHAR(550)
39 | );
40 | ```
41 |
42 | ## Business Problems and Solutions
43 |
44 | ### 1. Count the Number of Movies vs TV Shows
45 |
46 | ```sql
47 | SELECT
48 |     type,
49 |     COUNT(*) AS total_content
50 | FROM netflix
51 | GROUP BY type;
52 | ```
53 |
54 | **Objective:** Determine the distribution of content types on Netflix.
55 |
56 | ### 2. Find the Most Common Rating for Movies and TV Shows
57 |
58 | ```sql
59 | WITH RatingCounts AS (
60 |     SELECT
61 |         type,
62 |         rating,
63 |         COUNT(*) AS rating_count
64 |     FROM netflix
65 |     GROUP BY type, rating
66 | ),
67 | RankedRatings AS (
68 |     SELECT
69 |         type,
70 |         rating,
71 |         rating_count,
72 |         RANK() OVER (PARTITION BY type ORDER BY rating_count DESC) AS rank
73 |     FROM RatingCounts
74 | )
75 | SELECT
76 |     type,
77 |     rating AS most_frequent_rating
78 | FROM RankedRatings
79 | WHERE rank = 1;
80 | ```
81 |
82 | **Objective:** Identify the most frequently occurring rating for each type of content.
83 |
84 | ### 3. List All Movies Released in a Specific Year (e.g., 2020)
85 |
86 | ```sql
87 | SELECT *
88 | FROM netflix
89 | WHERE type = 'Movie' AND release_year = 2020;
90 | ```
91 |
92 | **Objective:** Retrieve all movies released in a specific year.
93 |
94 | ### 4.
Find the Top 5 Countries with the Most Content on Netflix
95 |
96 | ```sql
97 | -- country is a comma-separated list. Each entry is trimmed so that
98 | -- ' India' and 'India' are counted as the same country; blank entries are skipped.
99 | SELECT
100 |     TRIM(c.country_name) AS country,
101 |     COUNT(*) AS total_content
102 | FROM netflix AS n
103 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.country, ',')) AS c (country_name)
104 | WHERE TRIM(c.country_name) <> ''
105 | GROUP BY TRIM(c.country_name)
106 | ORDER BY total_content DESC
107 | LIMIT 5;
108 | ```
109 |
110 |
111 | **Objective:** Identify the top 5 countries with the highest number of content items.
112 |
113 | ### 5. Identify the Longest Movie
114 |
115 | ```sql
116 | SELECT *
117 | FROM netflix
118 | WHERE type = 'Movie'
119 | ORDER BY SPLIT_PART(duration, ' ', 1)::INT DESC NULLS LAST
120 | LIMIT 1;
121 | ```
122 |
123 | **Objective:** Find the movie with the longest duration.
124 |
125 | ### 6. Find Content Added in the Last 5 Years
126 |
127 | ```sql
128 | SELECT *
129 | FROM netflix
130 | WHERE TO_DATE(date_added, 'Month DD, YYYY') >= CURRENT_DATE - INTERVAL '5 years';
131 | ```
132 |
133 | **Objective:** Retrieve content added to Netflix in the last 5 years.
134 |
135 | ### 7. Find All Movies/TV Shows by Director 'Rajiv Chilaka'
136 |
137 | ```sql
138 | -- director may list several comma-separated names; each name is trimmed
139 | -- before matching so a co-director listed after a comma is found too.
140 | SELECT
141 |     n.*,
142 |     TRIM(d.director_name) AS director_name
143 | FROM netflix AS n
144 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.director, ',')) AS d (director_name)
145 | WHERE TRIM(d.director_name) = 'Rajiv Chilaka';
146 | ```
147 |
148 | **Objective:** List all content directed by 'Rajiv Chilaka'.
149 |
150 | ### 8. List All TV Shows with More Than 5 Seasons
151 |
152 | ```sql
153 | SELECT *
154 | FROM netflix
155 | WHERE type = 'TV Show'
156 |     AND SPLIT_PART(duration, ' ', 1)::INT > 5;
157 | ```
158 |
159 | **Objective:** Identify TV shows with more than 5 seasons.
160 |
161 | ### 9. Count the Number of Content Items in Each Genre
162 |
163 | ```sql
164 | SELECT
165 |     TRIM(g.genre_name) AS genre,
166 |     COUNT(*) AS total_content
167 | FROM netflix AS n
168 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.listed_in, ',')) AS g (genre_name)
169 | GROUP BY TRIM(g.genre_name);
170 | ```
171 | **Objective:** Count the number of content items in each genre.
172 |
173 | ### 10. Find Each Year and the Average Number of Content Releases in India on Netflix
174 | Return the top 5 years with the highest average content release.
175 |
176 | ```sql
177 | SELECT
178 |     country,
179 |     release_year,
180 |     COUNT(show_id) AS total_release,
181 |     ROUND(
182 |         COUNT(show_id)::numeric /
183 |         (SELECT COUNT(show_id) FROM netflix WHERE country = 'India')::numeric * 100, 2
184 |     ) AS avg_release
185 | FROM netflix
186 | WHERE country = 'India'
187 | GROUP BY country, release_year
188 | ORDER BY avg_release DESC
189 | LIMIT 5;
190 | ```
191 |
192 | **Objective:** Calculate and rank years by the average number of content releases by India.
193 |
194 | ### 11. List All Movies that are Documentaries
195 |
196 | ```sql
197 | SELECT *
198 | FROM netflix
199 | WHERE listed_in LIKE '%Documentaries%';
200 | ```
201 |
202 | **Objective:** Retrieve all movies classified as documentaries.
203 |
204 | ### 12. Find All Content Without a Director
205 |
206 | ```sql
207 | SELECT *
208 | FROM netflix
209 | WHERE director IS NULL;
210 | ```
211 |
212 | **Objective:** List content that does not have a director.
213 |
214 | ### 13. Find How Many Movies Actor 'Salman Khan' Appeared in the Last 10 Years
215 |
216 | ```sql
217 | SELECT COUNT(*) AS movie_count
218 | FROM netflix
219 | WHERE type = 'Movie'
220 |     AND casts LIKE '%Salman Khan%'
221 |     AND release_year > EXTRACT(YEAR FROM CURRENT_DATE) - 10;
222 | ```
223 | **Objective:** Count the number of movies featuring 'Salman Khan' in the last 10 years.
224 |
225 | ### 14. Find the Top 10 Actors Who Have Appeared in the Highest Number of Movies Produced in India
226 |
227 | ```sql
228 | SELECT
229 |     TRIM(a.actor_name) AS actor,
230 |     COUNT(*) AS total_movies
231 | FROM netflix AS n
232 | CROSS JOIN LATERAL UNNEST(STRING_TO_ARRAY(n.casts, ',')) AS a (actor_name)
233 | WHERE n.type = 'Movie' AND n.country = 'India'
234 | GROUP BY TRIM(a.actor_name)
235 | ORDER BY total_movies DESC
236 | LIMIT 10;
237 | ```
238 | **Objective:** Identify the top 10 actors with the most appearances in Indian-produced movies.
239 |
240 | ### 15.
Categorize Content Based on the Presence of 'Kill' and 'Violence' Keywords 241 | 242 | ```sql 243 | SELECT 244 | category, 245 | COUNT(*) AS content_count 246 | FROM ( 247 | SELECT 248 | CASE 249 | WHEN description ILIKE '%kill%' OR description ILIKE '%violence%' THEN 'Bad' 250 | ELSE 'Good' 251 | END AS category 252 | FROM netflix 253 | ) AS categorized_content 254 | GROUP BY category; 255 | ``` 256 | 257 | **Objective:** Categorize content as 'Bad' if it contains 'kill' or 'violence' and 'Good' otherwise. Count the number of items in each category. 258 | 259 | ## Findings and Conclusion 260 | 261 | - **Content Distribution:** The dataset contains a diverse range of movies and TV shows with varying ratings and genres. 262 | - **Common Ratings:** Insights into the most common ratings provide an understanding of the content's target audience. 263 | - **Geographical Insights:** The top countries and the average content releases by India highlight regional content distribution. 264 | - **Content Categorization:** Categorizing content based on specific keywords helps in understanding the nature of content available on Netflix. 265 | 266 | This analysis provides a comprehensive view of Netflix's content and can help inform content strategy and decision-making. 267 | 268 | 269 | 270 | ## Author - Zero Analyst 271 | 272 | This project is part of my portfolio, showcasing the SQL skills essential for data analyst roles. If you have any questions, feedback, or would like to collaborate, feel free to get in touch! 
273 | 274 | ### Stay Updated and Join the Community 275 | 276 | For more content on SQL, data analysis, and other data-related topics, make sure to follow me on social media and join our community: 277 | 278 | - **YouTube**: [Subscribe to my channel for tutorials and insights](https://www.youtube.com/@zero_analyst) 279 | - **Instagram**: [Follow me for daily tips and updates](https://www.instagram.com/zero_analyst/) 280 | - **LinkedIn**: [Connect with me professionally](https://www.linkedin.com/in/najirr) 281 | - **Discord**: [Join our community to learn and grow together](https://discord.gg/36h5f2Z5PK) 282 | 283 | Thank you for your support, and I look forward to connecting with you! 284 | --------------------------------------------------------------------------------