├── Amazon Web Scraper Project.ipynb ├── COVID Project - Data Exploration.sql ├── Data Cleaning Project Queries.sql ├── Indian Census.sql ├── README.md ├── SQL Project on Zomato Analytics.sql ├── SQL Project on Zomato Analytics ├── SQL Project on Zomato Analytics.sql ├── Screenshot (30).png ├── Screenshot (31).png ├── Screenshot (32).png ├── Screenshot (33).png ├── Screenshot (34).png └── Zomato Table Script.txt └── Shark Tank Analysis Project ├── SHARK TANK DATAAS.xlsx ├── SHRK TANK DATA.xlsx └── SQL Project on Shark Tank India.sql /Amazon Web Scraper Project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "23ec5670", 6 | "metadata": {}, 7 | "source": [ 8 | "# Amazon Web Scraper Project" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "6f52d05c", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# import libraries \n", 19 | "\n", 20 | "from bs4 import BeautifulSoup\n", 21 | "import requests\n", 22 | "import time\n", 23 | "import datetime\n", 24 | "\n", 25 | "import smtplib" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 29, 31 | "id": "717e8a45", 32 | "metadata": {}, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "\n", 39 | " Demon Slayer Men's Short-Sleeve T-Shirt\n", 40 | " \n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "# Connect to Website and pull in data\n", 46 | "\n", 47 | "URL = 'https://www.amazon.com/dp/B081THZ9S8/ref=syn_sd_onsite_desktop_131?ie=UTF8&psc=1&pd_rd_plhdr=t'\n", 48 | "\n", 49 | "headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\", \"Accept-Encoding\":\"gzip, deflate\", \"Accept\":\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\", \"DNT\":\"1\",\"Connection\":\"close\", 
\"Upgrade-Insecure-Requests\":\"1\"}\n", 50 | "\n", 51 | "page = requests.get(URL, headers=headers)\n", 52 | "\n", 53 | "soup1 = BeautifulSoup(page.content, \"html.parser\")\n", 54 | "\n", 55 | "soup2 = BeautifulSoup(soup1.prettify(), \"html.parser\")\n", 56 | "\n", 57 | "title = soup2.find(id='productTitle').get_text()\n", 58 | "\n", 59 | "\n", 60 | "\n", 61 | "print(title)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 31, 67 | "id": "0a5820de", 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "Demon Slayer Men's Short-Sleeve T-Shirt\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "# Clean up the data a little bit\n", 80 | "\n", 81 | "title = title.strip()\n", 82 | "\n", 83 | "print(title)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 33, 89 | "id": "c5bff919", 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "2023-01-07\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "# Create a Timestamp for your output to track when data was collected\n", 102 | "\n", 103 | "import datetime\n", 104 | "\n", 105 | "today = datetime.date.today()\n", 106 | "\n", 107 | "print(today)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 37, 113 | "id": "f51f0b79", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# Create CSV and write headers and data into the file\n", 118 | "\n", 119 | "import csv \n", 120 | "\n", 121 | "header = ['Title', 'Date']\n", 122 | "data = [title, today]\n", 123 | "\n", 124 | "\n", 125 | "with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as f:\n", 126 | " writer = csv.writer(f)\n", 127 | " writer.writerow(header)\n", 128 | " writer.writerow(data)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 47, 134 | "id": "09fdc431", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 
138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | " Title Date\n", 142 | "0 Demon Slayer Men's Short-Sleeve T-Shirt 2023-01-07\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "import pandas as pd\n", 148 | "\n", 149 | "df = pd.read_csv(r'C:\\Users\\hp\\Desktop\\AmazonWebScraperDataset.csv')\n", 150 | "\n", 151 | "print(df)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 48, 157 | "id": "9f51a3ee", 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "#Now we are appending data to the csv\n", 162 | "\n", 163 | "with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n", 164 | " writer = csv.writer(f)\n", 165 | " writer.writerow(data)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 49, 171 | "id": "d847f406", 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "\n", 179 | " Demon Slayer Men's Short-Sleeve T-Shirt\n", 180 | " \n", 181 | "Demon Slayer Men's Short-Sleeve T-Shirt\n", 182 | "2023-01-07\n", 183 | " Title Date\n", 184 | "0 Demon Slayer Men's Short-Sleeve T-Shirt 2023-01-07\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "#Combine all of the above code into one function\n", 190 | "\n", 191 | "\n", 192 | "URL = 'https://www.amazon.com/dp/B081THZ9S8/ref=syn_sd_onsite_desktop_131?ie=UTF8&psc=1&pd_rd_plhdr=t'\n", 193 | "\n", 194 | "headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36\", \"Accept-Encoding\":\"gzip, deflate\", \"Accept\":\"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\", \"DNT\":\"1\",\"Connection\":\"close\", \"Upgrade-Insecure-Requests\":\"1\"}\n", 195 | "\n", 196 | "page = requests.get(URL, headers=headers)\n", 197 | "\n", 198 | "soup1 = BeautifulSoup(page.content, \"html.parser\")\n", 199 | "\n", 200 | "soup2 = 
BeautifulSoup(soup1.prettify(), \"html.parser\")\n", 201 | "\n", 202 | "title = soup2.find(id='productTitle').get_text()\n", 203 | "\n", 204 | "\n", 205 | "\n", 206 | "print(title)\n", 207 | "\n", 208 | "title = title.strip()\n", 209 | "\n", 210 | "print(title)\n", 211 | "\n", 212 | "import datetime\n", 213 | "\n", 214 | "today = datetime.date.today()\n", 215 | "\n", 216 | "print(today)\n", 217 | "\n", 218 | "import csv \n", 219 | "\n", 220 | "header = ['Title', 'Date']\n", 221 | "data = [title, today]\n", 222 | "\n", 223 | "\n", 224 | "with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as f:\n", 225 | " writer = csv.writer(f)\n", 226 | " writer.writerow(header)\n", 227 | " writer.writerow(data)\n", 228 | "\n", 229 | "import pandas as pd\n", 230 | "\n", 231 | "df = pd.read_csv(r'C:\\Users\\hp\\Desktop\\AmazonWebScraperDataset.csv')\n", 232 | "\n", 233 | "print(df)\n", 234 | "\n", 235 | "#Now we are appending data to the csv\n", 236 | "\n", 237 | "with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n", 238 | " writer = csv.writer(f)\n", 239 | " writer.writerow(data)\n", 240 | " " 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 51, 246 | "id": "82671ecc", 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | " Title Date\n", 254 | "0 Demon Slayer Men's Short-Sleeve T-Shirt 2023-01-07\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "import pandas as pd\n", 260 | "\n", 261 | "df = pd.read_csv(r'C:\\Users\\hp\\Desktop\\AmazonWebScraperDataset.csv')\n", 262 | "\n", 263 | "print(df)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 52, 269 | "id": "d4d42fbb", 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "# If uou want to try sending yourself an email (just for fun) when a price hits below a certain level you can try it\n", 274 | "# out with this script\n", 275 | 
"\n", 276 | "def send_mail():\n", 277 | " server = smtplib.SMTP_SSL('smtp.gmail.com',465)\n", 278 | " server.ehlo()\n", 279 | " #server.starttls()\n", 280 | " server.ehlo()\n", 281 | " server.login('sachintukumar1609@gmail.com','xxxxxxxxxxxxxx')\n", 282 | " \n", 283 | " subject = \"The Shirt you want is below $15! Now is your chance to buy!\"\n", 284 | " body = \"Sachin, This is the moment we have been waiting for. Now is your chance to pick up the shirt of your dreams. Don't mess it up! Link here: https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ/ref=sr_1_3?dchild=1&keywords=data+analyst+tshirt&qid=1626655184&sr=8-3\"\n", 285 | " \n", 286 | " msg = f\"Subject: {subject}\\n\\n{body}\"\n", 287 | " \n", 288 | " server.sendmail(\n", 289 | " 'sachintukumar1609@gmail.com',\n", 290 | " msg\n", 291 | " \n", 292 | " )" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "id": "3a7226ad", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [] 302 | } 303 | ], 304 | "metadata": { 305 | "kernelspec": { 306 | "display_name": "Python 3 (ipykernel)", 307 | "language": "python", 308 | "name": "python3" 309 | }, 310 | "language_info": { 311 | "codemirror_mode": { 312 | "name": "ipython", 313 | "version": 3 314 | }, 315 | "file_extension": ".py", 316 | "mimetype": "text/x-python", 317 | "name": "python", 318 | "nbconvert_exporter": "python", 319 | "pygments_lexer": "ipython3", 320 | "version": "3.9.13" 321 | } 322 | }, 323 | "nbformat": 4, 324 | "nbformat_minor": 5 325 | } 326 | -------------------------------------------------------------------------------- /COVID Project - Data Exploration.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Covid 19 Data Exploration 3 | Skills used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types 4 | */ 5 | 6 | Select * 7 | From PortfolioProject..CovidDeaths 8 | Where continent is 
not null
order by 3,4


-- Select Data that we are going to be starting with

Select Location, date, total_cases, new_cases, total_deaths, population
From PortfolioProject..CovidDeaths
Where continent is not null
order by 1,2


-- Total Cases vs Total Deaths
-- Shows likelihood of dying if you contract covid in your country.
-- Cast to float so the division is not integer/text division, and NULLIF
-- turns a zero denominator into NULL instead of a divide-by-zero error.

Select Location, date, total_cases, total_deaths,
       (CONVERT(float, total_deaths) / NULLIF(CONVERT(float, total_cases), 0)) * 100 as DeathPercentage
From PortfolioProject..CovidDeaths
Where location like '%states%'
  and continent is not null
order by 1,2


-- Total Cases vs Population
-- Shows what percentage of the population was infected with Covid

Select Location, date, Population, total_cases,
       (CONVERT(float, total_cases) / NULLIF(Population, 0)) * 100 as PercentPopulationInfected
From PortfolioProject..CovidDeaths
order by 1,2


-- Countries with Highest Infection Rate compared to Population

Select Location, Population,
       MAX(total_cases) as HighestInfectionCount,
       MAX(CONVERT(float, total_cases) / NULLIF(Population, 0)) * 100 as PercentPopulationInfected
From PortfolioProject..CovidDeaths
Group by Location, Population
order by PercentPopulationInfected desc


-- Countries with Highest Death Count per Population
-- Total_deaths is stored as text so it must be cast; bigint avoids the
-- arithmetic overflow that int can hit on large cumulative counts.

Select Location, MAX(cast(Total_deaths as bigint)) as TotalDeathCount
From PortfolioProject..CovidDeaths
Where continent is not null
Group by Location
order by TotalDeathCount desc


-- BREAKING THINGS DOWN BY CONTINENT
-- Showing continents with the highest death count per population

Select continent, MAX(cast(Total_deaths as bigint)) as TotalDeathCount
From PortfolioProject..CovidDeaths
Where continent is not null
Group by continent
order by TotalDeathCount desc


-- GLOBAL NUMBERS
-- bigint prevents overflow on the summed counts; the float cast keeps the
-- percentage from collapsing to integer division.

Select SUM(new_cases) as total_cases,
       SUM(cast(new_deaths as bigint)) as total_deaths,
       SUM(cast(new_deaths as bigint)) / NULLIF(CONVERT(float, SUM(New_Cases)), 0) * 100 as DeathPercentage
From PortfolioProject..CovidDeaths
where continent is not null
order by 1,2


-- Total Population vs Vaccinations
-- Rolling count of people that have received at least one Covid vaccine.
-- CONVERT to bigint: the running SUM overflows int for large countries.

Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
       SUM(CONVERT(bigint, vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
From PortfolioProject..CovidDeaths dea
Join PortfolioProject..CovidVaccinations vac
    On dea.location = vac.location
   and dea.date = vac.date
where dea.continent is not null
order by 2,3


-- Using CTE to perform Calculation on Partition By in previous query
-- (a window alias cannot be reused in the same SELECT, hence the CTE)

With PopvsVac (Continent, Location, Date, Population, New_Vaccinations, RollingPeopleVaccinated)
as
(
Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
       SUM(CONVERT(bigint, vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
From PortfolioProject..CovidDeaths dea
Join PortfolioProject..CovidVaccinations vac
    On dea.location = vac.location
   and dea.date = vac.date
where dea.continent is not null
)
Select *, (RollingPeopleVaccinated / NULLIF(Population, 0)) * 100 as PercentPeopleVaccinated
From PopvsVac


-- Using Temp Table to perform Calculation on Partition By in previous query

DROP Table if exists #PercentPopulationVaccinated
Create Table
#PercentPopulationVaccinated 121 | ( 122 | Continent nvarchar(255), 123 | Location nvarchar(255), 124 | Date datetime, 125 | Population numeric, 126 | New_vaccinations numeric, 127 | RollingPeopleVaccinated numeric 128 | ) 129 | 130 | Insert into #PercentPopulationVaccinated 131 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 132 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 133 | --, (RollingPeopleVaccinated/population)*100 134 | From PortfolioProject..CovidDeaths dea 135 | Join PortfolioProject..CovidVaccinations vac 136 | On dea.location = vac.location 137 | and dea.date = vac.date 138 | --where dea.continent is not null 139 | --order by 2,3 140 | 141 | Select *, (RollingPeopleVaccinated/Population)*100 142 | From #PercentPopulationVaccinated 143 | 144 | 145 | 146 | 147 | -- Creating View to store data for later visualizations 148 | 149 | Create View PercentPopulationVaccinated as 150 | Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations 151 | , SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated 152 | --, (RollingPeopleVaccinated/population)*100 153 | From PortfolioProject..CovidDeaths dea 154 | Join PortfolioProject..CovidVaccinations vac 155 | On dea.location = vac.location 156 | and dea.date = vac.date 157 | where dea.continent is not null -------------------------------------------------------------------------------- /Data Cleaning Project Queries.sql: -------------------------------------------------------------------------------- 1 | -- Cleaning Data in SQL Queries 2 | 3 | Select * 4 | From [SQL Project].dbo.NashvilleHousing 5 | 6 | -- Standardize Date Format 7 | 8 | Select SaleDateConverted,CONVERT(Date,SaleDate) 9 | From [SQL Project].dbo.NashvilleHousing 10 | 11 | Update NashvilleHousing 12 | SET SaleDate = 
CONVERT(Date,SaleDate)

-- -- If it doesn't Update properly (SaleDate column type stays datetime),
-- -- add a real Date column and populate it from the converted value.

ALTER TABLE NashvilleHousing
Add SaleDateConverted Date;

Update NashvilleHousing
SET SaleDateConverted = CONVERT(Date,SaleDate)


--- Populate Property Address data: rows sharing a ParcelID share an address,
--- so copy the address from a populated duplicate into the NULL one.

Select a.ParcelID, a.PropertyAddress, b.ParcelID, b.PropertyAddress,
       ISNULL(a.PropertyAddress, b.PropertyAddress)
From [SQL Project].dbo.NashvilleHousing a
JOIN [SQL Project].dbo.NashvilleHousing b
    on a.ParcelID = b.ParcelID
   AND a.[UniqueID ] <> b.[UniqueID ]
WHERE a.PropertyAddress is null

Update a
SET PropertyAddress = ISNULL(a.PropertyAddress, b.PropertyAddress)
From [SQL Project].dbo.NashvilleHousing a
JOIN [SQL Project].dbo.NashvilleHousing b
    on a.ParcelID = b.ParcelID
   AND a.[UniqueID ] <> b.[UniqueID ]
Where a.PropertyAddress is null


---- Breaking out PropertyAddress into Individual Columns (Address, City)
-- NULLIF guards addresses with no comma: CHARINDEX would return 0 and
-- SUBSTRING(..., -1) would raise "Invalid length parameter"; with the guard
-- those rows yield NULL instead of aborting the statement.

SELECT
    SUBSTRING(PropertyAddress, 1, NULLIF(CHARINDEX(',', PropertyAddress), 0) - 1) as Address,
    SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1, LEN(PropertyAddress)) as City
FROM [SQL Project].dbo.NashvilleHousing

ALTER TABLE NashvilleHousing
Add PropertySplitAddress Nvarchar(255);

Update NashvilleHousing
SET PropertySplitAddress = SUBSTRING(PropertyAddress, 1, NULLIF(CHARINDEX(',', PropertyAddress), 0) - 1)

ALTER TABLE NashvilleHousing
Add PropertySplitCity Nvarchar(255);

Update NashvilleHousing
SET PropertySplitCity = SUBSTRING(PropertyAddress, CHARINDEX(',', PropertyAddress) + 1, LEN(PropertyAddress))


-- OwnerAddress split with PARSENAME, which parses '.'-separated parts
-- right-to-left (3 = street, 2 = city, 1 = state after the REPLACE).

Select
    PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3),
    PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2),
    PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1)
From [SQL Project].dbo.NashvilleHousing

ALTER TABLE NashvilleHousing
Add OwnerSplitAddress Nvarchar(255);

Update NashvilleHousing
SET OwnerSplitAddress = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 3)

ALTER TABLE NashvilleHousing
Add OwnerSplitCity Nvarchar(255);

Update NashvilleHousing
SET OwnerSplitCity = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 2)

ALTER TABLE NashvilleHousing
Add OwnerSplitState Nvarchar(255);

Update NashvilleHousing
SET OwnerSplitState = PARSENAME(REPLACE(OwnerAddress, ',', '.'), 1)


------ -- Change Y and N to Yes and No in "Sold as Vacant" field

Select Distinct(SoldAsVacant), Count(SoldAsVacant)
From [SQL Project].dbo.NashvilleHousing
Group by SoldAsVacant
order by 2

-- preview the mapping before applying it
Select SoldAsVacant,
       CASE When SoldAsVacant = 'Y' THEN 'Yes'
            When SoldAsVacant = 'N' THEN 'No'
            ELSE SoldAsVacant
       END
From [SQL Project].dbo.NashvilleHousing

Update NashvilleHousing
SET SoldAsVacant = CASE When SoldAsVacant = 'Y' THEN 'Yes'
                        When SoldAsVacant = 'N' THEN 'No'
                        ELSE SoldAsVacant
                   END

-- Identify duplicate rows (same parcel, address, price, date and legal
-- reference); rows with row_num > 1 are the extra copies.

WITH RowNumCTE AS (
    Select *,
           ROW_NUMBER() OVER (
               PARTITION BY ParcelID,
                            PropertyAddress,
                            SalePrice,
                            SaleDate,
                            LegalReference
               ORDER BY UniqueID
           ) row_num
    From [SQL Project].dbo.NashvilleHousing
)
Select *
From RowNumCTE
Where
row_num > 1 154 | Order by PropertyAddress 155 | 156 | Select * 157 | From [SQL Project].dbo.NashvilleHousing 158 | 159 | 160 | -- Delete Unused Columns 161 | 162 | 163 | 164 | Select * 165 | From [SQL Project].dbo.NashvilleHousing 166 | 167 | ALTER TABLE [SQL Project].dbo.NashvilleHousing 168 | DROP COLUMN TaxDistrict, PropertyAddress 169 | 170 | 171 | ----------------------------------------------------------------------------------------------- 172 | ----------------------------------------------------------------------------------------------- 173 | 174 | --- Importing Data using OPENROWSET and BULK INSERT 175 | 176 | -- More advanced and looks cooler, but have to configure server appropriately to do correctly 177 | -- Wanted to provide this in case you wanted to try it 178 | 179 | 180 | --sp_configure 'show advanced options', 1; 181 | --RECONFIGURE; 182 | --GO 183 | --sp_configure 'Ad Hoc Distributed Queries', 1; 184 | --RECONFIGURE; 185 | --GO 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /Indian Census.sql: -------------------------------------------------------------------------------- 1 | select * from [SQL Project].dbo.Data1 2 | 3 | select * from [SQL Project].dbo.Data2 4 | 5 | --number of rows into our dataset 6 | 7 | select count(*) from [SQL Project]..Data1 8 | select count(*) from [SQL Project]..Data2 9 | 10 | -- dataset for jharkhand and bihar 11 | 12 | select * from [SQL Project].dbo.Data1 where state in ('Jharkhand' , 'bihar') 13 | 14 | -- population of india 15 | 16 | select sum(population) as population from [SQL Project]..Data2 17 | 18 | -- avg growth 19 | 20 | select state,avg(growth)*100 avg_growth from [SQL Project]..data1 group by state; 21 | 22 | --avg sex ratio 23 | 24 | 25 | -- avg literacy rate 26 | 27 | select state,round(avg(literacy),0) avg_literacy_ratio from [SQL Project]..data1 28 | group by state having round(avg(literacy),0)>90 order by avg_literacy_ratio desc 
;

-- top 3 states showing highest growth ratio

select top 3 state, avg(growth)*100 avg_growth
from [SQL Project]..data1
group by state
order by avg_growth desc;

-- bottom 3 states showing lowest average sex ratio
-- (the original comment said "growth ratio", but the query ranks sex_ratio)

select top 3 state, round(avg(sex_ratio), 0) avg_sex_ratio
from [SQL Project]..data1
group by state
order by avg_sex_ratio asc;

-- top and bottom 3 states by literacy rate.
-- FIX: the union below reads from both #topstates and #bottomstates, but the
-- original script only ever created #bottomstates, so the union failed with
-- "Invalid object name '#topstates'". Both temp tables are created here.

drop table if exists #topstates;
create table #topstates
( state nvarchar(255),
  topstates float
);

insert into #topstates
select state, round(avg(literacy), 0) avg_literacy_ratio
from [SQL Project]..data1
group by state;

drop table if exists #bottomstates;
create table #bottomstates
( state nvarchar(255),
  bottomstates float
);

insert into #bottomstates
select state, round(avg(literacy), 0) avg_literacy_ratio
from [SQL Project]..data1
group by state;

select top 3 * from #topstates order by #topstates.topstates desc;
select top 3 * from #bottomstates order by #bottomstates.bottomstates asc;

-- union operator: top 3 and bottom 3 literacy states in one result

select * from (
    select top 3 * from #topstates order by #topstates.topstates desc) a
union
select * from (
    select top 3 * from #bottomstates order by #bottomstates.bottomstates asc) b;

-- states starting with letter a or b

select distinct state from [SQL Project]..Data1
where lower(state) like 'a%' or lower(state) like 'b%'

-- states starting with a and ending with m

select distinct state from [SQL Project]..Data1
where lower(state) like 'a%' and lower(state) like '%m'

-- join both tables

--- total literacy rate: literate vs illiterate people per state.
--- literacy is a percentage, so literacy/100 * population = literate people.

select c.state,
       sum(literate_people) total_literate_pop,
       sum(illiterate_people) total_illiterate_pop
from
(select d.district, d.state,
        round(d.literacy_ratio * d.population, 0) literate_people,
        round((1 - d.literacy_ratio) * d.population, 0) illiterate_people
 from
 (select a.district, a.state, a.literacy/100 literacy_ratio, b.population
  from [SQL Project]..data1 a
  inner join [SQL Project]..Data2 b on a.District = b.District) d) c
group by c.state

-- population in the previous census (back-computed from growth rate)

select sum(m.previous_census_population)
previous_census_population, sum(m.current_census_population) current_census_population
from
(select e.state,
        sum(e.previous_census_population) previous_census_population,
        sum(e.current_census_population) current_census_population
 from
 (select d.district, d.state,
         round(d.population / (1 + d.growth), 0) previous_census_population,
         d.population current_census_population
  from
  -- FIX: the original referenced project..data1/data2, inconsistent with the
  -- [SQL Project] database name used everywhere else in this script.
  (select a.district, a.state, a.growth growth, b.population
   from [SQL Project]..data1 a
   inner join [SQL Project]..data2 b on a.district = b.district) d) e
 group by e.state) m


-- population vs area: area per person for the previous and current census.
-- keyy = '1' is a deliberate single-row cross join between the national
-- population totals and the national total area.

select (g.total_area / g.previous_census_population) as previous_census_population_vs_area,
       (g.total_area / g.current_census_population) as current_census_population_vs_area
from
(select q.*, r.total_area
 from
 (select '1' as keyy, n.*
  from
  (select sum(m.previous_census_population) previous_census_population,
          sum(m.current_census_population) current_census_population
   from
   (select e.state,
           sum(e.previous_census_population) previous_census_population,
           sum(e.current_census_population) current_census_population
    from
    (select d.district, d.state,
            round(d.population / (1 + d.growth), 0) previous_census_population,
            d.population current_census_population
     from
     (select a.district, a.state, a.growth growth, b.population
      from [SQL Project]..data1 a
      inner join [SQL Project]..data2 b on a.district = b.district) d) e
    group by e.state) m) n) q
 inner join
 (select '1' as keyy, z.*
  from
  (select sum(area_km2) total_area from [SQL Project]..data2) z) r
 on q.keyy = r.keyy) g

-- window function:
-- output top 3 districts from each state with the highest literacy rate
-- (FIX: this description was a bare prose line in the original, which is a
-- syntax error in T-SQL)

select a.*
from
(select district, state, literacy,
        rank() over (partition by state order by literacy desc) rnk
 from [SQL Project]..data1) a
where a.rnk in (1, 2, 3)
order by state
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SQL Portfolio Project

|Medium Blog For this Project|
|-|
https://medium.com/@sachintukumar1609/sql-project-on-zomato-analytics-ad78ffb2342c
![Zomato-1200](https://user-images.githubusercontent.com/103982094/213903540-d4fb743a-fb9c-4c06-821f-536f381002bb.jpg)

/SQL Project on Zomato Analytics.sql:
--------------------------------------------------------------------------------
-- Zomato analytics schema + seed data.
-- FIX: date literals use ISO 8601 (yyyy-mm-dd). The original mm-dd-yyyy
-- strings depend on the session's DATEFORMAT/language setting and silently
-- swap day and month (or fail) on non-US servers.

drop table if exists goldusers_signup;
CREATE TABLE goldusers_signup(userid integer, gold_signup_date date);

INSERT INTO goldusers_signup(userid, gold_signup_date)
VALUES (1, '2017-09-22'),
       (3, '2017-04-21');

drop table if exists users;
CREATE TABLE users(userid integer, signup_date date);

INSERT INTO users(userid, signup_date)
VALUES (1, '2014-09-02'),
       (2, '2015-01-15'),
       (3, '2014-04-11');

drop table if exists sales;
CREATE TABLE sales(userid integer, created_date date, product_id integer);

INSERT INTO sales(userid, created_date, product_id)
VALUES (1, '2017-04-19', 2),
       (3, '2019-12-18', 1),
       (2, '2020-07-20', 3),
       (1, '2019-10-23', 2),
       (1, '2018-03-19', 3),
       (3, '2016-12-20', 2),
       (1, '2016-11-09', 1),
       (1, '2016-05-20', 3),
       (2, '2017-09-24', 1),
       (1, '2017-03-11', 2),
       (1, '2016-03-11', 1),
       (3, '2016-11-10', 1),
       (3, '2017-12-07', 2),
       (3, '2016-12-15', 2),
       (2, '2017-11-08', 2),
       (2, '2018-09-10', 3);

drop table if exists product;
-- varchar instead of the deprecated TEXT type (text/ntext/image are
-- deprecated in SQL Server and cannot be grouped or compared directly)
CREATE TABLE product(product_id integer, product_name varchar(20), price integer);

INSERT INTO product(product_id, product_name, price)
VALUES (1, 'p1', 980),
       (2, 'p2', 870),
       (3, 'p3', 330);

-- sanity checks on the seed data

select * from sales;
select * from product;
select * from goldusers_signup;
select * from users;
-- 1. What is the total amount each customer spent on zomato?
--    (FIX throughout: the original "1 ----" labels left a bare numeric
--     literal outside the comment, which is not a valid T-SQL statement.)

select a.userid, sum(b.price) total_amt_spent
from sales a
inner join product b on a.product_id = b.product_id
group by a.userid


-- 2. How many days has each customer visited zomato?

select userid, count(distinct created_date) distinct_days
from sales
group by userid;


-- 3. What was the first product purchased by each customer?
--    rank() keeps ties, so same-day first purchases all appear.

select * from
(select *, rank() over (partition by userid order by created_date) rnk from sales) a
where rnk = 1


-- 4. What is the most purchased item on the menu, and how many times was it
--    purchased by each customer?

select userid, count(product_id) cnt
from sales
where product_id =
      (select top 1 product_id from sales group by product_id order by count(product_id) desc)
group by userid


-- 5. Which item was most popular for each customer?

select * from
(select *, rank() over (partition by userid order by cnt desc) rnk from
 (select userid, product_id, count(product_id) cnt
  from sales
  group by userid, product_id) a) b
where rnk = 1


-- 6. Which item was purchased first by each customer after they became a gold member?

select * from
(select c.*, rank() over (partition by userid order by created_date) rnk from
 (select a.userid, a.created_date, a.product_id, b.gold_signup_date
  from sales a
  inner join goldusers_signup b
    on a.userid = b.userid and created_date >= gold_signup_date) c) d
where rnk = 1;


-- 7. Which item was purchased just before the customer became a member?
select * from
(select c.*, rank() over (partition by userid order by created_date desc) rnk from
 (select a.userid, a.created_date, a.product_id, b.gold_signup_date
  from sales a
  inner join goldusers_signup b
    on a.userid = b.userid and created_date <= gold_signup_date) c) d
where rnk = 1;


-- 8. What are the total orders and amount spent for each member BEFORE they
--    became a member?
--    (FIX: the original "8 ----" label left a bare numeric literal outside
--     the comment dashes, which is invalid T-SQL.)

select userid, count(created_date) order_purchased, sum(price) total_amt_spent
from
(select c.*, d.price
 from
 (select a.userid, a.created_date, a.product_id, b.gold_signup_date
  from sales a
  inner join goldusers_signup b
    on a.userid = b.userid and created_date <= gold_signup_date) c
 inner join product d on c.product_id = d.product_id) e
group by userid;


-- 9. Buying each product earns zomato points (e.g. 5rs = 2 zomato points) and
--    each product has its own earning rate: p1: 5rs = 1 point,
--    p2: 2rs = 1 point, p3: 5rs = 1 point. Calculate the points collected by
--    each customer, and the product for which the most points have been given
--    till now.
--    NOTE(review): the original garbled comment also mentions "10rs = 1 point"
--    for p2, but the implementation divides p2 spend by 2 -- confirm the
--    intended rate with the author.
-- points per customer: spend per product / (rupees per point), then
-- multiplied by 2.5 to convert to the headline points scale

select userid, sum(total_points) * 2.5 total_point_earned
from
(select e.*, amt/points total_points
 from
 (select d.*,
         case when product_id = 1 then 5     -- p1: 5rs = 1 point
              when product_id = 2 then 2     -- p2: 2rs = 1 point
              when product_id = 3 then 5     -- p3: 5rs = 1 point
              else 0 end as points
  from
  (select c.userid, c.product_id, sum(price) amt
   from
   (select a.*, b.price from sales a inner join product b on a.product_id = b.product_id) c
   group by userid, product_id) d) e) f
group by userid;


-- product for which the most points have been given till now

select * from
(select *, rank() over (order by total_point_earned desc) rnk from
 (select product_id, sum(total_points) total_point_earned
  from
  (select e.*, amt/points total_points
   from
   (select d.*,
           case when product_id = 1 then 5
                when product_id = 2 then 2
                when product_id = 3 then 5
                else 0 end as points
    from
    (select c.userid, c.product_id, sum(price) amt
     from
     (select a.*, b.price from sales a inner join product b on a.product_id = b.product_id) c
     group by userid, product_id) d) e) f
  group by product_id) f) g
where rnk = 1;


-- intermediate inspection query: per-customer, per-product points before
-- the final aggregation above

select e.*, amt/points total_points
from
(select d.*,
        case when product_id = 1 then 5
             when product_id = 2 then 2
             when product_id = 3 then 5
             else 0 end as points
 from
 (select c.userid, c.product_id, sum(price) amt
  from
  (select a.*, b.price from sales a inner join product b on a.product_id = b.product_id) c
  group by userid, product_id) d) e


-- 10. In the first year after a customer joins the gold program (including
--     the join date), irrespective of what the customer has purchased, they
--     earn 5 zomato points for every 10rs spent. Who earned more, customer 1
--     or 3, and what was their points earning in the first year?
--     (FIX: the original "10 ---" label left a bare numeric literal outside
--      the comment, which is invalid T-SQL.)
-- (1 zomato point = 2rs)

-- Q10 (continued): points earned in the first year of gold membership.
-- price * 0.5 = points, since every 10rs earns 5 points (1 point per 2rs).
SELECT c.*, d.price * 0.5 AS total_points_earned
FROM (
    SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
    FROM sales a
    INNER JOIN goldusers_signup b
        ON a.userid = b.userid
       AND a.created_date >= b.gold_signup_date
       AND a.created_date <= DATEADD(year, 1, b.gold_signup_date)
) c
INNER JOIN product d ON c.product_id = d.product_id;


-- Q11: rank all transactions of each customer chronologically.
SELECT *, RANK() OVER (PARTITION BY userid ORDER BY created_date) AS rnk
FROM sales;


-- Q12: rank each member's transactions made while they were a gold member;
-- every non-gold-member transaction is marked 'na'.
-- NOTE(review): ranking is newest-first (ORDER BY created_date DESC); confirm
-- whether chronological order was intended.
SELECT e.*,
       CASE WHEN rnk = '0' THEN 'na' ELSE rnk END AS rnkk
FROM (
    SELECT c.*,
           CAST(CASE WHEN gold_signup_date IS NULL THEN 0
                     ELSE RANK() OVER (PARTITION BY userid ORDER BY created_date DESC)
                END AS varchar) AS rnk
    FROM (
        SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
        FROM sales a
        LEFT JOIN goldusers_signup b
            ON a.userid = b.userid
           AND a.created_date >= b.gold_signup_date
    ) c
) e;
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/SQL Project on Zomato Analytics.sql:
--------------------------------------------------------------------------------
-- Schema and seed data for the Zomato analytics case study.
-- Date literals use the unambiguous ISO form (YYYY-MM-DD; safe for the `date`
-- type regardless of the server's DATEFORMAT/language setting) instead of the
-- locale-dependent MM-DD-YYYY form.
DROP TABLE IF EXISTS goldusers_signup;
CREATE TABLE goldusers_signup(userid integer, gold_signup_date date);

INSERT INTO goldusers_signup(userid, gold_signup_date)
VALUES (1, '2017-09-22'),
       (3, '2017-04-21');

DROP TABLE IF EXISTS users;
CREATE TABLE users(userid integer, signup_date date);

INSERT INTO users(userid, signup_date)
VALUES (1, '2014-09-02'),
       (2, '2015-01-15'),
       (3, '2014-04-11');

DROP TABLE IF EXISTS sales;
CREATE TABLE sales(userid integer, created_date date, product_id integer);

INSERT INTO sales(userid, created_date, product_id)
VALUES (1, '2017-04-19', 2),
       (3, '2019-12-18', 1),
       (2, '2020-07-20', 3),
       (1, '2019-10-23', 2),
       (1, '2018-03-19', 3),
       (3, '2016-12-20', 2),
       (1, '2016-11-09', 1),
       (1, '2016-05-20', 3),
       (2, '2017-09-24', 1),
       (1, '2017-03-11', 2),
       (1, '2016-03-11', 1),
       (3, '2016-11-10', 1),
       (3, '2017-12-07', 2),
       (3, '2016-12-15', 2),
       (2, '2017-11-08', 2),
       (2, '2018-09-10', 3);


DROP TABLE IF EXISTS product;
-- NOTE(review): `text` is a deprecated SQL Server type; consider varchar(n).
CREATE TABLE product(product_id integer, product_name text, price integer);

INSERT INTO product(product_id, product_name, price)
VALUES (1, 'p1', 980),
       (2, 'p2', 870),
       (3, 'p3', 330);


-- Sanity checks on the seed data.
SELECT * FROM sales;
SELECT * FROM product;
SELECT * FROM goldusers_signup;
SELECT * FROM users;


-- Q1: what is the total amount each customer spent on zomato?
SELECT a.userid, SUM(b.price) AS total_amt_spent
FROM sales a
INNER JOIN product b ON a.product_id = b.product_id
GROUP BY a.userid;


-- Q2: how many days has each customer visited zomato?
SELECT userid, COUNT(DISTINCT created_date) AS distinct_days
FROM sales
GROUP BY userid;


-- Q3: what was the first product purchased by each customer?
SELECT *
FROM (
    SELECT *, RANK() OVER (PARTITION BY userid ORDER BY created_date) AS rnk
    FROM sales
) a
WHERE rnk = 1;


-- Q4: most purchased item on the menu, and how many times each customer
-- purchased it. (TOP 1 breaks ties arbitrarily.)
SELECT userid, COUNT(product_id) AS cnt
FROM sales
WHERE product_id = (SELECT TOP 1 product_id
                    FROM sales
                    GROUP BY product_id
                    ORDER BY COUNT(product_id) DESC)
GROUP BY userid;


-- Q5: which item was most popular for each customer?
-- Q5 (continued): rank each customer's products by purchase count;
-- rnk = 1 is the customer's most popular item (RANK keeps ties).
SELECT *
FROM (
    SELECT *, RANK() OVER (PARTITION BY userid ORDER BY cnt DESC) AS rnk
    FROM (
        SELECT userid, product_id, COUNT(product_id) AS cnt
        FROM sales
        GROUP BY userid, product_id
    ) a
) b
WHERE rnk = 1;


-- Q6: which item was purchased first by each customer after they became a member?
SELECT *
FROM (
    SELECT c.*, RANK() OVER (PARTITION BY userid ORDER BY created_date) AS rnk
    FROM (
        SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
        FROM sales a
        INNER JOIN goldusers_signup b
            ON a.userid = b.userid
           AND a.created_date >= b.gold_signup_date
    ) c
) d
WHERE rnk = 1;


-- Q7: which item was purchased just before the customer became a member?
-- (newest-first rank over pre-membership purchases, so rnk = 1 is the last one)
SELECT *
FROM (
    SELECT c.*,
           RANK() OVER (PARTITION BY userid ORDER BY created_date DESC) AS rnk
    FROM (
        SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
        FROM sales a
        INNER JOIN goldusers_signup b
            ON a.userid = b.userid
           AND a.created_date <= b.gold_signup_date
    ) c
) d
WHERE rnk = 1;


-- Q8: total orders and amount spent by each member before they became a member.
SELECT userid,
       COUNT(created_date) AS order_purchased,
       SUM(price)          AS total_amt_spent
FROM (
    SELECT c.*, d.price
    FROM (
        SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
        FROM sales a
        INNER JOIN goldusers_signup b
            ON a.userid = b.userid
           AND a.created_date <= b.gold_signup_date
    ) c
    INNER JOIN product d ON c.product_id = d.product_id
) e
GROUP BY userid;


-- Q9: buying each product earns zomato points (overall 2rs = 1 zomato point),
-- and each product has its own purchasing rate: p1 5rs = 1 point,
-- p2 10rs = 1 point, p3 5rs = 1 point.
-- Calculate the points collected by each customer, and the product for which
-- the most points have been given so far.
-- Q9a: total zomato points earned per customer. The CASE divisor is the
-- rupees-per-point rate for each product, and the final * 2.5 rescales into
-- the overall 2rs = 1 zomato point currency.
-- NOTE(review): amt / points is integer division in T-SQL; it is exact for the
-- seeded prices (980, 870, 330 all divide evenly) but truncates otherwise.
-- NOTE(review): the rate used for p2 (divide by 2) does not match the stated
-- "10rs = 1 point" -- confirm which is intended.
SELECT userid, SUM(total_points) * 2.5 AS total_point_earned
FROM (
    SELECT e.*, amt / points AS total_points
    FROM (
        SELECT d.*,
               CASE WHEN product_id = 1 THEN 5
                    WHEN product_id = 2 THEN 2
                    WHEN product_id = 3 THEN 5
                    ELSE 0
               END AS points
        FROM (
            SELECT c.userid, c.product_id, SUM(price) AS amt
            FROM (
                SELECT a.*, b.price
                FROM sales a
                INNER JOIN product b ON a.product_id = b.product_id
            ) c
            GROUP BY userid, product_id
        ) d
    ) e
) f
GROUP BY userid;


-- Q9b: product for which the most points have been given so far (rnk = 1).
SELECT *
FROM (
    SELECT *, RANK() OVER (ORDER BY total_point_earned DESC) AS rnk
    FROM (
        SELECT product_id, SUM(total_points) AS total_point_earned
        FROM (
            SELECT e.*, amt / points AS total_points
            FROM (
                SELECT d.*,
                       CASE WHEN product_id = 1 THEN 5
                            WHEN product_id = 2 THEN 2
                            WHEN product_id = 3 THEN 5
                            ELSE 0
                       END AS points
                FROM (
                    SELECT c.userid, c.product_id, SUM(price) AS amt
                    FROM (
                        SELECT a.*, b.price
                        FROM sales a
                        INNER JOIN product b ON a.product_id = b.product_id
                    ) c
                    GROUP BY userid, product_id
                ) d
            ) e
        ) f
        GROUP BY product_id
    ) g
) h
WHERE rnk = 1;


-- Scratch query: per-user, per-product amounts with their point conversion
-- (the shared building block of the two queries above).
SELECT e.*, amt / points AS total_points
FROM (
    SELECT d.*,
           CASE WHEN product_id = 1 THEN 5
                WHEN product_id = 2 THEN 2
                WHEN product_id = 3 THEN 5
                ELSE 0
           END AS points
    FROM (
        SELECT c.userid, c.product_id, SUM(price) AS amt
        FROM (
            SELECT a.*, b.price
            FROM sales a
            INNER JOIN product b ON a.product_id = b.product_id
        ) c
        GROUP BY userid, product_id
    ) d
) e;


-- Q10: in the first year after a customer joins the gold program (join date
-- included), irrespective of what they purchased, they earn 5 zomato points
-- for every 10rs spent. Who earned more, user 1 or user 3, and what was
-- their earning in that first year?
-- (1 zomato point = 2rs)

-- Q10 (continued): points earned in the first year of gold membership.
-- price * 0.5 = points, since every 10rs earns 5 points (1 point per 2rs).
SELECT c.*, d.price * 0.5 AS total_points_earned
FROM (
    SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
    FROM sales a
    INNER JOIN goldusers_signup b
        ON a.userid = b.userid
       AND a.created_date >= b.gold_signup_date
       AND a.created_date <= DATEADD(year, 1, b.gold_signup_date)
) c
INNER JOIN product d ON c.product_id = d.product_id;


-- Q11: rank all transactions of each customer chronologically.
SELECT *, RANK() OVER (PARTITION BY userid ORDER BY created_date) AS rnk
FROM sales;


-- Q12: rank each member's transactions made while they were a gold member;
-- every non-gold-member transaction is marked 'na'.
-- NOTE(review): ranking is newest-first (ORDER BY created_date DESC); confirm
-- whether chronological order was intended.
SELECT e.*,
       CASE WHEN rnk = '0' THEN 'na' ELSE rnk END AS rnkk
FROM (
    SELECT c.*,
           CAST(CASE WHEN gold_signup_date IS NULL THEN 0
                     ELSE RANK() OVER (PARTITION BY userid ORDER BY created_date DESC)
                END AS varchar) AS rnk
    FROM (
        SELECT a.userid, a.created_date, a.product_id, b.gold_signup_date
        FROM sales a
        LEFT JOIN goldusers_signup b
            ON a.userid = b.userid
           AND a.created_date >= b.gold_signup_date
    ) c
) e;
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Screenshot (30).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/SQL Project on Zomato Analytics/Screenshot (30).png
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Screenshot (31).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/SQL Project on Zomato Analytics/Screenshot (31).png
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Screenshot (32).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/SQL Project on Zomato Analytics/Screenshot (32).png
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Screenshot (33).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/SQL Project on Zomato Analytics/Screenshot (33).png
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Screenshot (34).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/SQL Project on Zomato Analytics/Screenshot (34).png
--------------------------------------------------------------------------------
/SQL Project on Zomato Analytics/Zomato Table Script.txt:
--------------------------------------------------------------------------------
-- Schema and seed data for the Zomato food-delivery (rolls) case study.
-- NOTE(review): date/datetime literals below are in MM-DD-YYYY form, which
-- depends on the server's DATEFORMAT/language setting -- confirm before
-- running on a non-us_english server.
DROP TABLE IF EXISTS driver;
CREATE TABLE driver(driver_id integer, reg_date date);

INSERT INTO driver(driver_id, reg_date)
VALUES (1, '01-01-2021'),
       (2, '01-03-2021'),
       (3, '01-08-2021'),
       (4, '01-15-2021');


DROP TABLE IF EXISTS ingredients;
CREATE TABLE ingredients(ingredients_id integer, ingredients_name varchar(60));

INSERT INTO ingredients(ingredients_id, ingredients_name)
VALUES (1, 'BBQ Chicken'),
       (2, 'Chilli Sauce'),
       (3, 'Chicken'),
       (4, 'Cheese'),
       (5, 'Kebab'),
       (6, 'Mushrooms'),
       (7, 'Onions'),
       (8, 'Egg'),
       (9, 'Peppers'),
       (10, 'schezwan sauce'),
       (11, 'Tomatoes'),
       (12, 'Tomato Sauce');

DROP TABLE IF EXISTS rolls;
CREATE TABLE rolls(roll_id integer, roll_name varchar(30));

INSERT INTO rolls(roll_id, roll_name)
VALUES (1, 'Non Veg Roll'),
       (2, 'Veg Roll');

DROP TABLE IF EXISTS rolls_recipes;
-- ingredients holds a comma-separated id list -- denormalised on purpose for
-- the case study; split it at query time.
CREATE TABLE rolls_recipes(roll_id integer, ingredients varchar(24));

INSERT INTO rolls_recipes(roll_id, ingredients)
VALUES (1, '1,2,3,4,5,6,8,10'),
       (2, '4,6,7,9,11,12');

DROP TABLE IF EXISTS driver_order;
-- distance/duration/cancellation are deliberately messy strings ('20km',
-- '32 minutes', 'NaN', '') -- cleaning them is part of the exercise.
CREATE TABLE driver_order(order_id integer, driver_id integer, pickup_time datetime, distance VARCHAR(7), duration VARCHAR(10), cancellation VARCHAR(23));
INSERT INTO driver_order(order_id, driver_id, pickup_time, distance, duration, cancellation)
VALUES (1, 1, '01-01-2021 18:15:34', '20km', '32 minutes', ''),
       (2, 1, '01-01-2021 19:10:54', '20km', '27 minutes', ''),
       (3, 1, '01-03-2021 00:12:37', '13.4km', '20 mins', 'NaN'),
       (4, 2, '01-04-2021 13:53:03', '23.4', '40', 'NaN'),
       (5, 3, '01-08-2021 21:10:57', '10', '15', 'NaN'),
       (6, 3, null, null, null, 'Cancellation'),
       (7, 2, '01-08-2020 21:30:45', '25km', '25mins', null),
       (8, 2, '01-10-2020 00:15:02', '23.4 km', '15 minute', null),
       (9, 2, null, null, null, 'Customer Cancellation'),
       (10, 1, '01-11-2020 18:50:20', '10km', '10minutes', null);


DROP TABLE IF EXISTS customer_orders;
CREATE TABLE customer_orders(order_id integer, customer_id integer, roll_id integer, not_include_items VARCHAR(4), extra_items_included VARCHAR(4), order_date datetime);
INSERT INTO customer_orders(order_id, customer_id, roll_id, not_include_items, extra_items_included, order_date)
VALUES (1, 101, 1, '', '', '01-01-2021 18:05:02'),
       (2, 101, 1, '', '', '01-01-2021 19:00:52'),
       (3, 102, 1, '', '', '01-02-2021 23:51:23'),
       (3, 102, 2, '', 'NaN', '01-02-2021 23:51:23'),
       (4, 103, 1, '4', '', '01-04-2021 13:23:46'),
       (4, 103, 1, '4', '', '01-04-2021 13:23:46'),
       (4, 103, 2, '4', '', '01-04-2021 13:23:46'),
       (5, 104, 1, null, '1', '01-08-2021 21:00:29'),
       (6, 101, 2, null, null, '01-08-2021 21:03:13'),
       (7, 105, 2, null, '1', '01-08-2021 21:20:29'),
       (8, 102, 1, null, null, '01-09-2021 23:54:33'),
       (9, 103, 1, '4', '1,5', '01-10-2021 11:22:59'),
       (10, 104, 1, null, null, '01-11-2021 18:34:49'),
       (10, 104, 1, '2,6', '1,4', '01-11-2021 18:34:49');
--------------------------------------------------------------------------------
/Shark Tank Analysis Project/SHARK TANK DATAAS.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/Shark Tank Analysis Project/SHARK TANK DATAAS.xlsx
--------------------------------------------------------------------------------
/Shark Tank Analysis Project/SHRK TANK DATA.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sachinkumar1609/SQL-Portfolio-Projects/98a572738b7e879c76c65416f859c99b2e734f5a/Shark Tank Analysis Project/SHRK TANK DATA.xlsx
--------------------------------------------------------------------------------
/Shark Tank Analysis Project/SQL Project on Shark Tank India.sql:
--------------------------------------------------------------------------------
-- Shark Tank India season analysis over a single wide table.
SELECT * FROM [Shark tank project]..data;

-- total episodes
SELECT MAX([Ep# No#]) FROM [Shark tank project]..data;
SELECT COUNT(DISTINCT [Ep# No#]) FROM [Shark tank project]..data;

-- pitches
SELECT COUNT(DISTINCT [Brand]) FROM [Shark tank project]..data;

-- pitches converted (share of pitches that received an investment)
SELECT CAST(SUM(a.converted_not_converted) AS float) / CAST(COUNT(*) AS float)
FROM (
    SELECT [Amount Invested lakhs],
           CASE WHEN [Amount Invested lakhs] > 0 THEN 1 ELSE 0 END AS converted_not_converted
    FROM [Shark tank project]..data
) a;

-- total male
SELECT SUM(male) FROM [Shark tank project]..data;

-- total female
SELECT SUM(female) FROM [Shark tank project]..data;

---
-- gender ratio (female : male)
-- Fix: SUM(female)/SUM(male) is integer division and truncates (typically to
-- 0); cast one operand to float first.
SELECT CAST(SUM(female) AS float) / SUM(male) FROM [Shark tank project]..data;

-- total invested amount
SELECT SUM([Amount Invested lakhs]) FROM [Shark tank project]..data;

-- avg equity taken (only over deals where equity was actually taken)
-- Fix: the filter referenced `equitytakenp`, which appears nowhere else in
-- this project -- presumably the column is [Equity Taken %]; confirm against
-- the imported table's schema.
SELECT AVG(a.[Equity Taken %])
FROM (SELECT * FROM [Shark tank project]..data WHERE [Equity Taken %] > 0) a;

-- highest deal taken
SELECT MAX([Amount Invested lakhs]) FROM [Shark tank project]..data;

-- highest equity taken
SELECT MAX([Equity Taken %]) FROM [Shark tank project]..data;

-- startups having at least one woman on the team
-- Fix: the original result alias contained unquoted spaces (a syntax error).
SELECT SUM(a.female_count) AS startups_with_atleast_one_woman
FROM (
    SELECT female,
           CASE WHEN female > 0 THEN 1 ELSE 0 END AS female_count
    FROM [Shark tank project]..data
) a;

-- pitches converted having at least one woman
SELECT * FROM [Shark tank project]..data;

SELECT SUM(b.female_count)
FROM (
    SELECT CASE WHEN a.female > 0 THEN 1 ELSE 0 END AS female_count, a.*
    FROM (SELECT * FROM [Shark tank project]..data WHERE deal != 'No Deal') a
) b;

-- average team size
SELECT AVG([Team members]) FROM [Shark tank project]..data;

-- amount invested per deal
SELECT AVG(a.[Amount Invested lakhs]) AS amount_invested_per_deal
FROM (SELECT * FROM [Shark tank project]..data WHERE deal != 'No Deal') a;

-- avg age group of contestants
-- Fix: `avg age` is a column name containing a space and must be
-- bracket-quoted; unquoted it is a syntax error.
SELECT [avg age], COUNT([avg age]) AS cnt
FROM [Shark tank project]..data
GROUP BY [avg age]
ORDER BY cnt DESC;

-- location group of contestants
SELECT location, COUNT(location) AS cnt
FROM [Shark tank project]..data
GROUP BY location
ORDER BY cnt DESC;

-- sector group of contestants
SELECT sector, COUNT(sector) AS cnt
FROM [Shark tank project]..data
GROUP BY sector
ORDER BY cnt DESC;

-- partner deals ('-' marks deals with no partner, so exclude it)
SELECT partners, COUNT(partners) AS cnt
FROM [Shark tank project]..data
WHERE partners != '-'
GROUP BY partners
ORDER BY cnt DESC;


-- making the per-shark matrix (Ashneer)
SELECT * FROM [Shark tank project]..data;

-- deals where Ashneer was present on the panel
SELECT 'Ashneer' AS keyy, COUNT([Ashneer Amount Invested])
FROM [Shark tank project]..data
WHERE [Ashneer Amount Invested] IS NOT NULL;

-- deals Ashneer actually invested in
SELECT 'Ashneer' AS keyy, COUNT([Ashneer Amount Invested])
FROM [Shark tank project]..data
WHERE [Ashneer Amount Invested] IS NOT NULL
  AND [Ashneer Amount Invested] != 0;

-- Fix: the average read [Aman Equity Taken %] inside this Ashneer-only
-- query -- a copy-paste slip; it must read [Ashneer Equity Taken %].
SELECT 'Ashneer' AS keyy,
       SUM(c.[Ashneer Amount Invested]),
       AVG(c.[Ashneer Equity Taken %])
FROM (SELECT *
      FROM [Shark tank project]..data
      WHERE [Ashneer Equity Taken %] != 0
        AND [Ashneer Equity Taken %] IS NOT NULL) c;

-- combined matrix row for Ashneer: presence, deals, amount, avg equity
SELECT m.keyy, m.total_deals_present, m.total_deals,
       n.total_amount_invested, n.avg_equity_taken
FROM (
    SELECT a.keyy, a.total_deals_present, b.total_deals
    FROM (SELECT 'Ashneer' AS keyy,
                 COUNT([Ashneer Amount Invested]) AS total_deals_present
          FROM [Shark tank project]..data
          WHERE [Ashneer Amount Invested] IS NOT NULL) a
    INNER JOIN (SELECT 'Ashneer' AS keyy,
                       COUNT([Ashneer Amount Invested]) AS total_deals
                FROM [Shark tank project]..data
                WHERE [Ashneer Amount Invested] IS NOT NULL
                  AND [Ashneer Amount Invested] != 0) b
        ON a.keyy = b.keyy
) m
INNER JOIN (
    -- Fix applied here too: [Ashneer Equity Taken %], not [Aman ...].
    SELECT 'Ashneer' AS keyy,
           SUM(c.[Ashneer Amount Invested]) AS total_amount_invested,
           AVG(c.[Ashneer Equity Taken %]) AS avg_equity_taken
    FROM (SELECT *
          FROM [Shark tank project]..data
          WHERE [Ashneer Equity Taken %] != 0
            AND [Ashneer Equity Taken %] IS NOT NULL) c
) n
    ON m.keyy = n.keyy;


-- startup with the highest investment in each domain/sector
SELECT c.*
FROM (
    SELECT brand, sector, [Amount Invested lakhs],
           RANK() OVER (PARTITION BY sector ORDER BY [Amount Invested lakhs] DESC) AS rnk
    FROM [Shark tank project]..data
) c
WHERE c.rnk = 1;
--------------------------------------------------------------------------------