├── pythonca2.pptx ├── E-Commerce Analytics.pdf ├── README.md └── code.py /pythonca2.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/surya2905/E-COMMERCE-ANALYTICS-SWIGGY-ZOMATO-BLINKIT/HEAD/pythonca2.pptx -------------------------------------------------------------------------------- /E-Commerce Analytics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/surya2905/E-COMMERCE-ANALYTICS-SWIGGY-ZOMATO-BLINKIT/HEAD/E-Commerce Analytics.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Unlocking Business Insights with Data Analytics! 2 | 3 | In my latest data analysis project, I explored critical aspects of business operations using Python, Pandas, NumPy, Matplotlib, and Seaborn. Here’s what I covered: 4 | 1. Order Trends & Peak Hours Analysis 5 | 2. Delivery Time Analysis & Delay Impact 6 | 3. Customer Satisfaction & Service Ratings 7 | 4. Revenue Insights & Top-Selling Products 8 | 5. Refund & Complaint Analysis 9 | 10 | From handling missing data to uncovering patterns through heatmaps, boxplots, pair plots, and more — this project reveals how data-driven decisions can optimize performance. 11 | 12 | Tech Stack: 13 | Pandas | NumPy | Matplotlib | Seaborn 14 | 15 | Visuals Include: 16 | Heatmaps | Line Charts | Column Graphs | Bar Charts | Boxplots | Pair Plots | Scatter Plots 17 | -------------------------------------------------------------------------------- /code.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Optional for Jupyter users 8 | # %matplotlib inline 9 | 10 | # 1. 
# Import the dataset (FIXED path issue)
# A raw string keeps every backslash literal. The previous literal mixed
# escaped "\\" with a bare "\D", which is an invalid escape sequence and
# triggers a SyntaxWarning on recent Python versions. The resulting path
# value is unchanged.
DATA_PATH = r"C:\Users\Vijay\Downloads\archive (1)\Ecommerce_Delivery_Analytics_New.csv"
df = pd.read_csv(DATA_PATH)

# 2. Get info about the dataset
print("\n--- Dataset Info ---")
df.info()  # info() prints its report itself, so no print() wrapper
print("\n--- Summary Statistics ---")
print(df.describe(include='all'))

# 3. Handling Missing Data
print("\n--- Missing Values ---")
print(df.isnull().sum())  # Per-column count of null values

# Convert the datetime column; errors='coerce' makes unparseable values
# missing instead of raising, and the rows left without a valid
# timestamp are then dropped.
df['Order Date & Time'] = pd.to_datetime(df['Order Date & Time'], errors='coerce')
df = df.dropna(subset=['Order Date & Time'])

# Objective 1: Order Trends & Peak Hours Analysis
# Count orders per hour of day and sort by hour so the line chart reads
# left to right in chronological order.
df['Hour'] = df['Order Date & Time'].dt.hour
order_trends = df['Hour'].value_counts().sort_index()
plt.figure(figsize=(10, 6))
order_trends.plot(kind='line', marker='o')
plt.title('Order Trends by Hour')
plt.xlabel('Hour of Day')
plt.ylabel('Number of Orders')
plt.grid(True)
plt.tight_layout()
plt.show()

# Objective 2: Delivery Time Analysis & Delay Impact
# Histogram (with KDE overlay) of delivery times.
plt.figure(figsize=(10, 6))
sns.histplot(df['Delivery Time (Minutes)'], bins=30, kde=True, color="green")
plt.title('Distribution of Delivery Time')
plt.tight_layout()
plt.show()

# Delivery time split by the 'Delivery Delay' column.
plt.figure(figsize=(8, 5))
sns.boxplot(data=df, x='Delivery Delay', y='Delivery Time (Minutes)', color="red")
plt.title('Delivery Time vs Delay Status')
plt.tight_layout()
plt.show()

# Objective 3: Customer Satisfaction & Service Ratings
# Number of orders per service rating.
plt.figure(figsize=(8, 6))
sns.countplot(data=df, x='Service Rating', color="yellow")
plt.title('Service Ratings Distribution')
plt.tight_layout()
plt.show()

# Order value grouped by service rating.
plt.figure(figsize=(10, 6))
sns.boxplot(data=df, x='Service Rating', y='Order Value (INR)', color="purple")
plt.title('Order Value vs Service Rating')
# Finish and display the figure that is currently open.
plt.tight_layout()
plt.show()

# Objective 4: Revenue Insights & Top-Selling Products
# Total order value per product category, largest first.
_category_totals = df.groupby('Product Category')['Order Value (INR)'].sum()
product_revenue = _category_totals.sort_values(ascending=False)

# Bar chart of revenue per category.
plt.figure(figsize=(10, 6))
product_revenue.plot.bar(color='teal')
plt.xticks(rotation=45)
plt.ylabel('Total Revenue (INR)')
plt.title('Revenue by Product Category')
plt.tight_layout()
plt.show()

# Pie chart of the same totals, labelled with each category's share.
plt.figure(figsize=(8, 8))
product_revenue.plot.pie(autopct='%1.1f%%')
plt.title('Revenue Distribution by Product Category')
plt.ylabel('')  # clear the y-axis label on the pie
plt.tight_layout()
plt.show()

# Objective 5: Refund & Complaint Analysis
# Frequency of each distinct value in the 'Refund Requested' column.
refund_counts = df['Refund Requested'].value_counts()
plt.figure(figsize=(6, 6))
sns.barplot(
    x=refund_counts.index,
    y=refund_counts.values,
    color="orange",
)
plt.title('Refund Request Distribution')
plt.tight_layout()
plt.show()

# 6. Heatmap of correlations
# Pairwise correlation between the three columns listed below.
_corr_columns = ['Delivery Time (Minutes)', 'Order Value (INR)', 'Service Rating']
corr = df[_corr_columns].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(data=corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.tight_layout()
plt.show()

# 7.
# Outliers - Z-Score
# nan_policy='omit' leaves missing values out of the mean/std computation.
# With the default ('propagate') a single NaN in a column turns every
# z-score of that column into NaN, and since NaN > 3 is False the outlier
# count would silently drop to 0. Results are identical when the columns
# contain no NaN.
z_scores = np.abs(
    zscore(
        df[['Delivery Time (Minutes)', 'Order Value (INR)']],
        nan_policy='omit',
    )
)
outliers = (z_scores > 3)
print("\n--- Number of Outliers (z > 3) ---")
print(outliers.sum())

# Boxplot of the two columns used for the z-scores, side by side.
plt.figure(figsize=(10, 5))
sns.boxplot(data=df[['Delivery Time (Minutes)', 'Order Value (INR)']])
plt.title('Boxplot for Delivery Time and Order Value')
plt.tight_layout()
plt.show()

# Pairplot
# pairplot creates its own figure, so no plt.figure() call is needed;
# y=1.02 places the super-title slightly above the grid of axes.
sns.pairplot(df[['Delivery Time (Minutes)', 'Order Value (INR)', 'Service Rating']])
plt.suptitle('Pairplot of Key Variables', y=1.02)
plt.tight_layout()
plt.show()

# Scatter plot: Delivery Time vs Order Value, coloured by 'Delivery Delay'.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Order Value (INR)', y='Delivery Time (Minutes)', hue='Delivery Delay')
plt.title('Delivery Time vs Order Value')
plt.tight_layout()
plt.show()