"""Exploratory analysis of the candy distributor sales data.

Loads ``Candy_Sales.csv``, prints a few summaries of the table and draws
seven charts: monthly sales, sales by region, top 10 states by units sold,
sales vs gross profit, a sales boxplot, a correlation heatmap and a pair
plot. Every figure is created first and all of them are displayed together
by the single ``plt.show()`` call at the end.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the input file.
CSV_PATH = "C:\\Users\\hp\\OneDrive\\Desktop\\hari excel\\Candy_Sales.csv"

# Columns used by both the correlation heatmap and the pair plot.
NUMERIC_COLUMNS = ['Sales', 'Units', 'Gross Profit', 'Cost']

df = pd.read_csv(CSV_PATH)

# Quick look at the raw table.
print(df)
print(df.head(10))
print(df.tail(10))
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
df.info()
print(df.describe())

# 1. Line Chart – Sales Over Time
# read_csv was not asked to parse dates, so 'Order Date' holds plain strings
# and the .dt accessor would raise AttributeError on it. Convert here, without
# modifying the stored column, so the summaries printed above are unaffected.
# NOTE(review): assumes the date strings are in a format pandas can infer —
# confirm against the CSV.
df['Month'] = pd.to_datetime(df['Order Date']).dt.to_period('M')
sales_by_month = df.groupby('Month')['Sales'].sum().reset_index()
plt.figure(figsize=(12, 6))
plt.plot(
    sales_by_month['Month'].astype(str),  # Period values plotted as text labels
    sales_by_month['Sales'],
    color='g',
    marker='d',
    linewidth=2,
)
plt.title("Sales Over Time (Monthly)")
plt.xlabel("Month")
plt.ylabel("Total Sales")
plt.xticks(rotation=90)
plt.grid(True)

# 2. Bar Chart – Total Sales by Region
sales_by_region = df.groupby('Region')['Sales'].sum().sort_values(ascending=False)
plt.figure(figsize=(10, 6))
# NOTE(review): seaborn >= 0.13 emits a FutureWarning when `palette` is given
# without `hue`; left unchanged to stay compatible with older seaborn versions.
sns.barplot(x=sales_by_region.values, y=sales_by_region.index, palette="viridis")
plt.title("Total Sales by Region")
plt.xlabel("Sales")
plt.ylabel("Region")

# 3. Column Chart – Top 10 States by Units Sold
units_by_state = (
    df.groupby('State/Province')['Units']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(10, 6))
sns.barplot(x=units_by_state.values, y=units_by_state.index, palette="magma")
plt.title("Top 10 States by Units Sold")
plt.xlabel("Units Sold")
plt.ylabel("State")

# 4. Scatter Plot – Sales vs Gross Profit
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Sales', y='Gross Profit', color="r")
plt.title("Sales vs Gross Profit")
plt.xlabel("Sales")
plt.ylabel("Gross Profit")
plt.grid(True)

# 5. Boxplot – Outliers Detection
plt.figure(figsize=(8, 6))
sns.boxplot(data=df, y='Sales', color='skyblue')
plt.title("Boxplot - Sales")

# 6. Heatmap – Correlation Matrix
corr = df[NUMERIC_COLUMNS].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='Blues', fmt='.2f', linewidths=0.5)
plt.title("Correlation Heatmap")

# 7. Pair Plot – Numerical Feature Relationships
# pairplot creates its own figure, which becomes the current one, so the
# suptitle below lands on the pair plot.
sns.pairplot(df[NUMERIC_COLUMNS], kind='scatter', diag_kind='hist', corner=True)
plt.suptitle("Pair Plot of Numerical Features", y=1.02)

# Display every figure created above.
plt.show()