├── README.md ├── histogram representation.py ├── summary representation.py ├── pairy plot.py ├── NumPy Arrays and Operations.py ├── boxy plot.py ├── Correlation and Covariance.py ├── Outlier Detection using IQR.py ├── heat map representation.py ├── Matplotlib Bar Plot.py └── count representation.py /README.md: -------------------------------------------------------------------------------- 1 | # Data-visualization-with-python -------------------------------------------------------------------------------- /histogram representation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown 13 | -------------------------------------------------------------------------------- /summary representation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # 6. Summary Statistics 15 | print("\nSummary Statistics:\n") 16 | print(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].describe()) 17 | 18 | -------------------------------------------------------------------------------- /pairy plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # 4. Pair Plot - Variable Relationships 15 | sns.pairplot(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].dropna()) 16 | plt.suptitle("Pairwise Relationships", y=1.02) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /NumPy Arrays and Operations.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # Objective 9: NumPy Arrays and Operations 15 | weights = df['Item Weight'].dropna().values 16 | print("Mean Item Weight:", np.mean(weights)) 17 | print("Standard Deviation of Weight:", np.std(weights)) 18 | -------------------------------------------------------------------------------- /boxy plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | #3. Boxplot - Sales by Outlet Type 15 | plt.figure(figsize=(8, 5)) 16 | sns.boxplot(x='Outlet Type', y='Sales', data=df, 17 | hue='Outlet Type', palette='Set2', legend=False) 18 | plt.title("Sales by Outlet Type") 19 | plt.tight_layout() 20 | plt.show() 21 | -------------------------------------------------------------------------------- /Correlation and Covariance.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # Objective 7: Correlation and Covariance 15 | print("Correlation Matrix:\n", df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].corr()) 16 | print("Covariance Matrix:\n", df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].cov()) 17 | 18 | 19 | -------------------------------------------------------------------------------- /Outlier Detection using IQR.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # Objective 8: Outlier Detection using IQR 15 | Q1 = df['Sales'].quantile(0.25) 16 | Q3 = df['Sales'].quantile(0.75) 17 | IQR = Q3 - Q1 18 | outliers = df[(df['Sales'] < Q1 - 1.5 * IQR) | (df['Sales'] > Q3 + 1.5 * IQR)] 19 | print("Number of Sales Outliers:", outliers.shape[0]) 20 | -------------------------------------------------------------------------------- /heat map representation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # 5. Heatmap - Correlation Matrix 15 | plt.figure(figsize=(6, 5)) 16 | sns.heatmap(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].corr(), 17 | annot=True, cmap='YlGnBu', linecolor='white', linewidths=1) 18 | plt.title("Correlation Heatmap") 19 | plt.tight_layout() 20 | plt.show() 21 | -------------------------------------------------------------------------------- /Matplotlib Bar Plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | # Objective 10: Matplotlib Bar Plot 15 | sales_by_type = df.groupby('Item Type')['Sales'].sum().sort_values(ascending=False) 16 | top5 = sales_by_type.head() 17 | plt.bar(top5.index, top5.values, color='skyblue') 18 | plt.title("Top 5 Selling Item Types") 19 | plt.ylabel("Total Sales") 20 | plt.xticks(rotation=45) 21 | plt.tight_layout() 22 | plt.show() 23 | -------------------------------------------------------------------------------- /count representation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | from scipy.stats import zscore 6 | 7 | # Load dataset 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx") 9 | df.info() 10 | # Data Cleaning 11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean()) 12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown") 13 | 14 | 15 | # 2. Count Plot - Item Type 16 | plt.figure(figsize=(10, 6)) 17 | sns.countplot(y='Item Type', data=df, order=df['Item Type'].value_counts().index, 18 | hue='Item Type', palette='viridis', legend=False) 19 | plt.title("Count of Each Item Type") 20 | plt.xlabel("Count") 21 | plt.ylabel("Item Type") 22 | plt.tight_layout() 23 | plt.show() 24 | --------------------------------------------------------------------------------