├── README.md
├── histogram representation.py
├── summary representation.py
├── pairy plot.py
├── NumPy Arrays and Operations.py
├── boxy plot.py
├── Correlation and Covariance.py
├── Outlier Detection using IQR.py
├── heat map representation.py
├── Matplotlib Bar Plot.py
└── count representation.py


/README.md:
--------------------------------------------------------------------------------
1 | # Data-visualization-with-python


--------------------------------------------------------------------------------
/histogram representation.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown
13 | 


--------------------------------------------------------------------------------
/summary representation.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # 6. Summary Statistics
15 | print("\nSummary Statistics:\n")
16 | print(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].describe())
17 | 
18 | 


--------------------------------------------------------------------------------
/pairy plot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # 4. Pair Plot - Variable Relationships
15 | sns.pairplot(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].dropna())
16 | plt.suptitle("Pairwise Relationships", y=1.02)
17 | plt.show()
18 | 


--------------------------------------------------------------------------------
/NumPy Arrays and Operations.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # Objective 9: NumPy Arrays and Operations
15 | weights = df['Item Weight'].dropna().values
16 | print("Mean Item Weight:", np.mean(weights))
17 | print("Standard Deviation of Weight:", np.std(weights))
18 | 


--------------------------------------------------------------------------------
/boxy plot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | #3. Boxplot - Sales by Outlet Type
15 | plt.figure(figsize=(8, 5))
16 | sns.boxplot(x='Outlet Type', y='Sales', data=df,
17 |             hue='Outlet Type', palette='Set2', legend=False)
18 | plt.title("Sales by Outlet Type")
19 | plt.tight_layout()
20 | plt.show()
21 | 


--------------------------------------------------------------------------------
/Correlation and Covariance.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # Objective 7: Correlation and Covariance
15 | print("Correlation Matrix:\n", df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].corr())
16 | print("Covariance Matrix:\n", df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].cov())
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Outlier Detection using IQR.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # Objective 8: Outlier Detection using IQR
15 | Q1 = df['Sales'].quantile(0.25)
16 | Q3 = df['Sales'].quantile(0.75)
17 | IQR = Q3 - Q1
18 | outliers = df[(df['Sales'] < Q1 - 1.5 * IQR) | (df['Sales'] > Q3 + 1.5 * IQR)]
19 | print("Number of Sales Outliers:", outliers.shape[0])
20 | 


--------------------------------------------------------------------------------
/heat map representation.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # 5. Heatmap - Correlation Matrix
15 | plt.figure(figsize=(6, 5))
16 | sns.heatmap(df[['Item Weight', 'Item Visibility', 'Sales', 'Rating']].corr(),
17 |             annot=True, cmap='YlGnBu', linecolor='white', linewidths=1)
18 | plt.title("Correlation Heatmap")
19 | plt.tight_layout()
20 | plt.show()
21 | 


--------------------------------------------------------------------------------
/Matplotlib Bar Plot.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | # Objective 10: Matplotlib Bar Plot
15 | sales_by_type = df.groupby('Item Type')['Sales'].sum().sort_values(ascending=False)
16 | top5 = sales_by_type.head()
17 | plt.bar(top5.index, top5.values, color='skyblue')
18 | plt.title("Top 5 Selling Item Types")
19 | plt.ylabel("Total Sales")
20 | plt.xticks(rotation=45)
21 | plt.tight_layout()
22 | plt.show()
23 | 


--------------------------------------------------------------------------------
/count representation.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | import seaborn as sns
 5 | from scipy.stats import zscore
 6 | 
 7 | # Load dataset
 8 | df = pd.read_excel("C:\\Users\\udayt\\Downloads\\BlinkIT Grocery Data.xlsx")
 9 | df.info()
10 | # Data Cleaning
11 | df['Item Weight'] = df['Item Weight'].fillna(df['Item Weight'].mean())
12 | df['Outlet Size'] = df['Outlet Size'].fillna("Unknown")
13 | 
14 | 
15 | # 2. Count Plot - Item Type
16 | plt.figure(figsize=(10, 6))
17 | sns.countplot(y='Item Type', data=df, order=df['Item Type'].value_counts().index,
18 |               hue='Item Type', palette='viridis', legend=False)
19 | plt.title("Count of Each Item Type")
20 | plt.xlabel("Count")
21 | plt.ylabel("Item Type")
22 | plt.tight_layout()
23 | plt.show()
24 | 


--------------------------------------------------------------------------------