"""CA2 project: exploratory analysis of dealer-wise part stock.

Repository layout this script belongs to:
    ├── Dealer Wise Particular Part Stock (2).csv
    └── ca2 project.py
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn

# Load the dataset. NOTE(review): the path is machine-specific; adjust it when
# running on a different computer.
df = pd.read_csv("C:/Users/anusr/OneDrive/Desktop/CA 2 Project 12319748/Dealer Wise Particular Part Stock (2).csv")
print(df)
print(df.head())

print(df.info())

print(df.describe())

# Overview of missing data: per-column counts, then the grand total.
t = df.isnull().sum()
print("Null Values count:", t)

print("Total null values:", df.isnull().sum().sum())
# Null count of every column the analysis relies on, one number per line.
# Indexing raises KeyError if any of these columns is absent from the CSV.
for column in (
    'Zonal Office',
    'Area Office',
    'Dealer Name',
    'Inventory Location',
    'Part Number',
    'Part Description',
    'Stock QTY',
    'RATE',
    'Amount',
):
    print(df[column].isnull().sum())
print(df.columns.tolist())


if 'Stock QTY' in df.columns:
    # Store the filled column back on the frame. Previously the result of
    # fillna(0) was only compared against the original and printed, so
    # 'Stock QTY' was the one column that kept its NaN values.
    df['Stock QTY'] = df['Stock QTY'].fillna(0)
    print(df['Stock QTY'])
else:
    print(" 'Stock QTY' column not found!")

print("\nMissing values per column:\n", df.isnull().sum())
print("\nAny missing values at all?:", df.isnull().values.any())

# Replace missing text values with the placeholder 'Unknown', reporting the
# remaining null counts after each column is cleaned.
for column in ('Zonal Office', 'Area Office', 'Dealer Name'):
    df[column] = df[column].fillna('Unknown')
    print(df[column])
    print(df.isnull().sum())
    print(df.isnull().values.any())

df['Inventory Location'] = df['Inventory Location'].fillna('Unknown')
print(df['Inventory Location'])
print(df.isnull().sum())
print(df.isnull().values.any())

# Fill the remaining columns: 0 for 'Part Number', 'RATE' and 'Amount',
# 'Unknown' for 'Part Description'. Null counts are re-printed after each
# step to show the cleaning progress.
df['Part Number'] = df['Part Number'].fillna(0)
print(df['Part Number'])
print(df.isnull().sum())
print(df.isnull().values.any())

df['Part Description'] = df['Part Description'].fillna('Unknown')
print(df['Part Description'])
print(df.isnull().sum())
print(df.isnull().values.any())

df['RATE'] = df['RATE'].fillna(0)
print(df['RATE'])
print(df.isnull().sum())
print(df.isnull().values.any())

df['Amount'] = df['Amount'].fillna(0)
print(df['Amount'])
print(df.isnull().values.any())
print(df.isnull().sum())
print(df.isnull().values.any())

# Objective 1: Visualize the top 10 parts by their inventory value (sum of
# "Amount") in a horizontal bar chart.
top_parts = df.groupby("Part Number", as_index=False)["Amount"].sum()
# Use string labels so seaborn treats part numbers as categories and keeps
# the row order (largest value first) instead of ordering bars numerically.
top_parts["Part Number"] = top_parts["Part Number"].astype(str)
top_parts = top_parts.nlargest(10, "Amount")
plt.figure(figsize=(7, 4))
# The value goes on x and the category on y with orient="h", so the bars are
# horizontal as the objective requires (they were previously drawn vertically).
sn.barplot(data=top_parts, x="Amount", y="Part Number", orient="h", color='darkgreen')
plt.title("Top 10 Part Numbers by Inventory Value")
plt.xlabel("Inventory Value (INR)")
plt.ylabel("Part Number")
plt.tight_layout()
plt.show()

# Objective 2: To identify which zones (represented by the top 10 dealers)
# hold more expensive inventory by analyzing the inventory value using a box plot.
# Keep only rows belonging to the ten dealers with the largest summed
# "Amount", then show the spread of "Amount" per dealer as a box plot.
top_dealers = df.groupby('Dealer Name')['Amount'].sum().nlargest(10).index
df_top = df[df['Dealer Name'].isin(top_dealers)]
plt.figure(figsize=(7, 5))
# hue mirrors x only so a palette can be applied; the legend would be redundant.
sn.boxplot(data=df_top, x='Dealer Name', y='Amount', palette='husl', legend=False, hue='Dealer Name')
plt.title('Distribution of Inventory Value (Amount) for Top 10 Dealers')
plt.xlabel('Dealer Name')
plt.ylabel('Inventory Value (Amount)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Objective 3: To visualize and analyze the unit rates of the top 20 most
# frequently occurring part descriptions in the dataset using a line plot.
plt.figure(figsize=(7, 5))
frequent_descriptions = df["Part Description"].value_counts().head(20).index
filtered_df = df[df["Part Description"].isin(frequent_descriptions)]
sn.lineplot(data=filtered_df, x="Part Description", y="RATE", marker="*", linewidth=2)
plt.xticks(rotation=90)
plt.title("Rate by Part Description")
plt.xlabel("Part Description")
plt.ylabel("Rate (Unit Price)")
plt.tight_layout()
plt.grid(True)
plt.show()

# Objective 4: To analyze the relationship between Stock Quantity and Rate of
# parts across dealers by visualizing their correlation using a heatmap.
corr_matrix = df[["Stock QTY", "RATE"]].corr()
plt.figure(figsize=(6, 5))
sn.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation Heatmap: Stock QTY, RATE")
plt.show()

# Objective 5: To visualize the distribution and frequency of stock
# quantities across all parts using a histogram.
# Drop missing quantities first: the histogram cannot derive a finite bin
# range from NaN values and raises ValueError if any are present.
stock_qty = df["Stock QTY"].dropna()
plt.figure(figsize=(6, 5))
plt.hist(stock_qty, bins=30, color="black")
plt.title("Histogram of Stock Quantity")
plt.xlabel("Stock QTY")
plt.ylabel("Frequency")
plt.show()