# Repository dump: Amazon_shipment
#
# ├── README.md
# └── data_visualization
#     └── datavisualization.py
#
# /README.md
#     # Amazon_shipment
#
# /data_visualization/datavisualization.py
"""Exploratory plots for the Amazon product shipment dataset.

Running this script:

1. loads the shipment CSV from ``DATA_PATH``;
2. prints the frame summary and descriptive statistics;
3. fills missing values (mode for the categorical columns, mean for the
   numeric ones) and prints the summary again;
4. shows seven figures one after another: two histograms, a pie chart,
   two count plots, a box plot and a correlation heatmap.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

# Location of the input CSV; edit this to point at a local copy of the data.
DATA_PATH = "C:/Users/yvsya/OneDrive/Desktop/amazon_product_shipment_dataset.csv"

# Columns whose gaps are filled with the most frequent value.
MODE_FILLED_COLUMNS = ("Mode_of_Shipment", "Product_importance", "Gender")
# Columns whose gaps are filled with the column mean.
MEAN_FILLED_COLUMNS = ("Cost_of_the_Product", "Prior_purchases", "Discount_offered")

df = pd.read_csv(DATA_PATH)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also emit "None").
df.info()
print(df.describe())

# Assign the filled column back to the frame. Calling
# ``df[col].fillna(..., inplace=True)`` operates on an intermediate Series:
# recent pandas versions warn about it, and with Copy-on-Write enabled the
# frame is left unchanged.
for column in MODE_FILLED_COLUMNS:
    df[column] = df[column].fillna(df[column].mode().iloc[0])
for column in MEAN_FILLED_COLUMNS:
    df[column] = df[column].fillna(df[column].mean())
df.info()

# 1. Histogram of product cost
plt.figure(figsize=(12, 5))
sb.histplot(df['Cost_of_the_Product'], bins=30, kde=True, color='skyblue')
plt.title('Cost of the Product')
plt.xlabel('Cost')
plt.ylabel('Frequency')
plt.show()

# 2. Pie chart for shipment mode
shipment_counts = df['Mode_of_Shipment'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(shipment_counts, labels=shipment_counts.index, autopct='%1.1f%%', startangle=140)
plt.title('Shipment Mode Distribution')
plt.axis('equal')
plt.show()

# 3. Bar plot of product importance
# NOTE(review): newer seaborn releases warn when `palette` is passed without
# `hue`; left as-is because the replacement call is not accepted by older
# releases. Revisit once the minimum seaborn version is pinned.
sb.countplot(data=df, x='Product_importance', palette='Set2')
plt.title('Product Importance Levels')
plt.xlabel('Importance Level')
plt.ylabel('Count')
plt.show()

# 4. Countplot of delivery status by mode of shipment
# NOTE(review): the "(1=Yes)" wording here and in plot 5 assumes that 1 in
# 'Reached.on.Time_Y.N' means "reached on time" -- confirm against the
# dataset's documentation.
sb.countplot(data=df, x='Mode_of_Shipment', hue='Reached.on.Time_Y.N', palette='Set1')
plt.title('Delivery Status by Shipment Mode')
plt.xlabel('Mode of Shipment')
plt.ylabel('Count')
plt.legend(title='Reached On Time (1=Yes)')
plt.show()

# 5. Boxplot: weight vs delivery status
sb.boxplot(data=df, x='Reached.on.Time_Y.N', y='Weight_in_gms')
plt.title('Weight of Shipment vs Delivery Status')
plt.xlabel('Reached on Time (1=Yes)')
plt.ylabel('Weight (grams)')
plt.show()

# 6. Histogram for discount
plt.figure(figsize=(8, 4))
sb.histplot(df['Discount_offered'], bins=30, kde=True, color='orange')
plt.title('Distribution of Discounts Offered')
plt.xlabel('Discount Offered')
plt.ylabel('Frequency')
plt.show()

# 7. Correlation heatmap (numeric columns only)
plt.figure(figsize=(10, 6))
sb.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()