├── README.md ├── code_file_python_project.docx ├── obj2.py ├── obj5.py ├── obj9.py ├── obj3.py ├── obj6.py ├── obj7.py ├── obj4.py ├── obj1.py └── obj8.py /README.md: -------------------------------------------------------------------------------- 1 | this project is based on retail data analasys using python 2 | -------------------------------------------------------------------------------- /code_file_python_project.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Prabhas121/retail-project/HEAD/code_file_python_project.docx -------------------------------------------------------------------------------- /obj2.py: -------------------------------------------------------------------------------- 1 | Quantity Distribution 2 | 3 | plt.figure(figsize=(8, 5)) 4 | sns.histplot(df_cleaned["Quantity"], bins=30, kde=True, color="green") 5 | plt.title("Distribution of Quantity") 6 | plt.xlabel("Quantity") plt.ylabel("Frequency") plt.show() 7 | -------------------------------------------------------------------------------- /obj5.py: -------------------------------------------------------------------------------- 1 | # 5. Quantity vs Unit Price plt.figure(figsize=(8, 5)) 2 | sns.scatterplot(x=df_cleaned["Quantity"], y=df_cleaned["UnitPrice"], color='blue', alpha=0.5) 3 | plt.title("Quantity vs Unit Price") plt.xlabel("Quantity") plt.ylabel("Unit Price") plt.show() 4 | -------------------------------------------------------------------------------- /obj9.py: -------------------------------------------------------------------------------- 1 | # 9. Heatmap of Correlation Between Numerical Features numerical_df = df_cleaned.select_dtypes(include=["number"]) correlation_matrix = numerical_df.corr() plt.figure(figsize=(12, 6)) 2 | sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", linewidths=0.5) plt.title("Correlation Heatmap of Numerical Features") plt.show() 3 | -------------------------------------------------------------------------------- /obj3.py: -------------------------------------------------------------------------------- 1 | 2 | # 3. Proportion of Orders by Country 3 | country_orders = df_cleaned["Country"].value_counts() plt.figure(figsize=(10, 5)) sns.barplot(x=country_orders.index, y=country_orders.values, palette="Set2") 4 | plt.title("Proportion of Orders by Country") plt.xlabel("Country") 5 | plt.ylabel("Number of Orders") plt.xticks(rotation=90) plt.show() 6 | -------------------------------------------------------------------------------- /obj6.py: -------------------------------------------------------------------------------- 1 | # 6. Top 10 Most Purchased Products by Quantity top_products = 2 | df_cleaned.groupby("Description")["Quantity"].sum().nlargest(10) top_products.plot(kind="bar", figsize=(12, 6), color="coral") plt.title("Top 10 Most Purchased Products by Quantity") plt.xlabel("Product Description") 3 | plt.ylabel("Total Quantity Purchased") plt.xticks(rotation=45) 4 | plt.show() 5 | 6 | -------------------------------------------------------------------------------- /obj7.py: -------------------------------------------------------------------------------- 1 | # 7. Count of Orders by CustomerID 2 | customer_orders = df_cleaned["CustomerID"].value_counts().head(10) plt.figure(figsize=(12, 6)) 3 | sns.barplot(x=customer_orders.index, y=customer_orders.values, palette="Blues") 4 | plt.title("Top 10 Customers by Number of Orders") plt.xlabel("CustomerID") 5 | plt.ylabel("Number of Orders") plt.xticks(rotation=90) 6 | 7 | plt.show() 8 | 9 | -------------------------------------------------------------------------------- /obj4.py: -------------------------------------------------------------------------------- 1 | # 4. Average UnitPrice by Country avg_price_country = 2 | df_cleaned.groupby("Country")["UnitPrice"].mean().sort_values(ascendin g=False) 3 | plt.figure(figsize=(12, 6)) 4 | sns.barplot(x=avg_price_country.index, y=avg_price_country.values, palette="viridis") 5 | plt.title("Average UnitPrice by Country") plt.xlabel("Country") plt.ylabel("Average Unit Price") plt.xticks(rotation=90) 6 | plt.show() 7 | -------------------------------------------------------------------------------- /obj1.py: -------------------------------------------------------------------------------- 1 | CustomerID Completeness 2 | 3 | plt.figure(figsize=(8, 5)) 4 | missing_customer = df["CustomerID"].isnull().sum() 5 | available_customer = df["CustomerID"].notnull().sum() 6 | plt.bar(["Available Data", "Missing Data"], [available_customer, missing_customer], color=["lightgreen", "lightblue"]) 7 | plt.title("Availability of CustomerID Data") 8 | 9 | plt.ylabel("Number of Entries") plt.show() 10 | -------------------------------------------------------------------------------- /obj8.py: -------------------------------------------------------------------------------- 1 | # 8. Sales by Month 2 | df_cleaned['InvoiceDate'] = pd.to_datetime(df_cleaned['InvoiceDate'], format='%d-%m-%Y %H:%M') 3 | df_cleaned['Month'] = df_cleaned['InvoiceDate'].dt.month monthly_sales = df_cleaned.groupby("Month")["UnitPrice"].sum() plt.figure(figsize=(8, 5)) 4 | monthly_sales.plot(kind="bar", color="teal") plt.title("Sales by Month") plt.xlabel("Month") 5 | plt.ylabel("Total Sales") plt.xticks(rotation=0) plt.show() 6 | --------------------------------------------------------------------------------