# ├── File update2 └── README.md /File update2: --------------------------------------------------------------------------------
# Demand Forecasting for E-commerce and Retail
"""Compare three demand-forecasting models on a dated sales series.

The script loads ``ecommerce_sales_data.csv``, prints a quick summary, plots
the sales history, builds calendar/lag features, trains a Random Forest, an
XGBoost regressor and a Holt-Winters exponential-smoothing model on the first
80% of the rows, and reports R2 on the remaining 20% together with a
comparison plot.

Every feature and every forecast for a given row is derived only from rows
that precede it, so the reported scores are genuine out-of-sample scores.

NOTE(review): the lag and rolling features treat the file as ONE series with
one row per period. If the CSV holds several products or stores per date the
features would mix them - confirm against the dataset.
"""

import numpy as np  # noqa: F401  (imported by the original script; currently unused)
import pandas as pd

DATA_PATH = 'ecommerce_sales_data.csv'
TARGET_COLUMN = 'sales'
FEATURE_COLUMNS = ['dayofweek', 'month', 'year', 'lag_1', 'rolling_mean_7']
TEST_SIZE = 0.2
ROLLING_WINDOW = 7
# NOTE(review): 12 is the original value and suggests monthly data, while the
# day-of-week feature suggests daily data (weekly cycle = 7) - confirm the
# sampling frequency before relying on the exponential-smoothing scores.
ES_SEASONAL_PERIODS = 12


def load_sales(path=DATA_PATH):
    """Read the sales CSV and return it sorted by ``date`` (oldest first).

    Args:
        path: CSV file containing at least a ``date`` and a ``sales`` column.

    Returns:
        DataFrame with ``date`` parsed as datetimes, in chronological order.
    """
    data = pd.read_csv(path, parse_dates=['date'])
    return data.sort_values('date')


def add_features(data):
    """Return a copy of *data* with calendar, lag and rolling-mean features.

    Args:
        data: DataFrame with a datetime ``date`` column and a numeric
            ``sales`` column, already in chronological order.

    Returns:
        A new DataFrame (the input is not modified) with the extra columns
        ``dayofweek``, ``month``, ``year``, ``lag_1`` and ``rolling_mean_7``.
        Rows containing a NaN in any column are dropped, which removes the
        warm-up rows that lack a full window of history.
    """
    featured = data.copy()
    dates = featured['date']
    featured['dayofweek'] = dates.dt.dayofweek
    featured['month'] = dates.dt.month
    featured['year'] = dates.dt.year

    # Both history features are built from the series shifted by one row, so
    # the value being predicted never appears in its own inputs. Rolling over
    # the unshifted series would put the target inside the window.
    previous_sales = featured[TARGET_COLUMN].shift(1)
    featured['lag_1'] = previous_sales
    featured['rolling_mean_7'] = previous_sales.rolling(window=ROLLING_WINDOW).mean()
    return featured.dropna()


def plot_sales_history(data):
    """Show a line chart of ``sales`` against ``date``."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    plt.plot(data['date'], data['sales'], label='Sales')
    plt.xlabel('Date')
    plt.ylabel('Sales')
    plt.title('Sales Over Time')
    plt.legend()
    plt.show()


def fit_and_predict(X_train, y_train, X_test):
    """Fit all three models on the training window and predict the test window.

    Args:
        X_train: Feature rows of the training window.
        y_train: Sales values of the training window, in chronological order.
        X_test: Feature rows of the test window (immediately after training).

    Returns:
        Dict mapping a display label to that model's predictions, one value
        per row of *X_test*.
    """
    import xgboost as xgb
    from sklearn.ensemble import RandomForestRegressor
    from statsmodels.tsa.holtwinters import ExponentialSmoothing

    predictions = {}

    # Model 1: Random Forest
    rf = RandomForestRegressor(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)
    predictions['Random Forest'] = rf.predict(X_test)

    # Model 2: XGBoost (seeded like the forest so reruns are comparable)
    xgbr = xgb.XGBRegressor(
        objective='reg:squarederror', n_estimators=100, random_state=42
    )
    xgbr.fit(X_train, y_train)
    predictions['XGBoost'] = xgbr.predict(X_test)

    # Model 3: Exponential Smoothing. It is fitted on the training window
    # only and asked for a true multi-step forecast; scoring in-sample fitted
    # values of a model that has already seen the test period would overstate
    # its accuracy. A plain array is passed because the row labels left after
    # sorting/dropna are not a regular index.
    es_model = ExponentialSmoothing(
        y_train.to_numpy(),
        trend='add',
        seasonal='add',
        seasonal_periods=ES_SEASONAL_PERIODS,
    )
    es_fit = es_model.fit()
    predictions['Exponential Smoothing'] = es_fit.forecast(len(X_test))

    return predictions


def plot_predictions(dates, y_test, predictions):
    """Show actual test-window sales next to every model's predictions.

    Args:
        dates: Dates of the test rows, aligned with *y_test*.
        y_test: Actual sales of the test rows.
        predictions: Dict of label -> predictions as returned by
            ``fit_and_predict``.
    """
    import matplotlib.pyplot as plt

    plt.figure(figsize=(14, 6))
    plt.plot(dates, y_test, label='Actual')
    for label, predicted in predictions.items():
        plt.plot(dates, predicted, label=label)
    plt.legend()
    plt.title('Demand Forecasting Predictions')
    plt.xlabel('Date')
    plt.ylabel('Sales')
    plt.grid(True)
    plt.show()


def main():
    """Run the whole pipeline: load, explore, train, evaluate, plot."""
    # Third-party libraries are imported inside the functions that use them so
    # the pandas-only helpers can be imported without the modelling stack.
    # seaborn and the MAE/MSE metrics were imported but never used by the
    # original script; they are kept so its dependency set is unchanged.
    import seaborn as sns  # noqa: F401
    from sklearn.metrics import mean_absolute_error, mean_squared_error  # noqa: F401
    from sklearn.metrics import r2_score
    from sklearn.model_selection import train_test_split

    # Load dataset
    data = load_sales()

    # Exploratory Data Analysis
    print(data.head())
    print(data.describe())
    plot_sales_history(data)

    # Feature engineering
    data = add_features(data)

    # Train/test split: shuffle=False keeps the last 20% of rows as the test
    # window, so every model is evaluated on data later than what it saw.
    X = data[FEATURE_COLUMNS]
    y = data[TARGET_COLUMN]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, shuffle=False
    )

    predictions = fit_and_predict(X_train, y_train, X_test)

    # Evaluation
    print("Random Forest R2:", r2_score(y_test, predictions['Random Forest']))
    print("XGBoost R2:", r2_score(y_test, predictions['XGBoost']))
    print("Exp Smoothing R2:", r2_score(y_test, predictions['Exponential Smoothing']))

    # Plot Predictions (dates selected by label so they always match y_test)
    plot_predictions(data.loc[y_test.index, 'date'], y_test, predictions)


if __name__ == "__main__":
    main()

# Repository-dump text that shared this line with the script, kept verbatim:
# -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 📈 Demand Forecasting for E-commerce and Retail 2 | 3 | This project aims to build accurate and scalable demand forecasting models for e-commerce and retail businesses. By leveraging historical sales data, product metadata, and external factors, the models help predict future product demand to optimize inventory management, supply chain operations, and marketing strategies. 4 | 🚀 Project Objectives 5 | 6 | Forecast daily/weekly/monthly demand at product or category level. 7 | 8 | Reduce overstock and stockouts through predictive analytics. 9 | 10 | Support real-time and batch processing of demand forecasts. 11 | 12 | Provide visual insights for business decision-making. 13 | 14 | 📦 Key Features 15 | 16 | 🧠 Machine Learning Models (e.g., XGBoost, LightGBM, Random Forest) 17 | 18 | 📊 Time Series Forecasting (ARIMA, Prophet, LSTM, etc.)
19 | 20 | 🗃️ Feature Engineering (calendar events, lag variables, promotional flags) 21 | 22 | 📈 Model Evaluation (MAPE, RMSE, SMAPE) 23 | 24 | 🌐 Interactive Dashboards (optional via Streamlit or Power BI) 25 | 26 | ⏱️ Hyperparameter tuning for model optimization 27 | 28 | 🗂️ Project Structure 29 | 30 | Demand-Forecasting-for-E-commerce-and-Retail/ 31 | │ 32 | ├── data/ # Raw and processed datasets 33 | ├── notebooks/ # Jupyter notebooks for exploration and modeling 34 | ├── src/ # Core source code (data processing, modeling) 35 | │ ├── features.py 36 | │ ├── model.py 37 | │ └── utils.py 38 | ├── models/ # Trained model artifacts 39 | ├── reports/ # Output reports, visualizations 40 | ├── app/ # Optional Streamlit app 41 | ├── requirements.txt # Python dependencies 42 | └── README.md # Project documentation 43 | 44 | 📊 Example Use Cases 45 | 46 | Forecasting daily demand for SKUs in an online retail store. 47 | 48 | Predicting weekly sales across regions for a supermarket chain. 49 | 50 | Planning stock levels for promotional campaigns (e.g., Black Friday). 51 | 52 | 📈 Sample Workflow 53 | 54 | Data Collection: Load historical sales and product data. 55 | 56 | Preprocessing: Clean, impute, and transform data. 57 | 58 | Feature Engineering: Create lag features, rolling averages, etc. 59 | 60 | Model Training: Train models and evaluate performance. 61 | 62 | Forecasting: Generate future demand predictions. 63 | 64 | Visualization: Analyze forecasts via plots or dashboards. 
65 | 66 | 🧪 Requirements 67 | 68 | Install dependencies using: 69 | 70 | pip install -r requirements.txt 71 | 72 | ⚙️ Technologies Used 73 | 74 | Python (Pandas, NumPy, Scikit-learn) 75 | 76 | XGBoost, LightGBM, Prophet, LSTM 77 | 78 | Matplotlib, Seaborn, Plotly 79 | 80 | Streamlit (optional for web UI) 81 | 82 | 📚 References 83 | 84 | Kaggle Retail Forecasting Competitions 85 | 86 | Facebook Prophet Documentation 87 | 88 | Demand forecasting research papers and case studies 89 | 90 | 📝 License 91 | 92 | This project is licensed under the MIT License. 93 | --------------------------------------------------------------------------------