├── logs └── logs.log ├── main ├── __init__.py ├── logger.py ├── main.py └── safety_stock_utils.py ├── requirements.txt └── README.md /logs/logs.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==2.2.5 2 | pandas==2.2.3 3 | python-dateutil==2.9.0.post0 4 | python-dotenv==1.1.0 5 | pytz==2025.2 6 | scipy==1.15.2 7 | six==1.17.0 8 | tzdata==2025.2 -------------------------------------------------------------------------------- /main/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from logging.handlers import RotatingFileHandler 4 | 5 | 6 | 7 | 8 | #logger = logging.getLogger(__name__) 9 | logger = logging.getLogger(name="safetystock") 10 | 11 | 12 | 13 | fileHandler = RotatingFileHandler(os.path.abspath("./logs/logs.log"), backupCount=50, maxBytes=5000000) 14 | 15 | 16 | fmt = logging.Formatter( 17 | "%(name)s: %(asctime)s | %(levelname)s | %(filename)s%(lineno)s | %(process)d | >>> %(message)s" 18 | ) 19 | 20 | fileHandler.setFormatter(fmt) 21 | 22 | logger.addHandler(fileHandler) 23 | 24 | logger.setLevel(logging.INFO) 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Using Gaussian Kernel Density Estimation to Simulate Safety Stock 2 | 3 | ![](https://i.imgur.com/FaWOaYJ.png) 4 | 5 |

Description

6 | 7 | The typical formula for safety stock is given as $SS = Z_\sigma \sigma_d \sqrt{L + R}$ 8 | 9 | where $Z_\sigma$ is the inverse CDF of the standard normal distribution evaluated at the target service level, $\sigma_d$ is the standard deviation of demand, and $L + R$ is the lead time plus the review period. 10 | 11 | Some notable limitations when applying this formulation to real-world situations are as follows: 12 | 1. demand is not normally distributed and is mostly discrete 13 | 2. demand is historical and not forward-looking 14 | 3. $Z_\sigma$ depends on picking an arbitrary service level, which may not be optimal 15 | 16 | In this project, the following modifications were made to the above formulation to minimize these limitations when applying it to real-world scenarios: 17 | 1. model demand using a custom Gaussian kernel density estimation and transform the resulting distribution to a discrete distribution 18 | 2. use forecasted demand if available 19 | 3. run simulations over different service levels, say 70% to 99%, and different review periods, say 1 week to 4 weeks 20 | 4. optional: use linear programming to pick the safety stock setting that minimizes inventory holding cost 21 | 22 | 23 |

Programming Language

24 | 25 | - Python 26 | 27 |

Environment Used

28 | 29 | - Ubuntu 30 | 31 |

To reproduce:

32 | 33 |

# README.md (continued) -- steps to reproduce:
#
# 1. Clone the project. Run this from the command line:
#
#        git clone https://github.com/graphshade/safety_stock_simulation.git
#
# 2. Change directory to safety_stock_simulation, create a virtual environment
#    and install the dependencies:
#
#        pip install -r requirements.txt
#
# 3. Update the main.py file to input the needed data, then run it:
#
#        python main.py
#
# ---------------------------------------------------------------------------
# /main/main.py
# ---------------------------------------------------------------------------
import sys
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv

import safety_stock_utils
from logger import logger

# Columns this script reads from the input frame: "itemID" and "qty" for the
# distribution fit, the remaining four as per-row simulation settings.
REQUIRED_COLUMNS = ("itemID", "qty", "price", "R_value", "lt_wk", "SL_value")

# Destination of the simulation results.
OUTPUT_PATH = "./results/outputs_kde/ss_out_.csv"


def main():
    """Fit a demand distribution per item, simulate safety stock, write a CSV.

    Returns
    -------
    int
        Process exit code: 0 on success, 1 if data preparation or the
        safety-stock run failed (the error is logged at CRITICAL level).
    """
    # Load environment variables, if any.
    load_dotenv()

    # --- Data preparation: consumption input data ---
    try:
        # Data preparation code goes here. The frame below is placeholder
        # data only; it is built column-wise so that every row holds one
        # scalar itemID and qty (a list of one dict with list values would
        # produce a single row whose cells are lists).
        df = pd.DataFrame({"itemID": ["item1", "item2"],
                           "qty": [234, 479]})

        # Fail early, before the expensive fit/simulation, if a column that
        # is read further down is absent.
        missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise ValueError(f"input data is missing required columns: {missing}")
    except Exception as error:
        # exc_info attaches the exception traceback to the log record.
        logger.critical(error, exc_info=True)
        return 1

    # --- Safety stock run ---
    try:
        # Fit one discrete demand distribution per unique item and collect
        # the support (d_x) and probability mass function (d_pmf).
        kde_li = []
        for item in df["itemID"].unique():
            try:
                d_x, d_pmf = safety_stock_utils.fit_discrete_kde(
                    df.loc[df["itemID"] == item, "qty"]
                )
            except Exception as error:
                # Items whose data cannot support a fit are skipped on
                # purpose, but the skip is recorded instead of being silent.
                logger.warning(
                    "skipping item %s: could not fit demand distribution (%s)",
                    item,
                    error,
                )
                continue
            kde_li.append({"itemID": item, "d_x": d_x, "d_pmf": d_pmf})

        if not kde_li:
            # Without this guard the merge below fails with an opaque
            # KeyError because the empty frame has no "itemID" column.
            raise ValueError("no item had enough data to fit a demand distribution")

        # Attach the fitted distributions to the original rows and keep only
        # the rows whose item was fitted.
        df_kde = df.merge(pd.DataFrame(kde_li), on="itemID", how="left")
        df_kde = df_kde[df_kde["d_x"].notna()].copy()

        logger.info("custom fitting successful")

        # --- Run simulation (one run per row) ---
        results = df_kde.apply(
            lambda row: safety_stock_utils.simulate_safety_custom(
                d_x=row["d_x"],
                d_pmf=row["d_pmf"],
                pu_price=row["price"],
                R=row["R_value"],
                L=row["lt_wk"],
                alpha=row["SL_value"],
                time=100000,
            ),
            axis=1,
        )
        df_kde[["t_alpha", "sl_alpha", "sl_period", "ss", "ss_value"]] = pd.DataFrame(
            results.tolist(), index=df_kde.index
        )
        df_kde = df_kde.drop(columns=["d_x", "d_pmf"])

        # to_csv does not create missing directories.
        Path(OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
        df_kde.to_csv(OUTPUT_PATH, index=False)

        # NOTE: this log call was flagged "must delete" in the original source.
        logger.info("simulation completed")
    except Exception as error:
        logger.critical(error, exc_info=True)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())


# ---------------------------------------------------------------------------
# /main/safety_stock_utils.py
# ---------------------------------------------------------------------------
import pandas as pd
import numpy as np

from scipy import stats

from datetime import datetime
import glob
pd.options.mode.chained_assignment = None

# Number of simulated (L + R)-period demand totals used to estimate the
# empirical quantile in simulate_safety_custom.
_N_QUANTILE_SAMPLES = 1000


def fit_discrete_kde(d_x, bw_method='scott', cut=3, support_min=0):
    """
    Fit a Gaussian KDE to `d_x`, evaluate it on an integer grid and return a
    discrete probability mass function.

    Parameters
    ----------
    d_x : array-like
        Observed demand values; converted to a float array.
    bw_method : str, scalar or callable, default='scott'
        Passed through to `scipy.stats.gaussian_kde`.
    cut : float, default=3
        The grid extends `cut` bandwidths below the minimum and above the
        maximum observation.
    support_min : int or None, default=0
        Smallest value kept in the support. Probability mass below it is
        merged into the first kept bin. `None` disables the truncation.

    Returns
    -------
    tuple
        (x, pmf): integer support array and matching probabilities that sum
        to 1.

    Notes
    -----
    Errors raised by `scipy.stats.gaussian_kde` (for example when the data
    cannot support a fit) are not caught here.
    """
    d_x = np.asarray(d_x, dtype=float)

    kde = stats.gaussian_kde(d_x, bw_method=bw_method)

    # Inclusive integer range covering the data plus `cut` bandwidths.
    bandwidth = kde.factor * np.std(d_x, ddof=1)
    lower = int(np.floor(d_x.min() - cut * bandwidth))
    upper = int(np.ceil(d_x.max() + cut * bandwidth))
    if upper < lower:
        # Can happen for a negative `cut`.
        lower, upper = upper, lower
    x = np.arange(lower, upper + 1, dtype=int)

    # Grid spacing is 1, so normalising the densities yields a PMF.
    pdf_vals = kde.pdf(x.astype(float))
    total = pdf_vals.sum()
    if total == 0:
        # Degenerate fallback: all mass at the integer nearest to the mean.
        xi = int(np.rint(d_x.mean()))
        return np.array([xi], dtype=int), np.array([1.0], dtype=float)
    pmf = pdf_vals / total

    if support_min is not None:
        mask = x >= support_min
        if not mask.any():
            # The whole grid lies below support_min.
            return np.array([int(support_min)], dtype=int), np.array([1.0], dtype=float)

        # Move the mass below support_min into the first kept bin, then
        # truncate both arrays.
        spill = pmf[~mask].sum()
        first_idx = np.where(mask)[0][0]
        pmf[first_idx] += spill
        pmf = pmf[mask]
        x = x[mask]

    # Final renormalisation to protect against floating-point drift.
    pmf = pmf / pmf.sum()

    return x.astype(int), pmf


def attributes(pmf, x):
    """
    Return the mean and standard deviation of a discrete distribution.

    Parameters
    ----------
    pmf : array-like
        Probability mass function matching `x`.
    x : array-like
        Discrete demand values.

    Returns
    -------
    tuple
        (mu, std): expected value and standard deviation of the distribution.
    """
    # Converting first lets plain lists be passed as well as arrays.
    pmf = np.asarray(pmf, dtype=float)
    x = np.asarray(x, dtype=float)
    mu = (pmf * x).sum()
    var = (((x - mu) ** 2) * pmf).sum()
    std = np.sqrt(var)
    return mu, std


def _as_period_count(value, name, minimum):
    """Return `value` as an int; reject non-integral or too-small values."""
    as_int = int(value)
    if as_int != value or as_int < minimum:
        raise ValueError(f"{name} must be an integer >= {minimum}, got {value!r}")
    return as_int


def _simulate_inventory(d, S, L, R):
    """Run the order-up-to simulation; return (cycle, period) service levels.

    `d` is the per-period demand, `S` the order-up-to level, `L` the lead
    time and `R` the review period. Period 0 is never flagged as a stockout,
    and the cycle service level is NaN when no positive receipt ever arrives.
    """
    time = len(d)
    hand = np.zeros(time)
    # Column 0 holds what arrives next period, column L the newest order.
    transit = np.zeros((time, L + 1))
    stockout_period = np.zeros(time, dtype=bool)
    stockout_cycle = []

    hand[0] = S - d[0]
    transit[0, -1] = d[0]

    for t in range(1, time):
        arriving = transit[t - 1, 0]
        if arriving > 0:
            # A positive receipt closes a cycle; record whether the period
            # just before it was a stockout.
            stockout_cycle.append(stockout_period[t - 1])
        hand[t] = hand[t - 1] - d[t] + arriving
        stockout_period[t] = hand[t] < 0
        hand[t] = max(0, hand[t])
        # Shift the pipeline one period closer to arrival.
        transit[t, :-1] = transit[t - 1, 1:]
        if t % R == 0:
            # Review period: order back up to S.
            net = hand[t] + transit[t].sum()
            transit[t, L] = S - net

    if stockout_cycle:
        sl_cycle = float(round(1 - np.mean(stockout_cycle), 4))
    else:
        sl_cycle = float("nan")
    sl_period = float(round(1 - np.mean(stockout_period), 4))
    return sl_cycle, sl_period


### Safety Stock Over Risk Period (L + R): Fitting Gaussian Kernel Density, simulation
def simulate_safety_custom(
    d_x, d_pmf, L=4, R=1, alpha=0.95, time=200, pu_price=None, holding_rate=1, seed=111
):
    """
    Simulate inventory performance under stochastic demand using an empirical
    safety stock estimate derived from a discrete demand distribution.

    Parameters
    ----------
    d_x : array-like
        Discrete demand values (support of the demand distribution).
    d_pmf : array-like
        Probability mass function corresponding to `d_x`.
    L : int, default=4
        Lead time (in periods) for replenishment. Integral floats such as
        4.0 are accepted.
    R : int, default=1
        Review period (in periods) between orders; must be at least 1.
    alpha : float, default=0.95
        Service level target (quantile for empirical safety stock computation).
    time : int, default=200
        Number of simulation periods; must be at least 1.
    pu_price : float, optional
        Unit purchase price used to estimate the holding cost value of safety stock.
    holding_rate : float, default=1
        Holding cost rate applied to the safety stock value.
    seed : int or None, default=111
        Seed for NumPy's global random state. `None` leaves the state
        untouched; 0 is a valid seed.

    Returns
    -------
    tuple
        (target_service_level, simulated_cycle_service_level,
         simulated_period_service_level, safety_stock_units, safety_stock_value)

        where:
        - target_service_level : float — `alpha` expressed as a percentage.
        - simulated_cycle_service_level : float — achieved service level per
          cycle, as a fraction.
        - simulated_period_service_level : float — achieved service level per
          period, as a fraction.
        - safety_stock_units : int — computed empirical safety stock quantity.
        - safety_stock_value : float — `pu_price * holding_rate * units`,
          or 0.0 when `pu_price` is missing or zero.

    Raises
    ------
    ValueError
        If `L`, `R` or `time` is not integral or is below its minimum.

    Notes
    -----
    - The empirical safety stock is the α-quantile of simulated total demand
      over L + R periods minus the mean of those totals, floored at zero.
    - The simulation tracks on-hand and in-transit inventories and identifies
      stockout events across periods and cycles.
    """
    L = _as_period_count(L, "L", minimum=0)
    R = _as_period_count(R, "R", minimum=1)
    time = _as_period_count(time, "time", minimum=1)

    if seed is not None:
        np.random.seed(seed)

    # --- Demand distribution attributes ---
    d_mu, _ = attributes(d_pmf, d_x)
    d = np.random.choice(d_x, size=time, p=d_pmf)

    # --- Empirical safety stock based on KDE quantile ---
    period_demand_samples = np.random.choice(
        d_x, size=(_N_QUANTILE_SAMPLES, L + R), p=d_pmf
    )
    total_demand_LR = np.sum(period_demand_samples, axis=1)
    ss_empirical = np.quantile(total_demand_LR, alpha) - np.mean(total_demand_LR)
    ss_empirical = max(0.0, ss_empirical)
    Ss = int(np.round(ss_empirical))

    # --- Stock components ---
    Cs = 0.5 * d_mu * R
    Is = d_mu * L
    S = Ss + 2 * Cs + Is

    # --- Cost impact ---
    S_value = round(pu_price * holding_rate * Ss, 4) if pu_price else 0.0

    # --- Inventory simulation ---
    SL_cycle, SL_period = _simulate_inventory(d, S, L, R)

    return round(alpha * 100, 1), SL_cycle, SL_period, Ss, S_value


def simulate_safety_custom_norm(
    d_mu, d_std, L=4, R=1, alpha=0.95, time=200, pu_price=None, holding_rate=1, seed=111
):
    """
    Simulate inventory performance using a normal demand distribution and
    compute safety stock based on the normal quantile method.

    Parameters
    ----------
    d_mu : float
        Mean of the normal demand distribution.
    d_std : float
        Standard deviation of the normal demand distribution.
    L : int, default=4
        Lead time (in periods) for replenishment. Integral floats such as
        4.0 are accepted.
    R : int, default=1
        Review period (in periods) between orders; must be at least 1.
    alpha : float, default=0.95
        Target service level (used for safety stock quantile); must satisfy
        0 <= alpha < 1.
    time : int, default=200
        Number of simulation periods; must be at least 1.
    pu_price : float, optional
        Unit price for estimating holding cost value of safety stock.
    holding_rate : float, default=1
        Holding cost rate applied to the safety stock.
    seed : int or None, default=111
        Seed for NumPy's global random state. `None` leaves the state
        untouched; 0 is a valid seed.

    Returns
    -------
    tuple
        (target_service_level, simulated_cycle_service_level,
         simulated_period_service_level, safety_stock_units, safety_stock_value)

        where:
        - target_service_level : float — `alpha` expressed as a percentage.
        - simulated_cycle_service_level : float — achieved service level per
          cycle, as a fraction.
        - simulated_period_service_level : float — achieved service level per
          period, as a fraction.
        - safety_stock_units : int — computed safety stock quantity.
        - safety_stock_value : float — `pu_price * holding_rate * units`,
          or 0 when `pu_price` is missing or zero.

    Raises
    ------
    ValueError
        If `L`, `R` or `time` is not integral or is below its minimum, or if
        `alpha` is outside [0, 1).

    Notes
    -----
    - Demand is drawn from a normal distribution, rounded to integers and
      clipped at zero (negative draws become 0).
    - Safety stock is computed as z * σ√(L+R), where z is the α-quantile of
      the standard normal, floored at zero.
    - The simulation tracks on-hand, in-transit inventory, and stockouts.
    """
    L = _as_period_count(L, "L", minimum=0)
    R = _as_period_count(R, "R", minimum=1)
    time = _as_period_count(time, "time", minimum=1)
    if not 0 <= alpha < 1:
        # ppf(1) is inf and ppf outside [0, 1] is nan; neither can be turned
        # into a stock quantity.
        raise ValueError(f"alpha must satisfy 0 <= alpha < 1, got {alpha!r}")

    if seed is not None:
        np.random.seed(seed)

    d = np.maximum(np.random.normal(d_mu, d_std, time).round(0).astype(int), 0)
    z = stats.norm.ppf(alpha)
    if z < 0:
        z = 0.0
    x_std = np.sqrt(L + R) * d_std
    Ss = max(0, int(np.round(x_std * z)))

    # --- Stock components ---
    Cs = 1 / 2 * d_mu * R
    Is = d_mu * L
    S = Ss + 2 * Cs + Is

    # --- Cost impact ---
    S_value = round(pu_price * holding_rate * Ss, 4) if pu_price else 0

    # --- Inventory simulation ---
    SL_cycle, SL_period = _simulate_inventory(d, S, L, R)

    return round(alpha * 100, 1), SL_cycle, SL_period, Ss, S_value