├── .gitignore ├── AdaptiveMarketPlanning ├── AdaptiveMarketPlanningDriverScript.py ├── AdaptiveMarketPlanningModel.py ├── AdaptiveMarketPlanningPolicy.py ├── Base parameters.xlsx ├── ParametricModel parameters.xlsx ├── ParametricModel.py ├── ParametricModelDriverScript.py └── README.txt ├── AssetSelling ├── AssetSelling.ipynb ├── AssetSellingModel.py └── AssetSellingPolicies.py ├── BaseClasses ├── Dummy.py ├── SDPModel.py ├── SDPPolicy.py └── Util.py ├── BloodManagement ├── BloodManagementDriverScript.py ├── BloodManagementModel.py ├── BloodManagementNetwork.py ├── BloodManagementPolicy.py ├── OutputAll.txt └── Parameters.xlsx ├── ClinicalTrials ├── ClinicalTrialsDriverScript.py ├── ClinicalTrialsDriverScriptSolutionQ4.py ├── ClinicalTrialsDriverScriptSolutionQ5.py ├── ClinicalTrialsDriverScriptSolutionQ6.py ├── ClinicalTrialsModel.py ├── ClinicalTrialsPolicy.py ├── ClinicalTrialsPolicySolutionQ6.py └── Parameters.xlsx ├── EnergyStorage_I ├── BackwardDP.py ├── EnergyStorageDriverScript.py ├── EnergyStorageModel.py ├── EnergyStoragePolicy.py └── Parameters.xlsx ├── LICENSE ├── MedicalDecisionDiabetes ├── MedicalDecisionDiabetes Solution.ipynb ├── MedicalDecisionDiabetes.ipynb ├── MedicalDecisionDiabetesModel.py └── MedicalDecisionDiabetesPolicies.py ├── README.md ├── StochasticShortestPath_Dynamic ├── Driver.py ├── GraphGenerator.py ├── Model.py ├── Network_Steps.xlsx ├── Parameters.xlsx └── Policy.py ├── StochasticShortestPath_Static ├── SSPStatic.ipynb ├── SSPStaticModel.py ├── SSPStaticPolicy.py └── cache │ ├── 68f662685724a6b23632a0c46475a528a84ae228.json │ ├── 77dcc202053a9972d96023e09bd81101008c0f76.json │ ├── 8256670b0dc93e9243acf697fcd93e584be4855b.json │ ├── 95e859f7c6e1b8b135d86a49734c82a9700bac74.json │ ├── 9c6fcf6dda6650e68aaa0cfee53cca9627c262e8.json │ ├── ef512a715bcd7d55d650793744f1d1689bd42fe2.json │ └── f5f5991d3d305d913eeb1881f9a87a92a5bffd02.json └── TwoNewsvendor ├── Parameters.xlsx ├── TwoNewsvendor.py ├── TwoNewsvendorDriverScript.py ├── TwoNewsvendorLearning.py └── TwoNewsvendorPolicy.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .vscode/launch.json 106 | .conda/vcruntime140.dll 107 | AssetSelling/~$asset_selling_policy_parameters.xlsx 108 | MedicalDecisionDiabetes/~$MDDMparameters.xlsx 109 | stash.ipynb 110 | SSPStatic/cache 111 | AssetSelling/AssetSelling_solution.ipynb 112 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Driver Script 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 8 | from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == "__main__": 15 | # this is an example of creating a model and running a simulation for a certain trial size 16 | 17 | # define state variables 18 | state_names = ['order_quantity', 'counter'] 19 | init_state = {'order_quantity': 0, 'counter': 0} 20 | decision_names = ['step_size'] 21 | 22 | # read in variables from excel file 23 | file = 'Base parameters.xlsx' 24 | raw_data = pd.ExcelFile(file) 25 | data = raw_data.parse('parameters') 26 | cost = data.iat[0, 2] 27 | trial_size = np.rint(data.iat[1, 2]).astype(int) 28 | price = data.iat[2, 2] 29 | theta_step = data.iat[3, 2] 30 | T = data.iat[4, 2] 31 | reward_type = data.iat[5, 2] 32 | 33 | # initialize model and store ordered quantities in an array 34 | M = AdaptiveMarketPlanningModel(state_names, decision_names, init_state, T,reward_type, price, cost) 35 | P = AdaptiveMarketPlanningPolicy(M, theta_step) 36 | 37 | rewards_per_iteration = [] 38 | learning_list_per_iteration = [] 39 | for ite in list(range(trial_size)): 40 | print("Starting iteration ", ite) 41 | reward,learning_list = P.run_policy() 42 | M.learning_list=[] 43 | #print(learning_list) 44 | rewards_per_iteration.append(reward) 45 | learning_list_per_iteration.append(learning_list) 46 | print("Ending iteration ", ite," Reward ",reward) 47 | 48 | 49 | nElem = np.arange(1,trial_size+1) 50 | 51 | rewards_per_iteration = np.array(rewards_per_iteration) 52 | rewards_per_iteration_sum = rewards_per_iteration.cumsum() 53 | rewards_per_iteration_cum_avg = rewards_per_iteration_sum/nElem 54 | 55 | if (reward_type=="Cumulative"): 56 | 
rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg/T 57 | rewards_per_iteration = rewards_per_iteration/T 58 | 59 | optimal_order_quantity = -np.log(cost/price) * 100 60 | print("Optimal order_quantity for price {} and cost {} is {}".format(price,cost,optimal_order_quantity)) 61 | print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1])) 62 | 63 | ite = np.random.randint(0,trial_size) 64 | order_quantity = learning_list_per_iteration[ite] 65 | print("Order quantity for iteration {}".format(ite)) 66 | print(order_quantity) 67 | 68 | #Ploting the reward 69 | fig1, axsubs = plt.subplots(1,2,sharex=True,sharey=True) 70 | fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type,theta_step,T) ) 71 | 72 | axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g') 73 | axsubs[0].set_title('Cum_average reward') 74 | 75 | axsubs[1].plot(nElem, rewards_per_iteration, 'g') 76 | axsubs[1].set_title('Reward per iteration') 77 | #Create a big subplot 78 | ax = fig1.add_subplot(111, frameon=False) 79 | # hide tick and tick label of the big axes 80 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) 81 | ax.set_ylabel('USD', labelpad=0) # Use argument `labelpad` to move label downwards. 82 | ax.set_xlabel('Iterations', labelpad=10) 83 | plt.show() 84 | 85 | 86 | 87 | 88 | # ploting the analytical sol 89 | plt.xlabel("Time") 90 | plt.ylabel("Order quantity") 91 | plt.title("Analytical vs learned ordered quantity - (iteration {})".format(ite)) 92 | time = np.arange(0, len(order_quantity)) 93 | plt.plot(time, time * 0 - np.log(cost/price) * 100, label = "Analytical solution") 94 | plt.plot(time, order_quantity, label = "Kesten's Rule for theta_step {}".format(theta_step)) 95 | plt.legend() 96 | plt.show() 97 | 98 | 99 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Model class 3 | 4 | Adapted from code by Donghun Lee (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | class AdaptiveMarketPlanningModel(): 14 | """ 15 | Base class for model 16 | """ 17 | 18 | def __init__(self, state_names, x_names, s_0, T,reward_type,price = 1.0, cost = 1.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613): 19 | """ 20 | Initializes the model 21 | 22 | :param state_names: list(str) - state variable dimension names 23 | :param x_names: list(str) - decision variable dimension names 24 | :param s_0: dict - need to contain at least information to populate initial state using s_names 25 | :param price: float - price p 26 | :param cost: float - cost c 27 | :param exog_info_fn: function - calculates relevant exogenous information 28 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 29 | evolves 30 | :param objective_fn: function - calculates contribution at time t 31 | :param seed: int - seed for random number generator 32 | """ 33 | 34 | self.init_args = {seed: seed} 35 | self.prng = np.random.RandomState(seed) 36 | self.init_state = s_0 37 | self.T = T 38 | self.reward_type = reward_type 39 | self.state_names = state_names 40 | self.x_names = x_names 41 | self.State = 
namedtuple('State', state_names) 42 | self.state = self.build_state(s_0) 43 | self.Decision = namedtuple('Decision', x_names) 44 | self.obj = 0.0 45 | self.past_derivative = 0.0 46 | self.cost = cost 47 | self.price = price 48 | self.t = 0 49 | self.learning_list=[] 50 | 51 | 52 | 53 | # this function gives a state containing all the state information needed 54 | def build_state(self, info): 55 | return self.State(*[info[k] for k in self.state_names]) 56 | 57 | # this function gives a decision 58 | def build_decision(self, info): 59 | return self.Decision(*[info[k] for k in self.x_names]) 60 | 61 | # this function gives the exogenous information that is dependent on a random process 62 | # computes the f_hat, chnage in the forecast over the horizon 63 | def exog_info_fn(self, decision): 64 | # return new demand based on a given distribution 65 | return {"demand": self.prng.exponential(100)} 66 | 67 | # this function takes in the decision and exogenous information to return 68 | # new state 69 | def transition_fn(self, decision, exog_info): 70 | 71 | self.learning_list.append(self.state.order_quantity) 72 | 73 | # compute derivative 74 | derivative = self.price - self.cost if self.state.order_quantity < exog_info['demand'] else - self.cost 75 | # update order quantity 76 | new_order_quantity = max(0, self.state.order_quantity + decision.step_size * derivative) 77 | print(' step ', decision.step_size) 78 | print(' derivative ', derivative) 79 | # count number of times derivative changes sign 80 | new_counter = self.state.counter + 1 if self.past_derivative * derivative < 0 else self.state.counter 81 | self.past_derivative = derivative 82 | 83 | 84 | 85 | return {"order_quantity": new_order_quantity, "counter": new_counter} 86 | 87 | # this function calculates how much money we make 88 | def objective_fn(self, decision, exog_info): 89 | self.order_quantity=self.state.order_quantity 90 | obj_part = self.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.state.order_quantity 91 | return obj_part 92 | 93 | # this method steps the process forward by one time increment by updating the sum of the contributions, the 94 | # exogenous information and the state variable 95 | def step(self, decision): 96 | self.t_update() 97 | exog_info = self.exog_info_fn(decision) 98 | onestep_contribution = self.objective_fn(decision, exog_info) 99 | 100 | print("t {}, Price {}, Demand {}, order_quantity {}, contribution {}".format(self.t,self.price,exog_info['demand'],self.order_quantity,onestep_contribution)) 101 | 102 | #Check if cumulative or terminal reward 103 | if (self.reward_type == 'Cumulative'): 104 | self.obj += onestep_contribution 105 | else: 106 | if (self.t == self.T): 107 | self.obj = onestep_contribution 108 | 109 | 110 | transition_info = self.transition_fn(decision, exog_info) 111 | self.state = self.build_state(transition_info) 112 | 113 | 114 | 115 | # Update method for time counter 116 | def t_update(self): 117 | self.t += 1 118 | return self.t -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningPolicy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Policy class 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | 8 | import numpy as np 9 | from copy import copy 10 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 11 | 12 | class AdaptiveMarketPlanningPolicy(): 13 | """ 14 | Base class for policy 15 | 
""" 16 | 17 | def __init__(self, AdaptiveMarketPlanningModel, theta_step): 18 | """ 19 | Initializes the model 20 | 21 | :param AdaptiveMarketPlanningModel: AdaptiveMarketPlanningModel - model to construct decision for 22 | :param theta_step: float - theta step variable 23 | """ 24 | 25 | self.M = AdaptiveMarketPlanningModel 26 | self.theta_step = theta_step 27 | 28 | # returns decision based on harmonic step size policy 29 | def harmonic_rule(self): 30 | return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.t - 1)}) 31 | 32 | # returns decision based on Kesten's rule policy 33 | def kesten_rule(self): 34 | return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.state.counter - 1)}) 35 | 36 | # returns decision based on a constant rule policy 37 | def constant_rule(self): 38 | return self.M.build_decision({'step_size': self.theta_step}) 39 | 40 | # returns decision based on a constant rule policy 41 | def run_policy(self): 42 | model_copy = copy(self.M) 43 | 44 | for t in range(model_copy.T): 45 | model_copy.step(AdaptiveMarketPlanningPolicy(model_copy, self.theta_step).kesten_rule()) 46 | 47 | 48 | 49 | return (model_copy.obj,model_copy.learning_list.copy()) 50 | 51 | 52 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/Base parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/AdaptiveMarketPlanning/Base parameters.xlsx -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModel parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/AdaptiveMarketPlanning/ParametricModel parameters.xlsx -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Model for variable price subclass 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 8 | 9 | import numpy as np 10 | 11 | class ParametricModel(AdaptiveMarketPlanningModel): 12 | """ 13 | Subclass for Adaptive Market Planning 14 | """ 15 | 16 | def __init__(self, state_names, x_names, s_0, T, reward_type, cost = 1.0, price_low = 1.0, price_high = 10.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613): 17 | """ 18 | Initializes the model 19 | 20 | See Adaptive Market Planning Model for more details 21 | """ 22 | super().__init__(state_names, x_names, s_0, T, reward_type,cost = cost, exog_info_fn=exog_info_fn, transition_fn=transition_fn, objective_fn=objective_fn, seed=seed) 23 | self.past_derivative = np.array([0, 0, 0]) 24 | self.low = price_low 25 | self.high = price_high 26 | self.PRICE_PROCESS ='RW' 27 | 28 | # returns order quantity for a given price and theta vector 29 | def order_quantity_fn(self, price, theta): 30 | return max(0,theta[0] + theta[1] * price + theta[2] * price ** (-2)) 31 | 32 | # returns derivative for a given price and theta vector 33 | def derivative_fn(self, price, theta): 34 | return np.array([1, price, price ** (-2)]) 35 | 36 | # this function takes in the decision 
and exogenous information to return 37 | # new state 38 | def transition_fn(self, decision, exog_info): 39 | 40 | self.learning_list.append(self.state.theta) 41 | print(' theta ',self.state.theta) 42 | 43 | # compute derivative and update theta 44 | derivative = np.array([0, 0, 0]) 45 | if self.order_quantity_fn(self.state.price, self.state.theta) < exog_info['demand']: 46 | derivative = (self.state.price - self.cost) * self.derivative_fn(self.state.price, self.state.theta) 47 | else: 48 | derivative = (- self.cost) * self.derivative_fn(self.state.price, self.state.theta) 49 | 50 | new_theta = self.state.theta + decision.step_size * derivative 51 | 52 | new_counter = self.state.counter + 1 if np.dot(self.past_derivative, derivative) < 0 else self.state.counter 53 | print(' step ', decision.step_size) 54 | print(' derivative ', derivative) 55 | print('new theta ',new_theta) 56 | 57 | 58 | self.past_derivative = derivative 59 | 60 | # generate random price 61 | if (self.PRICE_PROCESS == 'RW'): 62 | coin = self.prng.uniform() 63 | delta = 0 64 | if coin < .2: 65 | delta = -1 66 | elif coin >.8: 67 | delta = 1 68 | 69 | new_price = min(self.high,max(self.low,self.state.price + delta)) 70 | else: 71 | new_price = self.prng.uniform(self.low, self.high) 72 | 73 | 74 | 75 | return {"counter": new_counter, "price": new_price, "theta": new_theta} 76 | 77 | # this function calculates how much money we make 78 | def objective_fn(self, decision, exog_info): 79 | self.price = self.state.price 80 | self.order_quantity=self.order_quantity_fn(self.state.price, self.state.theta) 81 | obj_part = self.state.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.order_quantity 82 | return obj_part -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModelDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parametric Model Driver Script 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from ParametricModel import ParametricModel 8 | from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == "__main__": 15 | # this is an example of creating a model and running a simulation for a certain trial size 16 | 17 | # define state variables 18 | state_names = ['counter', 'price', 'theta'] 19 | init_state = {'counter': 0, 'price': 26, 'theta': np.array([1, 1, 1])} 20 | decision_names = ['step_size'] 21 | 22 | # read in variables from excel file 23 | file = 'ParametricModel parameters.xlsx' 24 | raw_data = pd.ExcelFile(file) 25 | data = raw_data.parse('parameters') 26 | cost = data.iat[0, 2] 27 | trial_size = np.rint(data.iat[1, 2]).astype(int) 28 | price_low = data.iat[2, 2] 29 | price_high = data.iat[3, 2] 30 | theta_step = data.iat[4, 2] 31 | T = data.iat[5, 2] 32 | reward_type = data.iat[6, 2] 33 | 34 | # initialize model and run simulations 35 | M = ParametricModel(state_names, decision_names, init_state, T, reward_type,cost, price_low = price_low, price_high = price_high) 36 | print("Theta_step ",theta_step) 37 | P = AdaptiveMarketPlanningPolicy(M, theta_step) 38 | 39 | rewards_per_iteration = [] 40 | learning_list_per_iteration = [] 41 | for ite in list(range(trial_size)): 42 | print("Starting iteration ", ite) 43 | reward,learning_list = P.run_policy() 44 | M.learning_list=[] 45 | #print(learning_list) 46 | rewards_per_iteration.append(reward) 47 
| learning_list_per_iteration.append(learning_list) 48 | print("Ending iteration ", ite," Reward ",reward) 49 | 50 | 51 | nElem = np.arange(1,trial_size+1) 52 | 53 | rewards_per_iteration = np.array(rewards_per_iteration) 54 | rewards_per_iteration_sum = rewards_per_iteration.cumsum() 55 | rewards_per_iteration_cum_avg = rewards_per_iteration_sum/nElem 56 | 57 | if (reward_type=="Cumulative"): 58 | rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg/T 59 | rewards_per_iteration = rewards_per_iteration/T 60 | 61 | 62 | print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1])) 63 | 64 | price = np.arange(price_low, price_high, 1) 65 | optimal = -np.log(cost/price) * 100 66 | df = pd.DataFrame({'Price' : price, 'OptOrderQuantity' : optimal}) 67 | print(df) 68 | 69 | ite = np.random.randint(0,trial_size) 70 | theta_ite = learning_list_per_iteration[ite] 71 | #print("Thetas for iteration {}".format(ite)) 72 | #print(theta_ite) 73 | 74 | #Ploting the reward 75 | fig1, axsubs = plt.subplots(1,2,sharex=True,sharey=True) 76 | fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type,theta_step,T) ) 77 | 78 | axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g') 79 | axsubs[0].set_title('Cum_average reward') 80 | 81 | axsubs[1].plot(nElem, rewards_per_iteration, 'g') 82 | axsubs[1].set_title('Reward per iteration') 83 | #Create a big subplot 84 | ax = fig1.add_subplot(111, frameon=False) 85 | # hide tick and tick label of the big axes 86 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) 87 | ax.set_ylabel('USD', labelpad=0) # Use argument `labelpad` to move label downwards. 88 | ax.set_xlabel('Iterations', labelpad=10) 89 | plt.show() 90 | 91 | 92 | if (False): 93 | for i in range(trial_size): 94 | M.step(AdaptiveMarketPlanningPolicy(M, theta_step).kesten_rule()) 95 | 96 | # plot results 97 | price = np.arange(price_low, price_high, 0.1) 98 | optimal = -np.log(cost/price) * 100 99 | plt.plot(price, optimal, color = 'green', label = "analytical solution") 100 | order_quantity = [M.order_quantity_fn(k, M.state.theta) for k in price] 101 | plt.plot(price, order_quantity, color = 'blue', label = "parametrized solution") 102 | plt.legend() 103 | plt.show() -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/README.txt: -------------------------------------------------------------------------------- 1 | In order to run the code for question 2, set the parameters on the file "Base parameters.xlsx” sheet “parameters” and then run "python AdaptiveMarketPlanningDriverScript.py”. 2 | 3 | In order to run the code for the parametric model, set the parameters on the file “ParametricModel parameters.xlsx” sheet “parameters” and then run "python ParametricModelDriverScript.py” -------------------------------------------------------------------------------- /AssetSelling/AssetSelling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# An Asset Selling Model\n", 8 | "In this notebook you will optimize some simple parametric policies for the asset selling problem.\n", 9 | "\n", 10 | "We start by creating an instance of the model and an instance of a policy. Let's start with the Sell-Low-policy. 
First, we instantiate a model and specify the initial value for the state (price) and the length of the time horizon T in the constructor. It would also be possible to exert more control on the exogenous information process by specifying parameters for it that are used inside the process." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import plotly.express as px\n", 22 | "import AssetSellingModel as asm\n", 23 | "import AssetSellingPolicies as asp\n", 24 | "import BaseClasses.Util as util\n", 25 | "\n", 26 | "model = asm.AssetSellingModel(S0={\"price\": 20}, T=30)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Next, we create a policy for this model. The high-low policy has two tunable parameters, namely `theta_low` and `theta_high`.\n", 34 | "Then, we run the policy for 100 iterations/episodes. The `run_policy` method returns the average objective function value over all episodes." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "high_low_policy = asp.HighLowPolicy(model=model, theta_low=10, theta_high=30)\n", 44 | "high_low_policy.run_policy(n_iterations=100)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Exercise 1\n", 52 | "Execute the cell several times. How do you explain that the result is different every time? Do you notice any difference in this behavior if you change the number of iterations to 10, 1000, 10000?\n", 53 | "\n", 54 | "---" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "We investigate the results in more detail. The results of a policy run are stored in a DataFrame called `results`. Every row corresponds to one timestep of one iteration/episode." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "high_low_policy.results" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Next, we plot a few of the 100 paths using plotly. We notice that if the price never drops below `theta_low`, the asset is sold at the end of the time horizon." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "sample_paths = np.random.choice(100, size=5, replace=False)\n", 87 | "df = high_low_policy.results.loc[high_low_policy.results.N.isin(sample_paths), :]\n", 88 | "px.line(data_frame=df, x=\"t\", y=\"price\", facet_row=\"N\", height=800)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The average amount of money that we make selling the asset depends, of course, on the values of `theta_low` and `theta_high`. If we, for example, set `theta_low` to a higher value, it seems that the average profit is higher (note that it will be slightly different every time we execute the cell)."
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "high_low_policy.theta_low = 19\n", 105 | "high_low_policy.run_policy(n_iterations=100)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Next, we will try to find the best values for `theta_low` and `theta_high`. This is called *parameter tuning*. To do this, we just systematically try out different combinations of values for both. This strategy is called a *grid search*, and there is a simple convenience method to automate it." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Define a grid for combinations of theta_low and theta_high.\n", 122 | "# theta_low should not be larger than the starting price (20) and theta_high should not be smaller than the starting price.\n", 123 | "grid = {\"theta_low\": np.linspace(10, 20, 11), \"theta_high\": np.linspace(20, 30, 11)}\n", 124 | "result = util.grid_search(grid, high_low_policy, n_iterations=10, ordered=True)\n", 125 | "\n", 126 | "print(f\"Best parameters: {result['best_parameters']} with an objective of {result['best_performance']}.\")" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "The result object gives us the best parameters and the corresponding performance, but it also gives information about all the runs. We transform them into matrix form and visualize them with a heatmap." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "res_grid = result[\"all_runs\"].pivot(index=\"theta_low\", columns=\"theta_high\", values=\"performance\")\n", 143 | "px.imshow(res_grid)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Apparently, with the given uncertainty model and the high-low policy, the best profit is only slightly above the start price." 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Exercise 2\n", 158 | "1. Create an instance of the tracking policy that is implemented in the class `TrackPolicy` in the module `AssetSellingPolicies` and run the policy for 100 iterations. Describe in your own words how this policy makes a decision. \n", 159 | "2. The policy has one tunable parameter `theta`. Run a grid search to find the best value for `theta`. Is the tracking policy better than the high-low policy?" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "---" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "In the first version of our sequential decision model, we used a stochastic model to generate observations. We now introduce a new version, where we draw sample observations $W_{t+1}$ from historical data. Consider the following version of our problem:\n", 174 | "\n", 175 | "*You own a share of a company at the beginning of the month. Every day, you need to decide if you sell it (for the closing price of this day) or not. 
If by the end of the month the stock is still in your possession, it is sold at the closing price of the last day of the month.*\n", 176 | "\n", 177 | "To generate different observations for one month, we will use 10 years of historical data, where we scale the data so each month starts at zero. This gives us 120 observations in total that we will use to tune our policy.\n", 178 | " \n", 179 | "As an example, we download data for the SAP stock using the package `yfinance` and reshape it to match our needs." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "import yfinance as yf\n", 189 | "\n", 190 | "def get_historical_monthly_paths(stock_name, start=\"2014-01-01\", end=\"2023-12-31\"):\n", 191 | " stock = yf.Ticker(stock_name)\n", 192 | "\n", 193 | " # Get historical market data (this makes an API call to Yahoo Finance)\n", 194 | " hist = stock.history(start=start, end=end, interval=\"1d\")\n", 195 | "\n", 196 | " # We just keep the \"Close\" column\n", 197 | " hist = hist.drop([\"Open\", \"High\", \"Low\", \"Volume\", \"Dividends\", \"Stock Splits\"], axis=1)\n", 198 | "\n", 199 | " # Enumerate the months from the start and store as a separate column\n", 200 | " hist[\"N\"] = hist.index.tz_convert(None).to_period('M')\n", 201 | " hist[\"N\"] = hist[\"N\"].apply(lambda x: x.ordinal) - hist[\"N\"].iloc[0].ordinal\n", 202 | "\n", 203 | " # Get the Close price at the beginning of every month and subtract it from the Close value \n", 204 | " hist_month_start = hist.groupby(\"N\").head(1).rename({\"Close\": \"Close_Month_Start\"}, axis=1)\n", 205 | " hist_month_start = pd.merge(hist, hist_month_start, on=\"N\")\n", 206 | " hist_month_start[\"price\"] = (hist_month_start[\"Close\"] - hist_month_start[\"Close_Month_Start\"])\n", 207 | " \n", 208 | " return hist_month_start.drop([\"Close\", \"Close_Month_Start\"], axis=1)\n", 209 | "\n", 210 | "hist_prices = get_historical_monthly_paths(stock_name=\"SAP\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "Let's have a look at our historical sample paths $W_{t+1}$:" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "hist_prices" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "We now create a new model where the exogenous information process is modified so that at each iteration, one month of the historical data is selected. We also create an instance of the high-low policy and run it for 120 iterations." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "model_hist = asm.AssetSellingModelHistorical(hist_data=hist_prices)\n", 243 | "policy_hist = asp.HighLowPolicy(model=model_hist, theta_low=-10, theta_high=10)\n", 244 | "policy_hist.run_policy(n_iterations=120)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Next, we need to optimize `theta_high` and `theta_low`. To get a feeling for which values we might try, we first have a look at the distribution of monthly price deviations."
252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "px.histogram(hist_prices, x=\"price\")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "We see that during one month, the difference of the stock price to the price at the beginning of the month is usually between -30 and +30 (with some outliers). We do a grid search on a 16x16 grid to find the best combination of `theta_low` and `theta_high`." 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "grid = {\"theta_low\": np.linspace(-30, 0, 16), \"theta_high\": np.linspace(0, 30, 16)}\n", 277 | "result = util.grid_search(grid, policy_hist, n_iterations=120, ordered=True)\n", 278 | "\n", 279 | "print(f\"Best parameters: {result['best_parameters']} with an objective of {result['best_performance']}.\")" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "## Exercise 3\n", 287 | "Go to https://finance.yahoo.com/ to look up names and historical charts of stocks. Repeat the steps above with a stock of your choice (Tesla? Wirecard? ...?). You should be able to use the function `get_historical_monthly_paths` from above to get the data in the appropriate format.Try to find a policy, i.e. \"sell-low\", \"high-low\", or \"track\", with corresponding parameters that maximizes the expected profit.\n", 288 | "\n", 289 | "---" 290 | ] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "sda", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.10.12" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /AssetSelling/AssetSellingModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | import pandas as pd 6 | 7 | 8 | class AssetSellingModel(SDPModel): 9 | def __init__( 10 | self, 11 | S0: dict, 12 | t0: float = 0, 13 | T: float = 1, 14 | seed: int = 42, 15 | alpha: float = 0.7, 16 | var: float = 2, 17 | bias_df: pd.DataFrame = None, 18 | upstep: float = 1, 19 | downstep: float = -1, 20 | ) -> None: 21 | state_names = ["price", "bias", "price_smoothed", "resource"] 22 | 23 | # Set default values for helper states 24 | if "bias" not in S0: 25 | S0["bias"] = "Neutral" 26 | if "price_smoothed" not in S0: 27 | S0["price_smoothed"] = S0["price"] 28 | if "resource" not in S0: 29 | S0["resource"] = 1 30 | 31 | decision_names = ["sell"] 32 | super().__init__(state_names, decision_names, S0, t0, T, seed) 33 | self.alpha = alpha 34 | self.var = var 35 | if bias_df is None: 36 | self.bias_df = pd.DataFrame( 37 | {"Up": [0.9, 0.1, 0], "Neutral": [0.2, 0.6, 0.2], "Down": [0, 0.1, 0.9]} 38 | ) 39 | self.bias_df.index = ["Up", "Neutral", "Down"] 40 | else: 41 | self.bias_df = bias_df 42 | self.upstep = upstep 43 | self.downstep = downstep 44 | 45 | def is_finished(self): 46 | """ 47 | Check if the model run (episode) is 
finished. 48 | This is either the case when the time is over or we no longer hold the asset. 49 | 50 | Returns: 51 | bool: True if the run is finished, False otherwise. 52 | """ 53 | hold_asset = self.state.resource 54 | return super().is_finished() or not hold_asset 55 | 56 | def exog_info_fn(self, decision): 57 | """ 58 | Generates exogenous information for the asset selling model. 59 | 60 | Args: 61 | decision: The decision made (not used). 62 | 63 | Returns: 64 | A dictionary containing the updated price and bias. 65 | 66 | Notes: 67 | - The change in price is assumed to be normally distributed with mean bias and given variance. 68 | - The bias changes in every step according to given parameters. 69 | - The new price is set to 0 whenever the random process gives a negative price. 70 | """ 71 | biasprob = self.bias_df[self.state.bias] 72 | 73 | coin = self.prng.uniform() 74 | if coin < biasprob["Up"]: 75 | new_bias = "Up" 76 | bias = self.upstep 77 | elif coin >= biasprob["Up"] and coin < biasprob["Neutral"] + biasprob["Up"]: 78 | new_bias = "Neutral" 79 | bias = 0 80 | else: 81 | new_bias = "Down" 82 | bias = self.downstep 83 | 84 | price_delta = self.prng.normal(bias, self.var) 85 | updated_price = self.state.price + price_delta 86 | new_price = 0.0 if updated_price < 0.0 else updated_price 87 | 88 | return { 89 | "price": new_price, 90 | "bias": new_bias, 91 | } 92 | 93 | def transition_fn(self, decision, exog_info): 94 | alpha = self.alpha 95 | new_resource = 0 if decision.sell == 1 else self.state.resource 96 | new_price_smoothed = (1 - alpha) * self.state.price_smoothed + alpha * exog_info["price"] 97 | 98 | return {"resource": new_resource, "price_smoothed": new_price_smoothed} 99 | 100 | def objective_fn(self, decision, exog_info): 101 | sell_size = 1 if decision.sell == 1 and self.state.resource != 0 else 0 102 | return self.state.price * sell_size 103 | 104 | 105 | class AssetSellingModelHistorical(AssetSellingModel): 106 | def __init__( 107 | self, 108 | hist_data: pd.DataFrame, 109 | alpha: float = 0.7, 110 | ) -> None: 111 | super().__init__(S0={"price": 0.0}, alpha=alpha) 112 | self.T = 100 113 | self.hist_data = hist_data 114 | 115 | def reset(self, reset_prng: bool = False): 116 | # Get the subset of the historical data that corresponds to the current episode 117 | self.episode_data = self.hist_data.loc[self.hist_data["N"] == self.episode_counter, :] 118 | self.episode_data = self.episode_data["price"].tolist() 119 | self.episode_data.pop(0) 120 | self.T = len(self.episode_data) 121 | super().reset(reset_prng) 122 | 123 | def exog_info_fn(self, decision): 124 | return {"price": self.episode_data.pop(0), "bias": "Neutral"} 125 | -------------------------------------------------------------------------------- /AssetSelling/AssetSellingPolicies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | from BaseClasses.SDPPolicy import SDPPolicy 6 | 7 | 8 | class SellLowPolicy(SDPPolicy): 9 | def __init__(self, model: SDPModel, policy_name: str = "SellLow", theta_low: float = 10): 10 | super().__init__(model, policy_name) 11 | self.theta_low = theta_low 12 | 13 | def get_decision(self, state, t, T): 14 | new_decision = {"sell": 1, "hold": 0} if state.price < self.theta_low else {"sell": 0, "hold": 1} 15 | 16 | if t == T - 1: 17 | new_decision = {"sell": 1, "hold": 0} 18 | return new_decision 19 | 20 | 21 | class HighLowPolicy(SDPPolicy): 22 | def 
__init__( 23 | self, model: SDPModel, policy_name: str = "HighLow", theta_low: float = 10, theta_high: float = 30 24 | ): 25 | super().__init__(model, policy_name) 26 | self.theta_low = theta_low 27 | self.theta_high = theta_high 28 | 29 | def get_decision(self, state, t, T): 30 | new_decision = ( 31 | {"sell": 1, "hold": 0} 32 | if state.price < self.theta_low or state.price > self.theta_high 33 | else {"sell": 0, "hold": 1} 34 | ) 35 | 36 | if t == T - 1: 37 | new_decision = {"sell": 1, "hold": 0} 38 | 39 | return new_decision 40 | 41 | 42 | class TrackPolicy(SDPPolicy): 43 | def __init__(self, model: SDPModel, policy_name: str = "Track", theta: float = 10): 44 | super().__init__(model, policy_name) 45 | self.theta = theta 46 | 47 | def get_decision(self, state, t, T): 48 | new_decision = ( 49 | {"sell": 1, "hold": 0} 50 | if state.price >= state.price_smoothed + self.theta 51 | or state.price <= state.price_smoothed - self.theta 52 | else {"sell": 0, "hold": 1} 53 | ) 54 | 55 | if t == T - 1: 56 | new_decision = {"sell": 1, "hold": 0} 57 | return new_decision 58 | -------------------------------------------------------------------------------- /BaseClasses/Dummy.py: -------------------------------------------------------------------------------- 1 | from SDPModel import SDPModel 2 | from SDPPolicy import SDPPolicy 3 | from copy import deepcopy, copy 4 | 5 | 6 | class DummyModel(SDPModel): 7 | def exog_info_fn(self, decision): 8 | return {"W": self.prng.random()} 9 | 10 | def transition_fn(self, decision, exog_info): 11 | return {"S": exog_info["W"]} 12 | 13 | def objective_fn(self, decision, exog_info): 14 | return 0.0 15 | 16 | 17 | class DummyPolicy(SDPPolicy): 18 | def get_decision(self, state): 19 | return {"x": 0} 20 | 21 | 22 | model = DummyModel(state_names=["S"], decision_names=["x"], S0={"S": 0.0}, T=10) 23 | # Initialize different policies (different thetas) with a deep copy of the model to guarantee 24 | # that both are run with the same random values from the prng. 25 | policy = DummyPolicy(model=deepcopy(model), policy_name="dummy policy") 26 | policy2 = DummyPolicy(model=deepcopy(model), policy_name="dummy policy2") 27 | print(policy.policy_name) 28 | policy.run_policy(n_iterations=3) 29 | print(policy2.policy_name) 30 | policy2.run_policy(n_iterations=2) 31 | -------------------------------------------------------------------------------- /BaseClasses/SDPModel.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | from abc import ABC, abstractmethod 4 | 5 | 6 | class SDPModel(ABC): 7 | """ 8 | Sequential decision problem base class 9 | 10 | This class represents a base class for sequential decision problems. 11 | It provides methods for initializing the problem, resetting the state, 12 | performing a single step in the problem, and updating the time index. 13 | 14 | Attributes: 15 | State (namedtuple): Named tuple representing the state variables. 16 | Decision (namedtuple): Named tuple representing the decision variables. 17 | state_names (list): List of state variable names. 18 | decision_names (list): List of decision variable names. 19 | initial_state (State): Initial state of the problem. 20 | state (State): Current state of the problem. 21 | objective (float): Objective value of the problem. 22 | t (float): Current time index. 23 | T (float): Terminal time. 24 | prng (RandomState): Random number generator. 
25 | episode_counter (int): Which set of historical data (episode) to be used. 26 | 27 | Methods: 28 | __init__: Initializes an instance of the SDPModel class. 29 | reset: Resets the SDPModel to its initial state. 30 | build_state: Sets the new state values using the provided information. 31 | build_decision: Builds a decision object using the provided information. 32 | exog_info_fn: Abstract method for generating exogenous information. 33 | transition_fn: Abstract method for computing the state transition. 34 | objective_fn: Abstract method for computing the objective value. 35 | update_t: Updates the value of the time index. 36 | step: Performs a single step in the sequential decision problem. 37 | """ 38 | 39 | def __init__( 40 | self, 41 | state_names: list, 42 | decision_names: list, 43 | S0: dict, 44 | t0: float = 0, 45 | T: float = 1, 46 | seed: int = 42, 47 | ) -> None: 48 | """ 49 | Initializes an instance of the SDPModel class. 50 | 51 | Args: 52 | state_names (list): List of state variable names. 53 | decision_names (list): List of decision variable names. 54 | S0 (dict): Initial state values. 55 | t0 (float, optional): Initial time. Defaults to 0. 56 | T (float, optional): Terminal time. Defaults to 1. 57 | seed (int, optional): Seed for random number generation. Defaults to 42. 58 | exog_params (dict, optional): (Static) parameters to be used by the exogenuous information process. 59 | state_params (dict, optional): (Static) parameters to be used by the state transition function. 60 | """ 61 | self.State = namedtuple("State", state_names) 62 | self.Decision = namedtuple("Decision", decision_names) 63 | 64 | self.state_names = state_names 65 | self.decision_names = decision_names 66 | 67 | self.initial_state = self.build_state(S0) 68 | self.state = self.build_state(S0) 69 | 70 | self.objective = 0.0 71 | self.t0 = t0 72 | self.t = t0 73 | self.T = T 74 | self.seed = seed 75 | self.prng = np.random.RandomState(seed) 76 | self.episode_counter = 0 77 | 78 | def reset(self, reset_prng: bool = False): 79 | """ 80 | Resets the SDPModel to its initial state. 81 | 82 | This method resets the state, objective, and time variables of the SDPModel 83 | to their initial values. 84 | 85 | Parameters: 86 | None 87 | 88 | Returns: 89 | None 90 | """ 91 | self.state = self.initial_state 92 | self.objective = 0.0 93 | self.t = self.t0 94 | if reset_prng is True: 95 | self.prng = np.random.RandomState(self.seed) 96 | 97 | def build_state(self, info: dict): 98 | """ 99 | Sets the new state values using the provided information. 100 | 101 | Args: 102 | info (dict): A dictionary containing the new values for all state variables. 103 | 104 | Returns: 105 | State: The updated state object. 106 | """ 107 | return self.State(*[info[k] for k in self.state_names]) 108 | 109 | def build_decision(self, info: dict): 110 | """ 111 | Builds a decision object using the provided information. 112 | 113 | Args: 114 | info (dict): A dictionary containing the new values for all decision variables. 115 | 116 | Returns: 117 | Decision: The decision object. 118 | """ 119 | return self.Decision(*[info[k] for k in self.decision_names]) 120 | 121 | @abstractmethod 122 | def exog_info_fn(self, decision): 123 | """ 124 | Abstract method for generating exogenous information. 125 | 126 | This method should be implemented in the derived classes to generate 127 | the exogenous information based on the current decision. 128 | 129 | Args: 130 | decision (namedtuple): The current decision. 
131 | 132 | Returns: 133 | dict: A dictionary containing the exogenous information. 134 | """ 135 | pass 136 | 137 | @abstractmethod 138 | def transition_fn(self, decision, exog_info: dict): 139 | """ 140 | Abstract method for computing the state transition. 141 | 142 | This method should be implemented in the derived classes to compute 143 | the state transition based on the current state, decision, and exogenous information. 144 | 145 | Args: 146 | decision (namedtuple): The current decision. 147 | exog_info (dict): The exogenous information. 148 | 149 | Returns: 150 | dict: A dictionary containing the updated state variables. 151 | """ 152 | pass 153 | 154 | @abstractmethod 155 | def objective_fn(self, decision, exog_info: dict): 156 | """ 157 | Abstract method for computing the objective value. 158 | 159 | This method should be implemented in the derived classes to compute 160 | the objective value contribution based on the current state, decision, 161 | and exogenous information. 162 | 163 | Args: 164 | decision (namedtuple): The current decision. 165 | exog_info (dict): The exogenous information. 166 | 167 | Returns: 168 | float: The contribution to the objective. 169 | """ 170 | pass 171 | 172 | def is_finished(self): 173 | """ 174 | Check if the model is finished. By default, the model runs until the end of the time horizon 175 | but the method can be overwritten to model episodic tasks where the time horizon ends earlier. 176 | 177 | Returns: 178 | bool: True if the run is finished, False otherwise. 179 | """ 180 | if self.t >= self.T: 181 | return True 182 | else: 183 | return False 184 | 185 | def update_t(self): 186 | """ 187 | Update the value of the time index t. 188 | """ 189 | self.t += 1 190 | 191 | return self.t 192 | 193 | def step(self, decision): 194 | """ 195 | Performs a single step in the sequential decision problem. 196 | 197 | Args: 198 | decision (namedtuple): The decision made at the current state. 199 | 200 | Returns: 201 | The new state after the step and a flag indicating if the episode is finished. 202 | """ 203 | # Generate new exogenous information W_t+1 204 | exog_info = self.exog_info_fn(decision) 205 | 206 | # Compute objective C_t based on W_t+1, x_t, S_t (state is not updated yet) 207 | self.objective += self.objective_fn(decision, exog_info) 208 | 209 | # Execute transition function and add new state to exog_info dict 210 | exog_info.update(self.transition_fn(decision, exog_info)) 211 | 212 | # Build new state from state variables and (optionally) exog_info variables. 213 | # This is convenient if some of the exogenous variables are also state variables. 214 | self.state = self.build_state(exog_info) 215 | 216 | # Update time counter 217 | self.update_t() 218 | 219 | # From the returned state S_t+1, the policy generates a new decision 220 | return self.state 221 | -------------------------------------------------------------------------------- /BaseClasses/SDPPolicy.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from abc import ABC, abstractmethod 3 | import pandas as pd 4 | from . import SDPModel 5 | 6 | 7 | class SDPPolicy(ABC): 8 | def __init__(self, model: SDPModel, policy_name: str = ""): 9 | self.model = model 10 | self.policy_name = policy_name 11 | self.results = pd.DataFrame() 12 | self.performance = pd.NA 13 | 14 | @abstractmethod 15 | def get_decision(self, state, t, T): 16 | """ 17 | Returns the decision made by the policy based on the given state. 
18 | 19 | Args: 20 | state (namedtuple): The current state of the system. 21 | t (float): The current time step. 22 | T (float): The end of the time horizon / total number of time steps. 23 | 24 | Returns: 25 | dict: The decision made by the policy. 26 | """ 27 | pass 28 | 29 | def run_policy(self, n_iterations: int = 1): 30 | """ 31 | Runs the policy over the time horizon [0,T] for a specified number of iterations and return the mean performance. 32 | 33 | Args: 34 | n_iterations (int): The number of iterations to run the policy. Default is 1. 35 | 36 | Returns: 37 | None 38 | """ 39 | result_list = [] 40 | # Note: the random number generator is not reset when calling copy(). 41 | # When calling deepcopy(), it is reset (then all iterations are exactly the same). 42 | for i in range(n_iterations): 43 | model_copy = copy(self.model) 44 | model_copy.episode_counter = i 45 | model_copy.reset(reset_prng=False) 46 | state_t_plus_1 = None 47 | while model_copy.is_finished() is False: 48 | state_t = model_copy.state 49 | decision_t = model_copy.build_decision(self.get_decision(state_t, model_copy.t, model_copy.T)) 50 | 51 | # Logging 52 | results_dict = {"N": i, "t": model_copy.t, "C_t sum": model_copy.objective} 53 | results_dict.update(state_t._asdict()) 54 | results_dict.update(decision_t._asdict()) 55 | result_list.append(results_dict) 56 | 57 | state_t_plus_1 = model_copy.step(decision_t) 58 | 59 | results_dict = {"N": i, "t": model_copy.t, "C_t sum": model_copy.objective} 60 | if state_t_plus_1 is not None: 61 | results_dict.update(state_t_plus_1._asdict()) 62 | result_list.append(results_dict) 63 | 64 | # Logging 65 | self.results = pd.DataFrame.from_dict(result_list) 66 | # t_end per iteration 67 | self.results["t_end"] = self.results.groupby("N")["t"].transform("max") 68 | 69 | # performance of one iteration is the cumulative objective at t_end 70 | self.performance = self.results.loc[self.results["t"] == self.results["t_end"], ["N", "C_t sum"]] 71 | self.performance = self.performance.set_index("N") 72 | 73 | # For reporting, convert cumulative objective to contribution per time 74 | self.results["C_t"] = self.results.groupby("N")["C_t sum"].diff().shift(-1) 75 | 76 | if self.results["C_t sum"].isna().sum() > 0: 77 | print(f"Warning! For {self.results['C_t sum'].isna().sum()} iterations the performance was NaN.") 78 | 79 | return self.performance.mean().iloc[0] 80 | -------------------------------------------------------------------------------- /BaseClasses/Util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from itertools import product 3 | from copy import deepcopy 4 | from . 
import SDPPolicy 5 | 6 | 7 | def grid_search(grid: dict, policy: SDPPolicy.SDPPolicy, n_iterations: int, ordered: bool = False): 8 | if len(grid) != 2 and ordered: 9 | ordered = False 10 | print("Warning: Grid search for ordered parameters only works if there are exactly two parameters.") 11 | best_performance = 0.0 12 | best_parameters = None 13 | rows = [] 14 | params = grid.keys() 15 | for v in product(*grid.values()): 16 | if ordered: 17 | if v[0] >= v[1]: 18 | continue 19 | 20 | # Do a deep copy so all parameter sets get the same random numbers 21 | policy_copy = deepcopy(policy) 22 | 23 | for param, value in zip(params, v): 24 | setattr(policy_copy, param, value) 25 | 26 | performance = policy_copy.run_policy(n_iterations=n_iterations) 27 | 28 | row = dict(zip(params, v)) 29 | row["performance"] = performance 30 | rows.append(row) 31 | if performance > best_performance: 32 | best_performance = performance 33 | best_parameters = dict(zip(params, v)) 34 | 35 | return { 36 | "best_parameters": best_parameters, 37 | "best_performance": best_performance, 38 | "all_runs": pd.DataFrame(rows), 39 | } 40 | -------------------------------------------------------------------------------- /BloodManagement/BloodManagementModel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import (namedtuple, defaultdict) 3 | 4 | class Model(): 5 | """ 6 | Base class for model 7 | """ 8 | 9 | def __init__(self, state_names, x_names, init_state, Bld_Net,params): 10 | 11 | self.params = params 12 | self.init_state = init_state 13 | self.state_names = state_names 14 | self.x_names = x_names 15 | self.State = namedtuple('State', state_names) 16 | self.state = self.build_state(init_state) 17 | self.Decision = namedtuple('Decision', x_names) 18 | self.obj = 0.0 19 | self.Bld_Net = Bld_Net 20 | self.bld_inv = init_state['BloodInventory'] 21 | self.demand = init_state['Demand'] 22 | self.donation = init_state['Donation'] 23 | 24 | # include initial inventory into the network 25 | for i in range(self.params['NUM_BLD_NODES']): 26 | self.Bld_Net.bloodamount[i] = self.bld_inv[i] 27 | 28 | # include initial demand into the network 29 | for i in range(self.params['NUM_DEM_NODES']): 30 | self.Bld_Net.demandamount[i] = self.demand[i] 31 | 32 | 33 | def build_state(self, info): 34 | return self.State(*[info[k] for k in self.state_names]) 35 | 36 | def build_decision(self, info): 37 | return self.Decision(*[info[k] for k in self.x_names]) 38 | 39 | # exogenous information = demand from t-1 to t and new donated blood 40 | def exog_info_fn(self, exog_info): 41 | self.demand = exog_info.demand 42 | # update the demand nodes 43 | for i in range(self.params['NUM_DEM_NODES']): 44 | self.Bld_Net.demandamount[i] = exog_info.demand[i] 45 | # save the donation vector to the model 46 | self.donation = exog_info.donation 47 | return exog_info 48 | 49 | def transition_fn(self, decision): 50 | # iterate through hold vector 51 | hold = decision[0] 52 | for i in range(self.params['NUM_BLD_NODES']): 53 | self.Bld_Net.holdamount[i] = hold[i] 54 | 55 | 56 | rev_don = list(reversed(self.donation)) 57 | rev_hld = list(reversed(self.Bld_Net.holdamount)) 58 | # age the blood at hold node and add in the donations 59 | 60 | for i in range(self.params['NUM_BLD_NODES']): 61 | if (i % self.params['MAX_AGE'] == self.params['MAX_AGE']-1): 62 | # add donation 63 | rev_hld[i] = rev_don[i // self.params['MAX_AGE']] 64 | else: 65 | # age 66 | rev_hld[i] = rev_hld[i+1] 67 | 68 | 
rev_hld = list(reversed(rev_hld)) 69 | # amount at blood node = amount at hold node 70 | for i in range(self.params['NUM_BLD_NODES']): 71 | self.Bld_Net.bloodamount[i] = rev_hld[i] 72 | 73 | # updating obj value 74 | self.obj += decision[1] 75 | 76 | # update current state 77 | self.bld_inv = self.Bld_Net.bloodamount 78 | return self.state 79 | 80 | def objective_fn(self): 81 | return self.obj 82 | 83 | ######################################################################################################## 84 | 85 | class Exog_Info(): 86 | def __init__(self, demand, donation): 87 | # list consisting of blood demand objects 88 | self.demand = demand 89 | # list consisting of blood unit objects donated to the blood inventory 90 | self.donation = donation 91 | 92 | 93 | # function to generate random exogenous information dependent on blood type and time t 94 | def generate_exog_info_by_bloodtype(t, Bld_Net, params): 95 | # demand 96 | demand= [] 97 | if (t in params['TIME_PERIODS_SURGE'] and np.random.uniform(0, 1) < params['SURGE_PROB']): 98 | factor = params['SURGE_FACTOR'] 99 | else: 100 | factor = 0 101 | demand = [round(np.random.uniform(0, params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) + factor*int(np.random.poisson(params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) for dmd in Bld_Net.demandnodes] 102 | 103 | # donation 104 | donation = [round(np.random.uniform(0, params['MAX_DON_BY_BLOOD'][i])) for i in params['Bloodtypes']] 105 | return Exog_Info(demand, donation) 106 | 107 | 108 | # function to generate random exogenous information dependent on blood type and time t 109 | def generate_exog_info_by_bloodtype_p(t, Bld_Net, params): 110 | # demand 111 | if (t in params['TIME_PERIODS_SURGE'] and np.random.uniform(0, 1) < params['SURGE_PROB']): 112 | factor = params['SURGE_FACTOR'] 113 | else: 114 | factor = 1 115 | 116 | demand = [int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) for dmd in Bld_Net.demandnodes] 117 | 118 | if False: 119 | demand=[] 120 | for dmd in Bld_Net.demandnodes: 121 | if dmd[0]=="O-": 122 | if dmd[1]=="Urgent": 123 | demand.append(1) 124 | else: 125 | eleDem=max(0,int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]-1))-1) 126 | demand.append(eleDem) 127 | 128 | else: 129 | demand.append(int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]]))) 130 | 131 | 132 | 133 | #donation 134 | donation = [int(np.random.poisson(params['MAX_DON_BY_BLOOD'][i])) for i in params['Bloodtypes']] 135 | 136 | return Exog_Info(demand, donation) 137 | 138 | 139 | 140 | ########################################################################################################## 141 | # function to calculate one step contribution 142 | def contribution(params,bloodnode, demandnode): 143 | 144 | # if substutition is not allowed 145 | if (demandnode[2] == False and bloodnode[0] != demandnode[0]) or (demandnode[2] == True and params['SubMatrix'][(bloodnode[0], demandnode[0])] == False): 146 | value=params['INFEASIABLE_SUBSTITUTION_PENALTY'] 147 | else: 148 | # start giving a bonus depending on the age of the blood 149 | #value = params['AGE_BONUS'][int(bloodnode[1])] 150 | value=0 151 | # no substitution 152 | if bloodnode[0] == demandnode[0]: 153 | value += params['NO_SUBSTITUTION_BONUS'] 154 | # 
filling urgent demand 155 | if demandnode[1] == 'Urgent': 156 | value += params['URGENT_DEMAND_BONUS'] 157 | # filling elective demand 158 | else: 159 | value += params['ELECTIVE_DEMAND_BONUS'] 160 | 161 | if demandnode[1] == 'Elective': 162 | value += params['BLOOD_FOR_ELECTIVE_PENALTY'] 163 | 164 | 165 | 166 | return(value) -------------------------------------------------------------------------------- /BloodManagement/BloodManagementNetwork.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import (namedtuple, defaultdict) 3 | 4 | from BloodManagementModel import contribution 5 | 6 | 7 | class Graph: 8 | def __init__(self): 9 | self.bloodnodes = list() 10 | self.bloodamount = [] 11 | 12 | self.demandnodes = list() 13 | self.demandamount = [] 14 | self.demcontrib = {} 15 | 16 | self.demedges = defaultdict(list) 17 | self.demweights = {} 18 | 19 | 20 | 21 | self.supersink = None 22 | 23 | self.holdnodes = list() 24 | self.holdamount = [] 25 | self.holdedges = defaultdict(list) 26 | self.holdweights = {} 27 | self.holdvbar = [] 28 | 29 | self.parallelarr = {} 30 | self.varr = {} 31 | 32 | self.sqGrad = {} #this will store the sum of the squared gradients when using AdaGrad stepsizes. 33 | 34 | 35 | # supersink_node 36 | def add_supersinknode(self, name): 37 | self.supersink = name 38 | self.amount[name] = 0 39 | 40 | # node of type (bloodtype, age) for current blood inventory 41 | def add_bloodnode(self, name): 42 | self.bloodnodes.append(name) 43 | self.bloodamount.append(0) 44 | 45 | # node - (bloodtype, age) 46 | def add_demandnode(self, name): 47 | self.demandnodes.append(name) 48 | self.demandamount.append(0) 49 | 50 | # node - (bloodtype, age) 51 | def add_holdnode(self, name): 52 | self.holdnodes.append(name) 53 | self.holdamount.append(0) 54 | 55 | # create an edge between two nodes 56 | def add_demedge(self, from_node, to_node, weight): 57 | self.demedges[from_node].append(to_node) 58 | self.demweights[(from_node, to_node)] = weight 59 | 60 | # create an edge between two nodes 61 | def add_holdedge(self, from_node, to_node, weight): 62 | self.holdedges[from_node].append(to_node) 63 | self.holdweights[(from_node, to_node)] = weight 64 | 65 | def add_parallel(self, t, from_node, to_node, parallelarray): 66 | self.parallelarr[(t, from_node, to_node)] = parallelarray 67 | 68 | def add_varr(self, t, from_node, to_node, varr): 69 | self.varr[(t, from_node, to_node)] = varr 70 | 71 | def add_demcontribArr(self, bldnode,demcontribArr): 72 | self.demcontrib[bldnode] = demcontribArr 73 | 74 | def add_sqGradArr(self, t, bldnode,sqGradArr): 75 | self.sqGrad[(t,bldnode)] = sqGradArr 76 | 77 | 78 | def create_bld_net(params): 79 | # create the network 80 | Bl_Net = Graph() 81 | Bl_Net.supersink = ('supersink', np.inf) 82 | # (BloodUnit, Age) pairs and respective hold nodes 83 | for i in params['Bloodtypes']: 84 | for j in params['Ages']: 85 | Bl_Net.add_bloodnode((i, str(j))) 86 | Bl_Net.add_holdnode((i, str(j))) 87 | 88 | # all possible demand nodes 89 | for i in params['Bloodtypes']: 90 | for j in params['Surgerytypes']: 91 | for k in params['Substitution']: 92 | Bl_Net.add_demandnode((i, j, k)) 93 | 94 | #add edges from (bloodunit, age) pairs to suitable demand nodes 95 | for bld in Bl_Net.bloodnodes: 96 | for dmd in Bl_Net.demandnodes: 97 | weight = contribution(params,bld, dmd) 98 | Bl_Net.add_demedge(bld, dmd, weight) 99 | 100 | for bld in Bl_Net.bloodnodes: 101 | demcontribArr = [contribution(params,bld, dmd) 
for dmd in Bl_Net.demandnodes] 102 | Bl_Net.add_demcontribArr(bld,demcontribArr) 103 | 104 | 105 | # add edges from blood nodes to hold nodes 106 | for bld in Bl_Net.bloodnodes: 107 | for hld in Bl_Net.holdnodes: 108 | if bld[0] == hld[0] and bld[1] == hld[1]: 109 | Bl_Net.add_holdedge(bld, hld, 0) 110 | 111 | # add parallel edges from hold nodes to supersink 112 | for t in params['Times']: 113 | for hld in Bl_Net.holdnodes: 114 | parArr = np.zeros(params['NUM_PARALLEL_LINKS']) 115 | vArr = np.zeros(params['NUM_PARALLEL_LINKS']) 116 | Bl_Net.add_parallel(t, hld, Bl_Net.supersink, parArr) 117 | Bl_Net.add_varr(t, hld, Bl_Net.supersink, vArr) 118 | 119 | sqGradArr = np.zeros(params['NUM_PARALLEL_LINKS']) 120 | Bl_Net.add_sqGradArr(t, hld, sqGradArr) 121 | 122 | 123 | 124 | 125 | return(Bl_Net) 126 | 127 | -------------------------------------------------------------------------------- /BloodManagement/BloodManagementPolicy.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import cvxopt 4 | from collections import (namedtuple, defaultdict) 5 | 6 | 7 | def initLPMatrices(params,Bld_Net): 8 | #Initializing the matrix for the LP 9 | A = np.zeros((params['NUM_BLD_NODES'], params['NUM_BLD_NODES']*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']))) 10 | for i in range(params['NUM_BLD_NODES']): 11 | for j in range(params['NUM_BLD_NODES']*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])): 12 | if (j < (i+1)*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])) and (j >= i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])): 13 | #Checking for feasibility 14 | k=j-i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']) 15 | if (k= 0 36 | for i in range(params['NUM_BLD_NODES']): 37 | for j in range(params['NUM_DEM_NODES']): 38 | G[params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'] + i*params['NUM_DEM_NODES'] + j, (params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])*i + j] = -1 39 | 40 | 41 | # ineq constr for x_parallel >= 0 42 | for i in range(params['NUM_BLD_NODES']): 43 | for j in range(params['NUM_PARALLEL_LINKS']): 44 | G[params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'] + params['NUM_DEM_NODES']*params['NUM_BLD_NODES'] + i*params['NUM_PARALLEL_LINKS'] + j,(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])*i + params['NUM_DEM_NODES'] + j] = -1 45 | 46 | 47 | h = np.ones(params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS']) 48 | h[params['NUM_DEM_NODES']::params['NUM_PARALLEL_LINKS']]= params['SLOPE_CAPAC_LAST'] 49 | h = np.append(h, np.zeros(params['NUM_BLD_NODES']*params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'])) 50 | 51 | A = cvxopt.matrix(A) 52 | G = cvxopt.matrix(G) 53 | 54 | coeff = [np.concatenate((np.array(Bld_Net.demcontrib[bld]),np.zeros(params['NUM_PARALLEL_LINKS']))) if int(bld[1])< params['MAX_AGE']-1 else np.concatenate((np.array(Bld_Net.demcontrib[bld]),np.add(np.zeros(params['NUM_PARALLEL_LINKS']),params['DISCARD_BLOOD_PENALTY']))) for bld in Bld_Net.bloodnodes] 55 | coeff = [ai for a in coeff for ai in a ] 56 | coeff = np.array(coeff) 57 | 58 | return (A,G,h,coeff) 59 | 60 | class Policy(): 61 | """ 62 | Base class for Static Stochastic Shortest Path Model policy 63 | """ 64 | 65 | def __init__(self,params,Bld_Net): 66 | """ 67 | Initializes the policy 68 | """ 69 | 70 | self.A,self.G,self.h,self.coeff = initLPMatrices(params,Bld_Net) 71 | 72 | 73 | def 
getLPSol(self,params,M,iteration,t,solDemList,solHoldList,IS_TRAINING): 74 | 75 | c_t = [np.concatenate((np.multiply(np.array(M.Bld_Net.demcontrib[bld]),-1),np.multiply(M.Bld_Net.parallelarr[(t, bld, M.Bld_Net.supersink)],-params['DISCOUNT_FACTOR']))) if int(bld[1])< params['MAX_AGE']-1 else np.concatenate((np.multiply(np.array(M.Bld_Net.demcontrib[bld]),-1),np.add(np.multiply(M.Bld_Net.parallelarr[(t, bld, M.Bld_Net.supersink)],-params['DISCOUNT_FACTOR']),-params['DISCARD_BLOOD_PENALTY']))) for bld in M.Bld_Net.bloodnodes] 76 | c = [ai for a in c_t for ai in a ] 77 | b = np.array(M.Bld_Net.bloodamount) 78 | self.h[:params['NUM_DEM_NODES']] = M.Bld_Net.demandamount 79 | 80 | 81 | c = cvxopt.matrix(c) 82 | b = cvxopt.matrix(b,size=(params['NUM_BLD_NODES'],1),tc='d') 83 | h = cvxopt.matrix(self.h) 84 | 85 | cvxopt.solvers.options['show_progress'] = False 86 | sol = cvxopt.solvers.lp(c, self.G, h, self.A, b,solver='glpk',options={'glpk':{'msg_lev':'GLP_MSG_OFF'}}) 87 | #sol = cvxopt.solvers.lp(c, self.G, h, self.A, b) 88 | 89 | x = sol['x'] 90 | 91 | x = np.array(x) 92 | x = np.squeeze(x) 93 | 94 | val = np.dot(x, self.coeff) 95 | 96 | 97 | 98 | xDem = [x[i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']):i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])+params['NUM_DEM_NODES']] for i in list(range(params['NUM_BLD_NODES']))] 99 | xDemFlat = [xij for xi in xDem for xij in xi] 100 | solDemRec=(iteration,t,xDem.copy()) 101 | solDemList.append(solDemRec) 102 | 103 | hld=[np.sum(x[i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])+params['NUM_DEM_NODES']:(i+1)*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])]) for i in list(range(params['NUM_BLD_NODES']))] 104 | solHoldRecord = (iteration,t,hld.copy()) 105 | solHoldList.append(solHoldRecord) 106 | hld = np.array(hld) 107 | 108 | invByBlood = [np.sum(M.bld_inv[i*params['MAX_AGE']:(i+1)*params['MAX_AGE']]) for i in list(range(len(params['Bloodtypes']))) ] 109 | demByBlood = [np.sum(M.Bld_Net.demandamount[i*(len(params['Surgerytypes'])*len(params['Substitution'])):(i+1)*(len(params['Surgerytypes'])*len(params['Substitution']))]) for i in list(range(len(params['Bloodtypes']))) ] 110 | 111 | xDemFlat = [xij for xi in xDem for xij in xi] 112 | xDemMat = np.array(xDemFlat).reshape(params['NUM_BLD_NODES'],params['NUM_DEM_NODES']) 113 | xDemMatColSum = xDemMat.sum(axis=0) 114 | covByBlood = [ np.sum(xDemMatColSum[i*(len(params['Surgerytypes'])*len(params['Substitution'])):(i+1)*(len(params['Surgerytypes'])*len(params['Substitution']))]) for i in list(range(len(params['Bloodtypes']))) ] 115 | covByBlood = np.array(covByBlood).astype(int) 116 | 117 | hldByBlood = [int(np.sum(hld[i*params['MAX_AGE']:(i+1)*params['MAX_AGE']])) for i in list(range(len(params['Bloodtypes']))) ] 118 | disByBlood = hld[params['MAX_AGE']-1::params['MAX_AGE']] 119 | disByBlood = np.array(disByBlood) 120 | disByBlood = disByBlood.astype(int) 121 | 122 | if False: 123 | print('Iteration = ', iteration) 124 | print('Time period = ', t) 125 | print('Demand = ', np.sum(M.Bld_Net.demandamount)) 126 | print('Supply = ', np.sum(M.Bld_Net.bloodamount)) 127 | print('Blood Used = ', np.sum(M.bld_inv) - np.sum(hld)) 128 | print('Blood Held = ', np.sum(hld)) 129 | print('Inventory by BloodType ',invByBlood) 130 | print('Demand By BloodType ',demByBlood) 131 | print('Used By BloodType ', list(covByBlood)) 132 | print('Hold By BloodType ',hldByBlood) 133 | print('Discard By BloodType ', list(disByBlood)) 134 | print('Contribution = ', val) 135 | print('Donation = ', 
np.sum(M.donation)) 136 | print('\n') 137 | 138 | hld = hld.astype(int) 139 | 140 | if IS_TRAINING and params['IS_PERTUB']: 141 | epsilon = PERTUB_GEN.poisson(LAMBDA_PERTUB, params['NUM_BLD_NODES']) 142 | signE = PERTUB_GEN.choice([-1,1], size=params['NUM_BLD_NODES'], replace=True, p=None) 143 | hld = hld+epsilon*signE 144 | hld = np.maximum(np.zeros(params['NUM_BLD_NODES']),hld) 145 | hld = hld.astype(int) 146 | 147 | # dual variables 148 | d = sol['y'] 149 | 150 | return sol,val,x,hld,d,solDemList,solHoldList 151 | 152 | def updateVFAs(self,params,M,iteration,t,d, slopesList,updateVfaList): 153 | alpha = 0 154 | 155 | # set the dual variables to respective parallel arcs 156 | for i in range(params['NUM_BLD_NODES']): 157 | # put the value of the dual varible d[i+1] in the parallel arc, associated 158 | # with the amount of resource in the inventory associated with holdnode[i] 159 | # the holdnodes with the oldest age do not get updated 160 | 161 | recordSlopes = (iteration,t,M.Bld_Net.parallelarr[(t, M.Bld_Net.holdnodes[i], M.Bld_Net.supersink)].copy()) 162 | slopesList.append(recordSlopes) 163 | 164 | index = M.bld_inv[i] 165 | if index>=0: 166 | if (t>0 and M.Bld_Net.holdnodes[i][1]= params['NUM_PARALLEL_LINKS'] - 1: 170 | index = params['NUM_PARALLEL_LINKS'] - 1 171 | 172 | arr = M.Bld_Net.varr[(t-1,M.Bld_Net.holdnodes[i], M.Bld_Net.supersink)] 173 | sqGradArr = M.Bld_Net.sqGrad[(t-1,M.Bld_Net.holdnodes[i])] 174 | 175 | 176 | if iteration < params['NUM_ITER_STEP_ONE']: 177 | alpha = 1 178 | else: 179 | if (params['STEPSIZE_RULE'] == 'C'): 180 | alpha = params['ALPHA'] 181 | elif (params['STEPSIZE_RULE'] == 'A'): 182 | sqGradArr[index] += np.power(vhat-arr[index],2) 183 | alpha = params['ETA']/(np.sqrt(sqGradArr[index]+params['STEP_EPS'])) 184 | 185 | vbar = arr[index] 186 | vnew = alpha*vhat +(1-alpha)*vbar 187 | arr[index] = vnew 188 | 189 | recordUpdateVfa = (iteration,t-1,M.Bld_Net.holdnodes[i][0],M.Bld_Net.holdnodes[i][1],index,vhat,vbar,sqGradArr[index],alpha,vnew) 190 | updateVfaList.append(recordUpdateVfa) 191 | 192 | #Projecting back in case the vfa is not concave anymore 193 | if (vnew>vbar): #Look to the left 194 | indSetL=[i for i in list(range(0,index+1)) if arr[i]<=vnew] 195 | if (len(indSetL)>0): 196 | if params['PROJECTION_ALGO'] == 'Avg': 197 | avg = np.mean(arr[indSetL]) 198 | arr[indSetL]=avg 199 | elif params['PROJECTION_ALGO'] == 'Copy': 200 | arr[indSetL]=vnew 201 | else: 202 | if index > 0: 203 | j=index-1 204 | while (j>=0 and arr[j] < arr[j+1]): 205 | arr[j]= alpha*vhat +(1-alpha)*arr[j] 206 | j-=1 207 | else: 208 | arr[index]=vnew 209 | 210 | 211 | 212 | elif (vnew=vnew] 214 | if (len(indSetR)>0): 215 | if params['PROJECTION_ALGO'] == 'Avg': 216 | avg = np.mean(arr[indSetR]) 217 | arr[indSetR]=avg 218 | elif params['PROJECTION_ALGO'] == 'Copy': 219 | arr[indSetR]=vnew 220 | else: 221 | if index < params['NUM_PARALLEL_LINKS']-1: 222 | j=index+1 223 | while (j arr[j-1]): 224 | arr[j] = alpha*vhat +(1-alpha)*arr[j] 225 | j+=1 226 | else: 227 | arr[index]=vnew 228 | 229 | return alpha,slopesList,updateVfaList 230 | 231 | -------------------------------------------------------------------------------- /BloodManagement/OutputAll.txt: -------------------------------------------------------------------------------- 1 | Instance AvgContrib UrgentCoverage ElectiveCoverage AvgCoverage DiscardProportion Utility 2 | 3 | PolicyMYOPIC_SURGE_0.5_PEN_-4.0_ALPHA_0.00 21447.15 0.91 0.69 0.81 0.00 978.12 4 | PolicyMYOPIC_SURGE_0.5_PEN_-9.0_ALPHA_0.00 20447.25 0.92 0.64 0.79 0.00 
981.20 5 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 6 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 7 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 8 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 9 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 985.89 10 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 986.00 11 | PolicyMYOPIC_SURGE_0.5_PEN_-9.0_ALPHA_0.00 20447.25 0.92 0.64 0.79 0.00 984.00 12 | -------------------------------------------------------------------------------- /BloodManagement/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/BloodManagement/Parameters.xlsx -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ######################################################################### 53 | #HINT!!!!! Insert the loop here for questions 4 and 5 54 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 55 | P = ClinicalTrialsPolicy(M, policy_names) 56 | t = 0 57 | stop = False 58 | policy_info = {'model_A': [-1, stop], 59 | 'model_B': [-1, stop], 60 | 'model_C': [-1, stop], 61 | 'model_C_extension': [-1, stop]} 62 | 63 | policy_value=P.run_policy(policy_info, model_name, t) 64 | #End HINT! 
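# A minimal, commented-out sketch of the loop the HINT above asks for (questions 4 and 5), assuming the
# numIterations value read from Parameters.xlsx; the fully worked versions are in
# ClinicalTrialsDriverScriptSolutionQ4.py and ClinicalTrialsDriverScriptSolutionQ5.py below.
# avg_policy_value = 0
# for i in range(numIterations):
#     M = ClinicalTrialsModel(state_names, decision_names, initial_state, False)
#     P = ClinicalTrialsPolicy(M, policy_names)
#     policy_info = {name: [-1, False] for name in policy_names}
#     avg_policy_value += P.run_policy(policy_info, model_name, 0)
# avg_policy_value /= numIterations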
65 | ####################################################################### 66 | 67 | 68 | 69 | 70 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 71 | 72 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ############################################################ 53 | #Solution Q4 54 | avg_policy_value = 0 55 | for i in range(0,numIterations): 56 | 57 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 58 | P = ClinicalTrialsPolicy(M, policy_names) 59 | t = 0 60 | stop = False 61 | policy_info = {'model_A': [-1, stop], 62 | 'model_B': [-1, stop], 63 | 'model_C': [-1, stop], 64 | 'model_C_extension': [-1, stop]} 65 | policy_value = P.run_policy(policy_info, model_name, t) 66 | avg_policy_value += policy_value 67 | print("Finished run policy for iteration {} - Value: {} and Avg_value: {:,}".format(i,policy_value,avg_policy_value/(i+1))) 68 | avg_policy_value = avg_policy_value/numIterations 69 | print("Average values after {} iterations is {:,}".format(numIterations,avg_policy_value)) 70 | #End Solution Q4 71 | ############################################################ 72 | 73 | 74 | 75 | 76 | 77 | 78 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 79 | 80 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ5.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 
| """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ################################################################ 53 | #Solution Q5 54 | theta_list = list(np.arange(.77,.79,0.005)) 55 | theta_avg=[] 56 | for theta in theta_list: 57 | initial_state.update({'theta_stop_low':theta}) 58 | avg_policy_value = 0 59 | for i in range(0,numIterations): 60 | 61 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 62 | P = ClinicalTrialsPolicy(M, policy_names) 63 | t = 0 64 | stop = False 65 | policy_info = {'model_A': [-1, stop], 66 | 'model_B': [-1, stop], 67 | 'model_C': [-1, stop], 68 | 'model_C_extension': [-1, stop]} 69 | policy_value = P.run_policy(policy_info, model_name, t) 70 | avg_policy_value += policy_value 71 | print("Finished run policy for iteration {} - Value: {} and Avg_value: {:,}".format(i,policy_value,avg_policy_value/(i+1))) 72 | 73 | avg_policy_value = avg_policy_value/numIterations 74 | print("Theta {} - Average values after {} iterations is {:,}".format(initial_state['theta_stop_low'],numIterations,avg_policy_value)) 75 | theta_avg.append(avg_policy_value) 76 | 77 | print(theta_list) 78 | print(theta_avg) 79 | plt.plot(theta_list,theta_avg,'bo') 80 | plt.show() 81 | #End Solution Q5 82 | ############################################################### 83 | 84 | 85 | 86 | 87 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 88 | 89 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ6.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import 
ClinicalTrialsModel 13 | from ClinicalTrialsPolicySolutionQ6 import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | 53 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 54 | P = ClinicalTrialsPolicy(M, policy_names) 55 | t = 0 56 | stop = False 57 | policy_info = {'model_A': [-1, stop], 58 | 'model_B': [-1, stop], 59 | 'model_C': [-1, stop], 60 | 'model_C_extension': [-1, stop]} 61 | 62 | policy_value=P.run_policy(policy_info, model_name, t) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 72 | 73 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Model class 3 | 4 | Raluca Cobzaru (c) 2018 5 | Adapted from code by Donghun Lee (c) 2018 6 | 7 | """ 8 | from collections import namedtuple 9 | import numpy as np 10 | from scipy.stats import binom 11 | import math 12 | import pandas as pd 13 | 14 | def trunc_poisson_fn(count, mean): 15 | """ 16 | returns list of truncated Poisson distribution with given mean and values count 17 | 18 | :param count: int - maximal value considered by the distribution 19 | :param mean: float - mean of Poisson distribution 20 | :return list(float) - vector of truncated Poisson pmfs 21 | """ 22 | trunc_probs = [] 23 | sum = 0.0 24 | for r in range(0, count): 25 | trunc_probs.insert(r, 1/math.factorial(r)*(mean**r)*np.exp(-mean)) 26 | sum += trunc_probs[r] 27 | trunc_probs.insert(count, 1-sum) 28 | return trunc_probs 29 | 30 | def mc_success_fn(count, mean, samples, N, K): 31 | """ 32 | simulates enrollment and success process using Monte Carlo sampling 33 | 34 | :param count: int - count of new potential patients 35 | :param mean: float - mean of the truncated Poisson distribution 36 | :param samples: list(float) - samples for the true success rate (assuming sampled distribution) 37 | :param N: int - number of Monte Carlo samples 38 | :param K: int - 
number of samples for the true success rate 39 | :return: dict - number of enrollments and successes 40 | """ 41 | enrollment_samples = [] 42 | success_samples = [] 43 | trunc_probs = trunc_poisson_fn(count, mean) 44 | # simulates enrollment process using truncated Poisson probabilities 45 | for n in range(N): 46 | success_samples.append(0) 47 | MC_r_sample = np.random.choice(range(count+1), size=None, replace=True, p=trunc_probs) 48 | enrollment_samples.append(MC_r_sample) 49 | MC_prob_sample = samples[np.random.randint(0, K)] 50 | # simulates success count using sample probability 51 | for k in range(0, MC_r_sample): 52 | bernoulli_sim = np.random.uniform(0, 1) 53 | if bernoulli_sim < MC_prob_sample: 54 | success_samples[n] += 1 55 | # uniformly chooses enrollments and successes from MC samples 56 | enrolled = np.random.choice(enrollment_samples) 57 | return {"mc_enroll": enrolled, 58 | "mc_success": success_samples[enrollment_samples.index(enrolled)]} 59 | 60 | class ClinicalTrialsModel(): 61 | """ 62 | Base class for model 63 | """ 64 | 65 | def __init__(self, state_variables, decision_variables, s_0, simulation, exog_info_fn=None, transition_fn=None, 66 | objective_fn=None, seed=20180529): 67 | """ 68 | Initializes the model 69 | 70 | :param state_variables: list(str) - state variable dimension names 71 | :param decision_variables: list(str) - decision variable dimension names 72 | :param s_0: dict - needs to contain at least the information to populate initial state using state_names 73 | :param simulation: bool - if True, simulates exogenous data; if False, uses data from given dataset 74 | :param exog_info_fn: function - calculates relevant exogenous information 75 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 76 | evolves 77 | :param objective_fn: function - calculates contribution at time t 78 | :param seed: int - seed for random number generator 79 | """ 80 | 81 | self.init_args = {seed: seed} 82 | self.prng = np.random.RandomState(seed) 83 | self.initial_state = s_0 84 | self.state_variables = state_variables 85 | self.State = namedtuple('State', state_variables) 86 | self.state = self.build_state(s_0) 87 | self.simulation = simulation 88 | self.decision_variables = decision_variables 89 | self.Decision = namedtuple('Decision', decision_variables) 90 | self.objective = 0.0 91 | 92 | def build_state(self, info): 93 | """ 94 | returns a state containing all the given state information 95 | 96 | :param info: dict - all state information 97 | :return: namedtuple - a state object 98 | """ 99 | return self.State(*[info[k] for k in self.state_variables]) 100 | 101 | def build_decision(self, info): 102 | """ 103 | returns a decision containing all the given deicison information 104 | 105 | :param info: dict - all decision info 106 | :return: namedtuple - a decision object 107 | """ 108 | return self.Decision(*[info[k] for k in self.decision_variables]) 109 | 110 | 111 | 112 | def exog_info_fn(self, decision): 113 | """ 114 | returns the exogenous information dependent on a random process 115 | :param decision: int - number of new potential patients 116 | :return: dict - new enrollments and the number of successes among them 117 | """ 118 | if self.simulation == False: 119 | exog_patients = math.floor(np.random.poisson(lam=self.initial_state['true_l_response'] * (self.state.potential_pop + decision.enroll), size=None)) 120 | exog_succ = math.floor(np.random.binomial(exog_patients, self.initial_state['true_succ_rate'], 
size=None)) 121 | 122 | 123 | #exog_patients = math.floor(self.initial_state['true_l_response'] * (self.state.potential_pop + decision.enroll)) 124 | #exog_succ = math.floor(self.initial_state['true_succ_rate'] * exog_patients) 125 | 126 | return {"new_patients": exog_patients, 127 | "succ_count": exog_succ} 128 | else: 129 | r_bar = math.floor(self.state.l_response * (self.state.potential_pop + decision.enroll)) 130 | # implements new patients and success process using Monte Carlo sampling 131 | p_true_samples = np.random.beta(self.state.success, self.state.failure, self.initial_state['K']) 132 | MC_samples = mc_success_fn(decision.enroll, r_bar, p_true_samples, self.initial_state['N'], self.initial_state['K']) 133 | return {"new_patients": MC_samples['mc_enroll'], 134 | "succ_count": MC_samples['mc_success']} 135 | 136 | 137 | def transition_fn(self, decision, exog_info): 138 | """ 139 | updates the state given the decision and exogenous information 140 | :param decision: namedtuple - contains all decision info 141 | :param exog_info: contains all exogenous information 142 | :return: dict - updated state 143 | """ 144 | enroll_pop = decision.prog_continue * (self.state.potential_pop + decision.enroll) 145 | new_lambda = (1-self.initial_state['alpha']) * self.state.l_response + self.initial_state['alpha'] * exog_info['new_patients']/(self.state.potential_pop + decision.enroll) 146 | new_succ = self.state.success + exog_info['succ_count'] 147 | new_fail = self.state.failure + (exog_info['new_patients'] - exog_info['succ_count']) 148 | return {"potential_pop": enroll_pop, 149 | "success": new_succ, 150 | "failure": new_fail, 151 | "l_response": new_lambda} 152 | 153 | def objective_fn(self, decision): 154 | """ 155 | computes contribution of enrollments 156 | :param decision: namedtuple - contains all decision info 157 | :param exog_info: contains all exogenous info 158 | :return: float - calculated contribution 159 | """ 160 | obj_part = (1-decision.prog_continue) * decision.drug_success * self.initial_state['success_rev'] - decision.prog_continue * (self.initial_state['program_cost'] + self.initial_state['patient_cost'] * decision.enroll) 161 | return obj_part 162 | 163 | def step(self, decision): 164 | """ 165 | steps the process forward by one time increment by updating the sum of the contributions, the 166 | exogenous information, and the state variable 167 | :param decision: namedtuple - contains all decision info 168 | :return: none 169 | """ 170 | exog_info = self.exog_info_fn(decision) 171 | self.objective += self.objective_fn(decision) 172 | exog_info.update(self.transition_fn(decision, exog_info)) 173 | self.state = self.build_state(exog_info) 174 | 175 | if __name__ == "__main__": 176 | # this is an example of creating a model, using a random policy, and running until the drug is declared a success/failure or 177 | # we reach the maximum number of trials 178 | t = 0 179 | stop = False 180 | # extracts data from given data set; defines initial state 181 | file = 'Trials Parameters.xlsx' 182 | raw_data = pd.ExcelFile(file) 183 | data = raw_data.parse('Exogenous Data') 184 | state_variables = ['potential_pop', 'success', 'failure', 'l_response'] 185 | initial_state = {'potential_pop': float(data.iat[0, 0]), 186 | 'success': data.iat[1, 0], 187 | 'failure': float(data.iat[2, 0]), 188 | 'l_response': float(data.iat[3, 0]), 189 | 'theta_stop_low': data.iat[4, 0], 190 | 'theta_stop_high': data.iat[5, 0], 191 | 'alpha': data.iat[6, 0], 192 | 'K': int(data.iat[7, 0]), 193 | 'N': 
int(data.iat[8, 0]), 194 | 'trial_size': int(data.iat[9, 0]), 195 | 'patient_cost': data.iat[10, 0], 196 | 'program_cost': data.iat[11, 0], 197 | 'success_rev': data.iat[12, 0], 198 | 'sampling_size': int(data.iat[13, 0]), 199 | 'enroll_min': int(data.iat[14, 0]), 200 | 'enroll_max': int(data.iat[15, 0]), 201 | 'enroll_step': int(data.iat[16, 0]), 202 | 'H': int(data.iat[17, 0]), 203 | 'true_l_response': data.iat[18, 0], 204 | 'true_succ_rate': data.iat[19, 0]} 205 | decision_variables = ['enroll', 'prog_continue', 'drug_success'] 206 | M = ClinicalTrialsModel(state_variables, decision_variables, initial_state, False) 207 | 208 | while t <= initial_state['trial_size'] and stop == False: 209 | p_belief = M.state.success / (M.state.success + M.state.failure) 210 | # drug_success = 1 if successful, 0 if failure, -1 if continue trial 211 | if p_belief > initial_state['theta_stop_high']: 212 | decision = {'prog_continue': 0, 'drug_success': 1} 213 | stop = True 214 | elif p_belief < initial_state['theta_stop_low']: 215 | decision = {'prog_continue': 0, 'drug_success': 0} 216 | stop = True 217 | else: 218 | decision = {'prog_continue': 1, 'drug_success': -1} 219 | decision['enroll'] = np.random.choice(range(initial_state['enroll_min'], initial_state['enroll_max']+initial_state['enroll_step'], initial_state['enroll_step'])) if stop == False else 0 220 | x = M.build_decision(decision) 221 | print("t={}, obj={}, state.potential_pop={}, state.success={}, state.failure={}, x={}".format(t, M.objective, M.state.potential_pop, M.state.success, M.state.failure, x)) 222 | M.step(x) 223 | t += 1 224 | 225 | print("\nStopping state: ") 226 | print("t={}, obj={}, state.potential_pop={}, state.success={}, state.failure={}, x={}".format(t, M.objective, M.state.potential_pop, M.state.success, M.state.failure, x)) 227 | 228 | pass -------------------------------------------------------------------------------- /ClinicalTrials/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/ClinicalTrials/Parameters.xlsx -------------------------------------------------------------------------------- /EnergyStorage_I/BackwardDP.py: -------------------------------------------------------------------------------- 1 | """ 2 | Backward dynamic programming class 3 | """ 4 | from EnergyStorageModel import EnergyStorageModel as ESM 5 | import numpy as np 6 | import pandas as pd 7 | from bisect import bisect 8 | import matplotlib.pyplot as plt 9 | import math 10 | import time 11 | from collections import namedtuple,defaultdict 12 | 13 | class BDP(): 14 | """ 15 | Base class to implement backward dynamic programming 16 | """ 17 | 18 | def __init__(self, discrete_prices, discrete_energy, price_changes, discrete_price_changes, 19 | f_p, stop_time, model): 20 | """ 21 | Initializes the model 22 | 23 | :param discrete_prices: list - list of discretized prices 24 | :param discrete_energy: list - list of discretized energy amounts 25 | :param price_changes: list - list of price changes 26 | :param discrete_price_changes: list - list of discretized price changes 27 | :param f_p: ndarray - contains f(p) values 28 | :param stop_time: int - time at which loop terminates 29 | :param model: energy storage model 30 | 31 | """ 32 | self.discrete_energy = discrete_energy 33 | self.discrete_prices = discrete_prices 34 | self.price_changes = price_changes 35 | self.discrete_price_changes = 
discrete_price_changes 36 | self.f_p = f_p 37 | self.time = stop_time - 1 38 | self.model = model 39 | self.terminal_contribution = 0 40 | self.values_dict = None #this will store the vfas - it will be computed by the method bellman_2D or bellman_3D 41 | 42 | 43 | 44 | def state_transition(self, state, decision, exog_info): 45 | """ 46 | this function tells us what state we transition to if we are in some state and make a decision 47 | (restricted to states in possible_states) 48 | 49 | :param state: namedtuple - the state of the model at a given time 50 | :param decision: namedtuple - contains all decision info 51 | :param exog_info: any exogenous info 52 | :return: new state object 53 | """ 54 | 55 | 56 | new_energy = state.energy_amount + (self.model.init_args['eta'] * decision.buy) - decision.sell 57 | adjusted_new_energy = math.ceil(new_energy) 58 | 59 | 60 | if len(state) == 2: 61 | new_price = state.price + exog_info 62 | elif len(state) == 3: 63 | new_price = 0.5*state.prev_price + 0.5*state.price + exog_info 64 | 65 | if new_price <= min(self.discrete_prices): 66 | adjusted_new_price = min(self.discrete_prices) 67 | elif new_price >= max(self.discrete_prices): 68 | adjusted_new_price = max(self.discrete_prices) 69 | else: 70 | index = bisect(self.discrete_prices, new_price) 71 | adjusted_new_price = self.discrete_prices[index] 72 | 73 | 74 | if len(state) == 2: 75 | new_state = self.model.build_state({'energy_amount': adjusted_new_energy, 'price': adjusted_new_price}) 76 | 77 | elif len(state) == 3: 78 | prev_price = state.price 79 | if prev_price <= min(self.discrete_prices): 80 | adjusted_prev_price = min(self.discrete_prices) 81 | elif prev_price >= max(self.discrete_prices): 82 | adjusted_prev_price = max(self.discrete_prices) 83 | else: 84 | index = bisect(self.discrete_prices, prev_price) 85 | adjusted_prev_price = self.discrete_prices[index] 86 | 87 | new_state = self.model.build_state({'energy_amount': adjusted_new_energy, 88 | 'price': adjusted_new_price, 89 | 'prev_price': adjusted_prev_price}) 90 | 91 | 92 | return new_state 93 | 94 | def bellman(self): 95 | """ 96 | this function computes the value function using Bellman's equation for a 2D state variable 97 | 98 | :return: list - list of contribution values 99 | """ 100 | 101 | # make list of all possible 2D states using discretized prices and discretized energy values 102 | 103 | self.possible_states = [] 104 | if len(self.model.state_variable) == 2: 105 | for price in self.discrete_prices: 106 | for energy in self.discrete_energy: 107 | state = self.model.build_state({'energy_amount': energy,'price': price}) 108 | self.possible_states.append(state) 109 | else: 110 | for p in self.discrete_prices: 111 | for prev_p in self.discrete_prices: 112 | for energy in self.discrete_energy: 113 | state = self.model.build_state({'energy_amount': energy,'price': p,'prev_price': prev_p}) 114 | self.possible_states.append(state) 115 | 116 | print("State dimension: {}. State space size: {}. 
Exogenous info size: {}".format(len(self.model.state_variable),len(self.possible_states),len(self.discrete_price_changes))) 117 | 118 | 119 | time = self.time 120 | values = defaultdict(dict) 121 | 122 | while time != -1: 123 | max_list = {} 124 | for state in self.possible_states: 125 | price = state.price 126 | energy = state.energy_amount 127 | v_list = [] 128 | for d in self.model.possible_decisions: 129 | x = self.model.build_decision(d, energy) 130 | contribution = price * (self.model.init_args['eta']*x.sell - x.buy) 131 | sum_w = 0 132 | w_index = 0 133 | for w in self.discrete_price_changes: 134 | f = self.f_p[w_index] if w_index == 0 else self.f_p[w_index] - self.f_p[w_index - 1] 135 | next_state = self.state_transition(state, x, w) 136 | next_v = values[time + 1][next_state] if time < self.time \ 137 | else self.terminal_contribution 138 | sum_w += f * next_v 139 | w_index += 1 140 | 141 | v = contribution + sum_w 142 | v_list.append(v) 143 | 144 | max_value = max(v_list) 145 | decList=["Buy","Sell","Hold"] 146 | #print("Time: {} State: price={:.2f}, energy={:.2f} - Buy: {:.2f} Sell: {:.2f} Hold: {:.2f} - Max_value {:.2f} - maxDec {} ".format(time,price, energy,v_list[0],v_list[1],v_list[2],max_value,decList[v_list.index(max(v_list))])) 147 | max_list.update({state: max_value}) 148 | values[time]=max_list 149 | time -= 1 150 | pass 151 | 152 | self.values_dict=values 153 | return values 154 | 155 | 156 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStorageDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage driver script 3 | 4 | """ 5 | import time 6 | from collections import namedtuple 7 | import pandas as pd 8 | import numpy as np 9 | from EnergyStorageModel import EnergyStorageModel as ESM 10 | from EnergyStoragePolicy import EnergyStoragePolicy 11 | from BackwardDP import BDP 12 | import matplotlib.pyplot as plt 13 | from copy import copy 14 | from scipy.ndimage.interpolation import shift 15 | import pickle 16 | from bisect import bisect 17 | 18 | 19 | def process_raw_price_data(file, params): 20 | DISC_TYPE = "FROM_CUM" 21 | # DISC_TYPE = "OTHER" 22 | 23 | print( 24 | "Processing raw price data. 
Constructing price change list and cdf using {}".format( 25 | DISC_TYPE 26 | ) 27 | ) 28 | tS = time.time() 29 | 30 | # load energy price data from the Excel spreadsheet 31 | raw_data = pd.read_excel(file, sheet_name="Raw Data") 32 | 33 | # look at data spanning a week 34 | data_selection = raw_data.iloc[0 : params["T"], 0:5] 35 | 36 | # rename columns to remove spaces (otherwise we can't access them) 37 | cols = data_selection.columns 38 | cols = cols.map(lambda x: x.replace(" ", "_") if isinstance(x, str) else x) 39 | data_selection.columns = cols 40 | 41 | # sort prices in ascending order 42 | sort_by_price = data_selection.sort_values("PJM_RT_LMP") 43 | # print(sort_by_price.head()) 44 | 45 | hist_price = np.array(data_selection["PJM_RT_LMP"].tolist()) 46 | # print(hist_price[0]) 47 | 48 | max_price = sort_by_price["PJM_RT_LMP"].max() 49 | min_price = sort_by_price["PJM_RT_LMP"].min() 50 | print("Min price {:.2f} and Max price {:.2f}".format(min_price, max_price)) 51 | 52 | # sort prices in ascending order 53 | sort_by_price = data_selection.sort_values("PJM_RT_LMP") 54 | 55 | # calculate change in price and sort values of change in price in ascending order 56 | data_selection["Price_Shift"] = data_selection.PJM_RT_LMP.shift(1) 57 | data_selection["Price_Change"] = ( 58 | data_selection["PJM_RT_LMP"] - data_selection["Price_Shift"] 59 | ) 60 | sort_price_change = data_selection.sort_values("Price_Change") 61 | 62 | # discretize change in price and obtain f(p) for each price change 63 | max_price_change = sort_price_change["Price_Change"].max() 64 | min_price_change = sort_price_change["Price_Change"].min() 65 | print( 66 | "Min price change {:.2f} and Max price change {:.2f}".format( 67 | min_price_change, max_price_change 68 | ) 69 | ) 70 | 71 | # there are 191 values for price change 72 | price_changes_sorted = sort_price_change["Price_Change"].tolist() 73 | # remove the last NaN value 74 | price_changes_sorted.pop() 75 | 76 | if DISC_TYPE == "FROM_CUM": 77 | # discretize price change by interpolating from cumulative distribution 78 | xp = price_changes_sorted 79 | fp = np.arange(len(price_changes_sorted) - 1) / (len(price_changes_sorted) - 1) 80 | cum_fn = np.append(fp, 1) 81 | 82 | # obtain 30 discrete prices 83 | discrete_price_change_cdf = np.linspace(0, 1, params["nPriceChangeInc"]) 84 | discrete_price_change_list = [] 85 | for i in discrete_price_change_cdf: 86 | interpolated_point = np.interp(i, cum_fn, xp) 87 | discrete_price_change_list.append(interpolated_point) 88 | else: 89 | price_change_range = max_price_change - min_price_change 90 | price_change_increment = price_change_range / params["nPriceChangeInc"] 91 | discrete_price_change = np.arange( 92 | min_price_change, max_price_change, price_change_increment 93 | ) 94 | discrete_price_change_list = list( 95 | np.append(discrete_price_change, max_price_change) 96 | ) 97 | 98 | f_p = np.arange(len(price_changes_sorted) - 1) / (len(price_changes_sorted) - 1) 99 | cum_fn = np.append(f_p, 1) 100 | discrete_price_change_cdf = [] 101 | for c in discrete_price_change_list: 102 | interpolated_point = np.interp(c, price_changes_sorted, cum_fn) 103 | discrete_price_change_cdf.append(interpolated_point) 104 | 105 | price_changes_sorted = np.array(price_changes_sorted) 106 | discrete_price_change_list = np.array(discrete_price_change_list) 107 | discrete_price_change_cdf = np.array(discrete_price_change_cdf) 108 | discrete_price_change_pdf = discrete_price_change_cdf - shift( 109 | discrete_price_change_cdf, 1, cval=0 110 | ) 111 | 
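    # the discrete pdf is the first difference of the interpolated cdf (the probability mass assigned to each
    # discretized price change); dotting it with the discrete price-change values below gives the expected price change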
112 | mean_price_change = np.dot(discrete_price_change_list, discrete_price_change_pdf) 113 | 114 | # print("discrete_price_change_list ",discrete_price_change_list) 115 | # print("discrete_price_change_cdf",discrete_price_change_cdf) 116 | # print("discrete_price_change_pdf",discrete_price_change_pdf) 117 | 118 | print( 119 | "Finishing processing raw price data in {:.2f} secs. Expected price change is {:.2f}. Hist_price len is {}".format( 120 | time.time() - tS, mean_price_change, len(hist_price) 121 | ) 122 | ) 123 | # input("enter any key to continue...") 124 | 125 | exog_params = { 126 | "hist_price": hist_price, 127 | "price_changes_sorted": price_changes_sorted, 128 | "discrete_price_change_list": discrete_price_change_list, 129 | "discrete_price_change_cdf": discrete_price_change_cdf, 130 | } 131 | 132 | return exog_params 133 | 134 | 135 | if __name__ == "__main__": 136 | file = "Parameters.xlsx" 137 | seed = 189654913 138 | 139 | # Reading the algorithm pars 140 | parDf = pd.read_excel(file, sheet_name="ParamsModel") 141 | parDict = parDf.set_index("Index").T.to_dict("list") 142 | params = {key: v for key, value in parDict.items() for v in value} 143 | params["seed"] = seed 144 | params["T"] = min(params["T"], 192) 145 | 146 | parDf = pd.read_excel(file, sheet_name="GridSearch") 147 | parDict = parDf.set_index("Index").T.to_dict("list") 148 | paramsPolicy = {key: v for key, value in parDict.items() for v in value} 149 | params.update(paramsPolicy) 150 | 151 | parDf = pd.read_excel(file, sheet_name="BackwardDP") 152 | parDict = parDf.set_index("Index").T.to_dict("list") 153 | paramsPolicy = {key: v for key, value in parDict.items() for v in value} 154 | params.update(paramsPolicy) 155 | 156 | if isinstance(params["priceDiscSet"], str): 157 | price_disc_list = params["priceDiscSet"].split(",") 158 | price_disc_list = [float(e) for e in price_disc_list] 159 | else: 160 | price_disc_list = [float(params["priceDiscSet"])] 161 | params["price_disc_list"] = price_disc_list 162 | 163 | print("Parameters ", params) 164 | # input("enter any key to continue...") 165 | 166 | # exog_params is a dictionary with three lists: hist_price, price_changes_list, discrete_price_change_cdf 167 | exog_params = process_raw_price_data(file, params) 168 | 169 | # create a model and a policy 170 | policy_names = ["buy_low_sell_high_policy", "bellman_policy"] 171 | state_variable = ["price", "energy_amount"] 172 | initial_state = { 173 | "price": exog_params["hist_price"][0], 174 | "energy_amount": params["R0"], 175 | } 176 | decision_variable = ["buy", "hold", "sell"] 177 | possible_decisions = [ 178 | {"buy": 1, "hold": 0, "sell": 0}, 179 | {"buy": 0, "hold": 0, "sell": 1}, 180 | {"buy": 0, "hold": 1, "sell": 0}, 181 | ] 182 | M = ESM( 183 | state_variable, 184 | decision_variable, 185 | initial_state, 186 | params, 187 | exog_params, 188 | possible_decisions, 189 | ) 190 | P = EnergyStoragePolicy(M, policy_names) 191 | 192 | ########################################################################## 193 | # GridSearch 194 | if params["Algorithm"] == "GridSearch": 195 | # obtain the theta values to carry out a full grid search 196 | grid_search_theta_values = P.grid_search_theta_values(params) 197 | print(grid_search_theta_values) 198 | # input("enter any key to continue...") 199 | 200 | # use those theta values to calculate corresponding contribution values 201 | contribution_values_dict = P.perform_grid_search( 202 | params, grid_search_theta_values[0] 203 | ) 204 | 205 | # plot those contribution values 
on a heat map, with theta_buy on the horizontal axis and theta_sell on the 206 | # vertical axis 207 | P.plot_heat_map( 208 | contribution_values_dict, 209 | grid_search_theta_values[1], 210 | grid_search_theta_values[2], 211 | ) 212 | ################################################################################## 213 | 214 | ################################################################################# 215 | # BackwardDP 216 | if params["Algorithm"] == "BackwardDP": 217 | # Constructing the state space 218 | # make list of possible energy amount stored at a time 219 | discrete_energy = np.array([0.0, 1.0]) 220 | 221 | # make list of prices with different increments 222 | min_price = np.min(exog_params["hist_price"]) 223 | max_price = np.max(exog_params["hist_price"]) 224 | 225 | for inc in params["price_disc_list"]: 226 | discrete_prices = np.arange(min_price, max_price + inc, inc) 227 | 228 | print("\nStarting BackwardDP 2D") 229 | test_2D = BDP( 230 | discrete_prices, 231 | discrete_energy, 232 | exog_params["price_changes_sorted"], 233 | exog_params["discrete_price_change_list"], 234 | exog_params["discrete_price_change_cdf"], 235 | params["T"], 236 | copy(M), 237 | ) 238 | 239 | # 2D states - time the process with a 2D state variable 240 | t0 = time.time() 241 | value_dict = test_2D.bellman() 242 | t1 = time.time() 243 | time_elapsed = t1 - t0 244 | print("Time_elapsed_2D_model={:.2f} secs.".format(time_elapsed)) 245 | 246 | print("Starting policy evaluation for the actual sample path") 247 | tS = time.time() 248 | contribution = P.run_policy(test_2D, "bellman_policy", params["T"]) 249 | print( 250 | "Contribution using BackwardDP 2D is {:.2f}. Finished in {:.2f}s".format( 251 | contribution, time.time() - tS 252 | ) 253 | ) 254 | 255 | if params["run3D"]: 256 | print("\nStarting BackwardDP 3D") 257 | 258 | state_variable_3 = ["price", "energy_amount", "prev_price"] 259 | 260 | index = bisect(discrete_prices, exog_params["hist_price"][1]) 261 | adjusted_p1 = discrete_prices[index] 262 | index = bisect(discrete_prices, exog_params["hist_price"][0]) 263 | adjusted_p0 = discrete_prices[index] 264 | initial_state_3 = { 265 | "price": adjusted_p1, 266 | "energy_amount": params["R0"], 267 | "prev_price": adjusted_p0, 268 | } 269 | 270 | M3 = ESM( 271 | state_variable_3, 272 | decision_variable, 273 | initial_state_3, 274 | params, 275 | exog_params, 276 | possible_decisions, 277 | ) 278 | P3 = EnergyStoragePolicy(M3, policy_names) 279 | 280 | test_3D = BDP( 281 | discrete_prices, 282 | discrete_energy, 283 | exog_params["price_changes_sorted"], 284 | exog_params["discrete_price_change_list"], 285 | exog_params["discrete_price_change_cdf"], 286 | params["T"], 287 | copy(M3), 288 | ) 289 | 290 | t0 = time.time() 291 | value_dict = test_3D.bellman() 292 | t1 = time.time() 293 | time_elapsed = t1 - t0 294 | print("Time_elapsed_3D_model={:.2f} secs.".format(time_elapsed)) 295 | 296 | print("Starting policy evaluation for the actual sample path") 297 | tS = time.time() 298 | contribution = P3.run_policy(test_3D, "bellman_policy", params["T"]) 299 | print( 300 | "Contribution using BackwardDP 3D is {:.2f}. 
Finished in {:.2f}s".format( 301 | contribution, time.time() - tS 302 | ) 303 | ) 304 | 305 | ######################################################################### 306 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStorageModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage model class 3 | Adapted from code by Donghun Lee (c) 2018 4 | 5 | """ 6 | from collections import namedtuple 7 | import numpy as np 8 | import pandas as pd 9 | 10 | 11 | class EnergyStorageModel: 12 | """ 13 | Base class for energy storage model 14 | """ 15 | 16 | def __init__( 17 | self, 18 | state_variable, 19 | decision_variable, 20 | state_0, 21 | params, 22 | exog_params, 23 | possible_decisions, 24 | exog_info_fn=None, 25 | transition_fn=None, 26 | objective_fn=None, 27 | ): 28 | """ 29 | Initializes the model 30 | 31 | :param state_variable: list(str) - state variable dimension names 32 | :param decision_variable: list(str) - decision variable dimension names 33 | :param state_0: dict - contains the information to populate initial state, including eta (the fraction of 34 | energy maintained when charging or discharging the battery) and battery capacity 35 | :param params: all the parameters including DataFrame (exog_data) containning the price information 36 | :param possible_decisions: list - list of possible decisions we could make 37 | :param exog_info_fn: function - calculates relevant exogenous information 38 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 39 | evolves 40 | :param objective_fn: function - calculates contribution at time t 41 | """ 42 | 43 | self.init_args = params 44 | self.prng = np.random.RandomState(params["seed"]) 45 | self.exog_params = exog_params 46 | 47 | self.initial_state = state_0 48 | self.state_variable = state_variable 49 | self.decision_variable = decision_variable 50 | 51 | self.possible_decisions = possible_decisions 52 | self.State = namedtuple("State", state_variable) 53 | self.state = self.build_state(self.initial_state) 54 | self.Decision = namedtuple("Decision", decision_variable) 55 | self.objective = 0.0 56 | 57 | # This will keep a list of states visited 58 | self.states = [self.state] 59 | 60 | def reset(self): 61 | self.objective = 0.0 62 | self.state = self.build_state(self.initial_state) 63 | self.states = [self.state] 64 | 65 | def build_state(self, info): 66 | """ 67 | this function returns a state containing all the state information needed 68 | 69 | :param info: dict - contains all state information 70 | :return: namedtuple - a state object 71 | """ 72 | return self.State(*[info[k] for k in self.state_variable]) 73 | 74 | def build_decision(self, info, energy_amount): 75 | """ 76 | this function returns a decision 77 | 78 | :param info: dict - contains all decision info 79 | :param energy_amount: float - amount of energy 80 | :return: namedtuple - a decision object 81 | 82 | """ 83 | info_copy = {"buy": 0, "hold": 0, "sell": 0} 84 | # the amount of power that can be bought or sold is limited by constraints 85 | for k in self.decision_variable: 86 | if k == "buy" and info[k] > 0: 87 | info_copy[k] = ( 88 | self.init_args["Rmax"] - energy_amount 89 | ) / self.init_args["eta"] 90 | elif k == "sell" and info[k] > energy_amount: 91 | info_copy[k] = energy_amount 92 | else: 93 | info_copy[k] = info[k] 94 | return self.Decision(*[info_copy[k] for k in 
self.decision_variable]) 95 | 96 | def exog_info_fn(self, time): 97 | next_price = self.exog_params["hist_price"][time] 98 | 99 | return next_price 100 | 101 | def transition_fn(self, time, decision): 102 | """ 103 | this function takes in the decision and exogenous information to update the state 104 | 105 | :param time: int - time at which the state is at 106 | :param decision: namedtuple - contains all decision info 107 | :return: updated state 108 | """ 109 | new_price = self.exog_info_fn(time) 110 | new_energy_amount = ( 111 | self.state.energy_amount 112 | + (self.init_args["eta"] * decision.buy) 113 | - decision.sell 114 | ) 115 | 116 | if len(self.state_variable) == 2: 117 | state = self.build_state( 118 | {"energy_amount": new_energy_amount, "price": new_price} 119 | ) 120 | 121 | elif len(self.state_variable) == 3: 122 | state = self.build_state( 123 | { 124 | "energy_amount": new_energy_amount, 125 | "price": new_price, 126 | "prev_price": self.state.price, 127 | } 128 | ) 129 | 130 | return state 131 | 132 | def objective_fn(self, decision): 133 | """ 134 | this function calculates the contribution, which depends on the decision and the price 135 | 136 | :param decision: namedtuple - contains all decision info 137 | :return: float - calculated contribution 138 | """ 139 | obj_part = self.state.price * ( 140 | self.init_args["eta"] * decision.sell - decision.buy 141 | ) 142 | return obj_part 143 | 144 | def step(self, time, decision): 145 | """ 146 | this function steps the process forward by one time increment by updating the sum of the contributions 147 | and the state variable 148 | 149 | :param time: int - time at which the state is at 150 | :param decision: decision: namedtuple - contains all decision info 151 | :return: none 152 | """ 153 | self.objective += self.objective_fn(decision) 154 | self.state = self.transition_fn(time, decision) 155 | self.states.append(self.state) 156 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStoragePolicy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage policy class 3 | 4 | """ 5 | from collections import namedtuple 6 | import pandas as pd 7 | import numpy as np 8 | from EnergyStorageModel import EnergyStorageModel as ESM 9 | import matplotlib.pyplot as plt 10 | from copy import copy 11 | import time 12 | 13 | 14 | class EnergyStoragePolicy: 15 | """ 16 | Base class for decision policy 17 | """ 18 | 19 | def __init__(self, model, policy_names): 20 | """ 21 | Initializes the policy 22 | 23 | :param model: EnergyStorageModel - the model that the policy is being implemented on 24 | :param policy_names: list(str) - list of policies 25 | """ 26 | 27 | self.model = model 28 | self.policy_names = policy_names 29 | self.Policy = namedtuple("Policy", policy_names) 30 | 31 | def buy_low_sell_high_policy(self, time, state, theta): 32 | """ 33 | this function implements the buy low, sell high policy for the ESM 34 | 35 | :param state: namedtuple - the state of the model at a given time 36 | :param theta: tuple - contains the parameters needed to run the policy 37 | :return: a decision made based on the policy 38 | """ 39 | lower_limit = theta[0] 40 | upper_limit = theta[1] 41 | if state.price <= lower_limit: 42 | new_decision = self.model.possible_decisions[0] 43 | elif state.price >= upper_limit: 44 | new_decision = self.model.possible_decisions[1] 45 | else: 46 | new_decision = self.model.possible_decisions[2] 47 | return 
new_decision 48 | 49 | def bellman_policy(self, time, state, bellman_model): 50 | price = state.price 51 | energy = state.energy_amount 52 | 53 | maxValue = -np.inf 54 | maxDec = None 55 | for d in self.model.possible_decisions: 56 | x = self.model.build_decision(d, energy) 57 | contribution = price * (x.sell - x.buy) 58 | 59 | sum_w = 0 60 | w_index = 0 61 | for w in bellman_model.discrete_price_changes: 62 | f = ( 63 | bellman_model.f_p[w_index] 64 | if w_index == 0 65 | else bellman_model.f_p[w_index] - bellman_model.f_p[w_index - 1] 66 | ) 67 | next_state = bellman_model.state_transition(state, x, w) 68 | next_v = ( 69 | bellman_model.values_dict[time + 1][next_state] 70 | if time < bellman_model.time 71 | else bellman_model.terminal_contribution 72 | ) 73 | sum_w += f * next_v 74 | 75 | w_index += 1 76 | # print("w_index={}".format(w_index)) 77 | v = contribution + sum_w 78 | if v > maxValue: 79 | maxValue = v 80 | maxDec = d 81 | return maxDec 82 | 83 | def run_policy(self, policy_info, policy, stop_time): 84 | """ 85 | this function runs the model with a selected policy 86 | 87 | :param policy_info: dict - dictionary of policies and their associated parameters 88 | :param policy: str - the name of the chosen policy 89 | :param stop_time: float - stop time 90 | :return: float - calculated contribution 91 | """ 92 | time = 0 93 | model_copy = copy(self.model) 94 | nTrades = {"buy": 0, "sell": 0, "hold": 0} 95 | buy_list = [] 96 | sell_list = [] 97 | 98 | while time != model_copy.init_args["T"]: 99 | decision = getattr(self, policy)(time, model_copy.state, policy_info) 100 | 101 | # Last time period - we are going to sell energy 102 | if time == model_copy.init_args["T"] - 1: 103 | decision = {"buy": 0, "hold": 0, "sell": 1} 104 | 105 | x = model_copy.build_decision(decision, model_copy.state.energy_amount) 106 | 107 | nTrades["buy"] += x.buy 108 | nTrades["sell"] += x.sell 109 | nTrades["hold"] += model_copy.state.energy_amount 110 | if x.buy > 0: 111 | buy_list.append((time, model_copy.state.price)) 112 | elif x.sell > 0: 113 | sell_list.append((time, model_copy.state.price)) 114 | 115 | # print("time={}, obj={}, state.energy_amount={}, state.price={}, x={}".format(time, model_copy.objective,model_copy.state.energy_amount, model_copy.state.price, x)) 116 | 117 | # step the model forward one iteration 118 | model_copy.step(time, x) 119 | # increment time 120 | time += 1 121 | contribution = model_copy.objective 122 | 123 | print( 124 | "Energy traded - Sell: {:.2f} - Buy: {:.2f} - Hold % : {:.2f}".format( 125 | nTrades["sell"], 126 | nTrades["buy"], 127 | nTrades["hold"] / model_copy.init_args["T"], 128 | ) 129 | ) 130 | print("Sell times and prices ") 131 | for i in range(len(sell_list)): 132 | print( 133 | "t = {:.2f} and price = {:.2f}".format(sell_list[i][0], sell_list[i][1]) 134 | ) 135 | print("Buy times and prices ") 136 | for i in range(len(buy_list)): 137 | print( 138 | "t = {:.2f} and price = {:.2f}".format(buy_list[i][0], buy_list[i][1]) 139 | ) 140 | 141 | return contribution 142 | 143 | def perform_grid_search(self, params, theta_values): 144 | """ 145 | this function calculates the contribution for each theta value in a list 146 | 147 | :param policy_info: dict - dictionary of policies and their associated parameters 148 | :param policy: str - the name of the chosen policy 149 | :param stop_time: float - stop time 150 | :param theta_values: list - list of all possible thetas to be tested 151 | :return: list - list of contribution values corresponding to each theta 152 | 
""" 153 | 154 | tS = time.time() 155 | contribution_values_dict = {} 156 | 157 | bestTheta = None 158 | bestContribution = -np.inf 159 | 160 | for theta in theta_values: 161 | # print("Starting theta {}".format(theta)) 162 | if theta[0] >= theta[1]: 163 | contribution_values_dict[theta] = 0 164 | else: 165 | contribution = self.run_policy( 166 | theta, "buy_low_sell_high_policy", params["T"] 167 | ) 168 | contribution_values_dict[theta] = contribution 169 | best_theta = max( 170 | contribution_values_dict, key=contribution_values_dict.get 171 | ) 172 | print( 173 | "Finishing theta {} with contribution {:.2f}. Best theta so far {}. Best contribution {:.2f}".format( 174 | theta, 175 | contribution, 176 | best_theta, 177 | contribution_values_dict[best_theta], 178 | ) 179 | ) 180 | 181 | print("Finishing GridSearch in {:.2f} secs".format(time.time() - tS)) 182 | return contribution_values_dict 183 | 184 | def grid_search_theta_values(self, params): 185 | """ 186 | this function gives a list of theta values needed to run a full grid search 187 | 188 | """ 189 | theta_buy_values = np.arange( 190 | params["theta_buy_min"], params["theta_buy_max"], params["theta_inc"] 191 | ) 192 | theta_sell_values = np.arange( 193 | params["theta_sell_min"], params["theta_sell_max"], params["theta_inc"] 194 | ) 195 | 196 | theta_values = [(x, y) for x in theta_buy_values for y in theta_sell_values] 197 | 198 | return theta_values, theta_buy_values, theta_sell_values 199 | 200 | def plot_heat_map(self, contribution_dict, theta_buy_values, theta_sell_values): 201 | """ 202 | this function plots a heat map 203 | 204 | :param contribution_dict: dict of contribution values 205 | :param theta_buy_values: list - list of theta_buy_values 206 | :param theta_sell_values: list - list of theta_sell_values 207 | :return: none (plots a heat map) 208 | """ 209 | 210 | contribution_values = [ 211 | contribution_dict[(theta_buy, theta_sell)] 212 | for theta_sell in theta_sell_values 213 | for theta_buy in theta_buy_values 214 | ] 215 | contributions = np.array(contribution_values) 216 | increment_count = len(theta_buy_values) 217 | contributions = np.reshape(contributions, (-1, increment_count)) 218 | 219 | fig, ax = plt.subplots() 220 | im = ax.imshow(contributions, cmap="hot", origin="lower", aspect="auto") 221 | # create colorbar 222 | cbar = ax.figure.colorbar(im, ax=ax) 223 | # cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom") 224 | # we want to show all ticks... 225 | ax.set_xticks(np.arange(0, len(theta_buy_values), 5)) 226 | ax.set_yticks(np.arange(0, len(theta_sell_values), 5)) 227 | # ... and label them with the respective list entries 228 | ax.set_xticklabels(theta_buy_values[::5]) 229 | ax.set_yticklabels(theta_sell_values[::5]) 230 | # rotate the tick labels and set their alignment. 
231 | # plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor") 232 | ax.set_title("Heatmap of contribution values across different values of theta") 233 | 234 | ax.set_ylabel("Theta sell high values") 235 | ax.set_xlabel("Theta buy low values") 236 | 237 | # fig.tight_layout() 238 | plt.show() 239 | return True 240 | -------------------------------------------------------------------------------- /EnergyStorage_I/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/EnergyStorage_I/Parameters.xlsx -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 donghun2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Learning the Best Diabetes Medication\n", 8 | "\n", 9 | "Here we implement the Bayesian belief model from Chapter 4 to find the best Diabetes medication. The following inputs are needed to fully specify the statistical model:\n", 10 | "\n", 11 | "1. `S0`: For every drug, we maintain a *belief* about its A1C reduction. The beliefs are modelled as a set of normal distributions (i.e., two parameters per drug) that evolve as we make observations. The initial belief is specified with a mean and standard deviation derived from, e.g., the efficacy of each drug over an entire population (many individuals).\n", 12 | "2. `mu_truth`: When simulating the model, the true (but unknown) value for the A1C reduction of every drug must be simulated as well. We do that by directly passing a `scipy.stats.uniform` object, from which the model can draw samples. Caution: The two arguments of `scipy.stats.uniform`, `loc` and `scale` are not the upper and lower bound, but the lower and upper bounds will be `[loc, loc+scale]`. It is also possible to select fixed values for `mu_truth`. In this case, select a uniform distribution where the lower and upper bound are equal. \n", 13 | "3. 
`sigma_W`: This is the standard deviation of an observation. Observations are sampled from a normal distribution with mean `mu_truth` and standard deviation `sigma_W`.\n", 14 | "\n", 15 | "We first create a model where `mu_truth` is fixed, that means we have only one random process (observational uncertainty)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import scipy.stats\n", 25 | "import numpy as np\n", 26 | "import plotly.express as px\n", 27 | "import plotly.graph_objects as go\n", 28 | "import MedicalDecisionDiabetesModel as mddm\n", 29 | "import MedicalDecisionDiabetesPolicies as mddp\n", 30 | "import BaseClasses.Util as util\n", 31 | "\n", 32 | "S0 = {\n", 33 | " \"M\": [0.32, 0.12],\n", 34 | " \"Sens\": [0.28, 0.09],\n", 35 | " \"Secr\": [0.3, 0.17],\n", 36 | " \"AGI\": [0.26, 0.15],\n", 37 | " \"PA\": [0.21, 0.11],\n", 38 | "}\n", 39 | "\n", 40 | "mu_truth = {\n", 41 | " \"M\": scipy.stats.uniform(loc=0.3, scale=0.0),\n", 42 | " \"Sens\": scipy.stats.uniform(loc=0.2, scale=0.0),\n", 43 | " \"Secr\": scipy.stats.uniform(loc=0.4, scale=0.0),\n", 44 | " \"AGI\": scipy.stats.uniform(loc=0.33, scale=0.0),\n", 45 | " \"PA\": scipy.stats.uniform(loc=0.35, scale=0.0),\n", 46 | "}\n", 47 | "\n", 48 | "model = mddm.MedicalDecisionDiabetesModel(S0=S0, mu_truth=mu_truth, sigma_W=0.05, T=20)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Next, we create an upper confidence bound policy and run it for 1000 iterations. In each iteration, we have 20 observations/trials. The objective is to maximize the total level of A1C reduction with these 20 trials. " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "policy = mddp.UCB(model, theta=1)\n", 65 | "policy.run_policy(n_iterations=1000)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "We have a closer look at one sample run. The state variable for every drug are given as triples. We create one column for mean, standard deviation, and $N_x$. " 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "for drug in S0.keys():\n", 82 | " policy.results[drug + \"_mu\"] = policy.results[drug].apply(lambda x: x[0])\n", 83 | " policy.results[drug + \"_sigma\"] = 1.0/policy.results[drug].apply(lambda x: np.sqrt(x[1]))\n", 84 | " policy.results[drug + \"_N\"] = policy.results[drug].apply(lambda x: x[2])\n", 85 | " policy.results[drug + \"_chosen\"] = policy.results.groupby(\"N\")[drug + \"_N\"].diff()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Now we plot a random iteration of 20 trials. We plot\n", 93 | "- the current belief $\\mu^n_x$ for every drug\n", 94 | "- the current uncertainty in the belief $\\sigma^n_x$ as errorbars\n", 95 | "\n", 96 | "Only when a drug is chosen, $\\mu^n_x$ and $\\sigma^n_x$ will change. $\\sigma^n_x$ will be monotonically decreasing (we are getting more certain the more often we try a drug)." 
97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "sample_paths = np.random.choice(1000, size=1, replace=False)\n", 106 | "df = policy.results.loc[policy.results.N.isin(sample_paths), :]\n", 107 | "\n", 108 | "long_df = df.melt(id_vars=[\"t\",\"N\"], value_vars=[\"M_mu\", \"Sens_mu\", \"Secr_mu\", \"AGI_mu\", \"PA_mu\"], value_name=\"mu\", var_name=\"drug\")\n", 109 | "long_df[\"sigma\"] = df[[\"M_sigma\", \"Sens_sigma\", \"Secr_sigma\", \"AGI_sigma\", \"PA_sigma\"]].unstack().values\n", 110 | "long_df[\"chosen\"] = df[[\"M_chosen\", \"Sens_chosen\", \"Secr_chosen\", \"AGI_chosen\", \"PA_chosen\"]].unstack().values\n", 111 | "\n", 112 | "px.line(data_frame=long_df, x=\"t\", y=\"mu\", color=\"drug\", error_y=\"sigma\", facet_row=\"N\", hover_data=\"chosen\", markers=True)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Exercise 1\n", 120 | "Perform a grid search for the UCB policy with values $\\theta=0.0,0.2,\\dots,2.0$ and plot the performance for each value of $\\theta$. What do you learn from this plot?" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "---" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Exercise 2\n", 135 | "In this exercise we investigate the interval estimation policy. We are going to evaluate it for three different sets of thruths. For each of the three cases run a grid search for $\\theta=0.0,0.2,\\dots,2.0$ with 10000 iterations and plot the average performance against the value of $\\theta$.\n", 136 | "\n", 137 | "1. Use the values for `S0` and `mu_truth` just as given at the beginning of the notebook (you can even reuse the model object). \n", 138 | "2. Let $\\mu_x^0$ be your initial belief about the performance of drug $x$. Use $\\mu_x^0$ as given in `S0` above but simulate the truth by taking a sample of a uniform distribution on the interval $[0.5\\mu_x^0, 1.5\\mu_x^0]$. This is an example of having a prior distribution of belief (in this case, that is normally distributed) but sampling the truth from a different distribution (that is uniformly distributed around the mean).\n", 139 | "3. Set `S0` such that the prior is $\\mu_x^0=0.3$ for all five drugs $x$ with initial standard deviation $\\sigma_x^0=0.1$. Sample `mu_truth` for all five drugs uniformly from the interval $[0.15,0.45]$.\n", 140 | "\n", 141 | "What conclusions can you draw from each of the plots?" 
142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "sda", 148 | "language": "python", 149 | "name": "python3" 150 | }, 151 | "language_info": { 152 | "codemirror_mode": { 153 | "name": "ipython", 154 | "version": 3 155 | }, 156 | "file_extension": ".py", 157 | "mimetype": "text/x-python", 158 | "name": "python", 159 | "nbconvert_exporter": "python", 160 | "pygments_lexer": "ipython3", 161 | "version": "3.10.12" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } 167 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetesModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | 6 | 7 | class MedicalDecisionDiabetesModel(SDPModel): 8 | def __init__( 9 | self, 10 | mu_truth: dict, 11 | sigma_W: float, 12 | S0: dict, 13 | t0: float = 0, 14 | T: float = 1, 15 | seed: int = 42, 16 | ) -> None: 17 | state_names = list(S0.keys()) 18 | self.sigma_W = sigma_W 19 | self.beta_W = 1 / self.sigma_W**2 20 | self.mu_truth = mu_truth 21 | 22 | # For each drug, add the number of times the drug has been prescribed as a state 23 | for state in S0: 24 | if len(S0[state]) < 3: 25 | S0[state].append(0) 26 | if len(S0[state]) != 3: 27 | print(f"Need to provide prior mu and sigma for drug {state}!") 28 | # TODO: proper error handling 29 | return 30 | else: 31 | # S0 contains mu and sigma, but we model the states as mu and beta 32 | mu, sigma, N = S0[state] 33 | S0[state] = [mu, 1 / sigma**2, N] 34 | 35 | # Create one sample of the truth 36 | self.mu_truth_sample = {} 37 | for state in self.mu_truth: 38 | self.mu_truth_sample[state] = self.mu_truth[state].rvs() 39 | 40 | decision_names = ["choice"] 41 | 42 | super().__init__(state_names, decision_names, S0, t0, T, seed) 43 | 44 | def reset(self, reset_prng: bool = False): 45 | super().reset(reset_prng) 46 | 47 | # When the model is reset, create a new sample of the truth 48 | for state in self.mu_truth: 49 | self.mu_truth_sample[state] = self.mu_truth[state].rvs() 50 | 51 | # this function gives the exogenous information that is dependent on a random process 52 | # In our case, exogeneous information: W^(n+1) = mu_x + eps^(n+1), 53 | # Where eps^(n+1) is normally distributed with mean 0 and known variance (here s.d. 0.05) 54 | # W^(n+1)_x : reduction in A1C level 55 | # self.prng.normal takes two values, mu and sigma. 56 | def exog_info_fn(self, decision): 57 | x = decision.choice 58 | W = self.prng.normal(self.mu_truth_sample[x], self.sigma_W) 59 | 60 | return {"reduction": W} 61 | 62 | # this function takes in the decision and exogenous information to return\ 63 | # the new mu and beta values corresponding to the decision. 64 | def transition_fn(self, decision, exog_info): 65 | # For all states except one the state values do not change. 66 | new_state = {state: getattr(self.state, state) for state in self.state_names} 67 | 68 | # Update the state for the drug that was prescribed in this step 69 | x = decision.choice 70 | mu_x, beta_x, N_x = getattr(self.state, x) 71 | mu_x = (beta_x * mu_x + self.beta_W * exog_info["reduction"]) / (beta_x + self.beta_W) 72 | beta_x = beta_x + self.beta_W 73 | N_x += 1 # count of no. times drug x was given. 
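        # Descriptive note: the two updates above are the conjugate Bayesian update for a
        # normal belief, written in precision form (beta = 1/sigma^2):
        #     mu_x^(n+1)   = (beta_x^n * mu_x^n + beta_W * W^(n+1)) / (beta_x^n + beta_W)
        #     beta_x^(n+1) = beta_x^n + beta_W
        # The posterior mean is a precision-weighted average of the prior mean and the new
        # observation W^(n+1), and the precision of the belief grows with every trial.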
74 | 75 | new_state[x] = [mu_x, beta_x, N_x] 76 | 77 | return new_state 78 | 79 | # contribution is W (reduction in A1C level) 80 | def objective_fn(self, decision, exog_info): 81 | W = exog_info["reduction"] 82 | return W 83 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetesPolicies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | from BaseClasses.SDPPolicy import SDPPolicy 6 | from math import sqrt, log 7 | import numpy as np 8 | 9 | 10 | class UCB(SDPPolicy): 11 | def __init__(self, model: SDPModel, policy_name: str = "UCB", theta: float = 1): 12 | self.theta = theta 13 | super().__init__(model, policy_name) 14 | 15 | def get_decision(self, state, t, T): 16 | # this method implements the Upper Confidence Bound policy 17 | # N.B: can't implement this at time t=0 (from t=1 at least). 18 | # Also can't divide by zero, which means we need each drug to have been tested at least once. 19 | # 20 | # Note that state has a list of 3 entries, for each key(type of drug) in the dictionary 21 | # {"drug" : [mu_empirical, beta, number of times drug given to patient]} 22 | obj_approx = {} 23 | for s in state._fields: 24 | mu, beta, N = getattr(state, s) 25 | obj_approx[s] = mu + self.theta * sqrt(log(t + 1) / (N + 1)) 26 | 27 | optimal_decision = max(obj_approx, key=obj_approx.get) 28 | 29 | return {"choice": optimal_decision} 30 | 31 | 32 | class IE(SDPPolicy): 33 | def __init__(self, model: SDPModel, policy_name: str = "IE", theta: float = 1): 34 | self.theta = theta 35 | super().__init__(model, policy_name) 36 | 37 | def get_decision(self, state, t, T): 38 | obj_approx = {} 39 | for s in state._fields: 40 | mu, beta, N = getattr(state, s) 41 | sigma = 1 / sqrt(beta) 42 | obj_approx[s] = mu + self.theta * sigma 43 | 44 | optimal_decision = max(obj_approx, key=obj_approx.get) 45 | 46 | return {"choice": optimal_decision} 47 | 48 | 49 | class PureExploitation(SDPPolicy): 50 | def __init__(self, model: SDPModel, policy_name: str = "PureExploitation"): 51 | super().__init__(model, policy_name) 52 | 53 | def get_decision(self, state, t, T): 54 | obj_approx = {} 55 | for s in state._fields: 56 | mu, beta, N = getattr(state, s) 57 | obj_approx[s] = mu 58 | 59 | optimal_decision = max(obj_approx, key=obj_approx.get) 60 | 61 | return {"choice": optimal_decision} 62 | 63 | 64 | class PureExploration(SDPPolicy): 65 | def __init__(self, model: SDPModel, policy_name: str = "PureExploration", seed: int = 42): 66 | self.prng = np.random.RandomState(seed) 67 | super().__init__(model, policy_name) 68 | 69 | def get_decision(self, state, t, T): 70 | optimal_decision = self.prng.choice(state._fields) 71 | 72 | return {"choice": optimal_decision} 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sequential Decision Problem Modeling Library 2 | 3 | This is a refactoring and evolution of the Sequential Decision Problem Modeling Library from Castle Lab, Princeton Univ. The goal is to make the problem code more structured, easily extendable, and more readable. 
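For orientation, every refactored problem follows the same pattern: a problem-specific model class is paired with one or more policy classes, and a policy is evaluated by simulating it over many iterations. A minimal sketch of this pattern, taken from the MedicalDecisionDiabetes notebook (parameter values are illustrative; run from within the problem folder):

```python
import scipy.stats
import MedicalDecisionDiabetesModel as mddm
import MedicalDecisionDiabetesPolicies as mddp

# Prior belief (mean, standard deviation) about the A1C reduction of each drug
S0 = {"M": [0.32, 0.12], "Sens": [0.28, 0.09], "Secr": [0.3, 0.17],
      "AGI": [0.26, 0.15], "PA": [0.21, 0.11]}

# Fixed truths, modeled as degenerate uniform distributions
mu_truth = {drug: scipy.stats.uniform(loc=loc, scale=0.0)
            for drug, loc in [("M", 0.3), ("Sens", 0.2), ("Secr", 0.4),
                              ("AGI", 0.33), ("PA", 0.35)]}

model = mddm.MedicalDecisionDiabetesModel(S0=S0, mu_truth=mu_truth, sigma_W=0.05, T=20)
policy = mddp.UCB(model, theta=1)     # upper confidence bound policy
policy.run_policy(n_iterations=1000)  # simulate 1000 episodes of 20 trials each
```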
4 | 5 | The major changes are: 6 | - Introduction of abstract base classes `SDPModel` and `SDPPolicy` from which all sequential decision problems and policies inherit 7 | - Jupyter Notebook with plotly as frontend 8 | 9 | Furthermore, the code was cleaned up for readability and exercises were added to some of the Notebooks. 10 | 11 | ## Installation 12 | 13 | Requires Python 3 and the following packages: 14 | - numpy 15 | - scipy 16 | - pandas 17 | - plotly.express 18 | - yfinance (for AssetSelling) 19 | - osmnx (for StochasticShortestPath) 20 | - networkx (for StochasticShortestPath) 21 | 22 | ## Included Problem Models 23 | 24 | This is work in progress. For now, new models exist for 25 | - `AssetSelling` 26 | - `MedicalDecisionDiabetes` 27 | - `StochasticShortestPath_static` 28 | 29 | Further models will be added in the future. The other folders contain the models from the original repository [https://github.com/wbpowell328/stochastic-optimization]. 30 | 31 | There is an `ipynb`-file in each problem folder which is the starting point for running the models. 32 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Driver.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stochastic Shortest Paths - Learning the costs 3 | Dynamic Model - search for the parameter theta, 4 | which represents the percentile of the distribution 5 | of each cost to use to make sure we get a penalty as 6 | small as possible. Run it using python command. 7 | 8 | Author: Andrei Graur 9 | """ 10 | 11 | from collections import namedtuple 12 | import math 13 | from copy import copy 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import pandas as pd 17 | import xlrd 18 | 19 | from Model import StaticModel 20 | from GraphGenerator import GraphGenerator 21 | from Policy import LookaheadPolicy 22 | 23 | 24 | if __name__ == "__main__": 25 | file = "Parameters.xlsx" 26 | seed = 189654913 27 | METRIC = "PERCENTILE" 28 | 29 | # Reading the algorithm pars 30 | parDf = pd.read_excel(file, sheet_name="Parameters") 31 | parDict = parDf.set_index("Index").T.to_dict("list") 32 | params = {key: v for key, value in parDict.items() for v in value} 33 | params["seed"] = seed 34 | theta_list = params["theta_cost_set"].split() 35 | 36 | print("Parameters ", params) 37 | 38 | # Initializing the network 39 | G = GraphGenerator(params) 40 | if params["networkType"] == "Steps": 41 | nTries = G.createNetworkSteps() 42 | else: 43 | nTries = G.createNetworkChance() 44 | 45 | print( 46 | "Created network in {} tries. From origin {} to destination {}. Number of steps is {} and the average cost is {:.2f}. 
The deadline to define lateness will be {:.2f}".format( 47 | nTries, 48 | G.start_node, 49 | G.end_node, 50 | G.steps, 51 | G.get_avg_cost_paths(), 52 | G.get_deadline(), 53 | ) 54 | ) 55 | # input("Press Enter to continue...") 56 | 57 | # Initializing the model 58 | state_names = ["node"] 59 | init_state = {"node": G.start_node} 60 | decision_names = ["nextNode"] 61 | 62 | M = StaticModel(state_names, decision_names, init_state, params, G) 63 | 64 | # Initialing the lists that will hold the results 65 | x = [] 66 | avgCostList = [] 67 | avgPenaltyList = [] 68 | avgStepsList = [] 69 | 70 | # Iterating over theta 71 | for theta in theta_list: 72 | theta = float(theta) 73 | M.start_new_theta(theta) 74 | x.append(theta) 75 | 76 | cost, penalty, steps = M.runTrials(params["nIterations"], G.get_deadline()) 77 | 78 | avgCostList.append(cost) 79 | avgPenaltyList.append(penalty) 80 | avgStepsList.append(steps) 81 | 82 | print( 83 | "Avg total cost with parameter {0} is {1:.3f}. Probability of being late is {2:.2f} and avg number of steps is {3:.2f}\n ".format( 84 | theta, cost, penalty, steps 85 | ) 86 | ) 87 | 88 | print("ThetaCost ", x) 89 | print("AvgCost ", avgCostList) 90 | print("ProbLateness ", avgPenaltyList) 91 | print("AvgSteps ", avgStepsList) 92 | 93 | # Ploting the results 94 | fig1, axsubs = plt.subplots(1, 2) 95 | fig1.suptitle( 96 | "Comparison of theta^cost - origin {}, destination {}, dist {} - deadline {} and number of iterations {}".format( 97 | M.G.start_node, 98 | M.G.end_node, 99 | M.G.steps, 100 | G.get_deadline(), 101 | params["nIterations"], 102 | ) 103 | ) 104 | 105 | axsubs[0].plot(x, avgCostList) 106 | axsubs[0].set_title("Average Cost") 107 | axsubs[0].set_xlabel("Percentile") 108 | axsubs[0].set_ylabel("$") 109 | 110 | axsubs[1].plot(x, avgPenaltyList) 111 | axsubs[1].set_title("Probability of being late (Risk) ") 112 | axsubs[1].set_xlabel("Percentile") 113 | axsubs[1].set_ylabel("%") 114 | 115 | plt.show() 116 | 117 | pass 118 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/GraphGenerator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This program generates a graph with 35 vertices that will be used for 3 | comparing the dynamic and static lookahead approaches on it. 4 | 5 | Run without any arguements. 
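In outline, two construction modes are provided: createNetworkSteps() builds a
layered graph with nSteps levels whose node counts first grow and then shrink
toward the destination, while createNetworkChance() adds each directed edge with
probability edgeProb and keeps resampling graphs until it finds an
origin/destination pair whose shortest path is longer than lengthThreshold and
that is connected by more than numberPathsThreshold simple paths. The driver
selects the mode via the networkType parameter.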
6 | 7 | Author: Andrei Graur 8 | 9 | """ 10 | 11 | import numpy as np 12 | import networkx as nx 13 | import pandas as pd 14 | import math 15 | from collections import namedtuple, defaultdict 16 | 17 | 18 | class GraphGenerator: 19 | """ 20 | Base class for the static model 21 | """ 22 | 23 | def __init__(self, params): 24 | self.init_args = params 25 | self.prng = np.random.RandomState(params["seed"]) 26 | self.meanCosts = defaultdict(dict) 27 | self.dist = defaultdict(dict) 28 | self.spreads = defaultdict(dict) 29 | self.neighbors = defaultdict(list) 30 | self.vertices = [] 31 | 32 | # The start and end node will change based on the network graph that is going to be constructed - 33 | # we are going to select the pair with the longest shortest path 34 | self.start_node = 0 35 | self.end_node = 0 36 | self.steps = 0 37 | self.vertexCount = 1 38 | self.Horizon = self.vertexCount + 1 39 | self.mPathsList = [] 40 | self.nPaths = 0 41 | 42 | def createNetworkSteps(self): 43 | filename = "Network_Steps.xlsx" 44 | nSteps = self.init_args["nSteps"] 45 | 46 | G = nx.DiGraph() 47 | 48 | nodeCount = 0 49 | nodesPerLevel = defaultdict(list) 50 | midGraph = math.ceil(nSteps / 2) 51 | 52 | for level in range(nSteps): 53 | if level < midGraph: 54 | nNodes = level * 2 + 1 55 | else: 56 | nNodes = (nSteps - level - 1) * 2 + 1 57 | 58 | for i in range(nNodes): 59 | nodesPerLevel[level].append(nodeCount) 60 | nodeCount += 1 61 | 62 | for level in range(nSteps - 1): 63 | for i in nodesPerLevel[level]: 64 | G.add_node(i) 65 | edge_set = list( 66 | self.prng.choice( 67 | nodesPerLevel[level + 1], 68 | min(3, len(nodesPerLevel[level + 1])), 69 | replace=False, 70 | ) 71 | ) 72 | for j in edge_set: 73 | meanWeight = 1 74 | G.add_edge(i, j, weight=meanWeight) 75 | 76 | self.construct_network_objects(G, filename, 0, nodeCount - 1) 77 | 78 | return 1 79 | 80 | def get_deadline(self): 81 | return ( 82 | self.init_args["costMin"] 83 | + (self.init_args["costMax"] - self.init_args["costMin"]) 84 | * self.init_args["deadlinePerc"] 85 | ) * (self.steps) 86 | 87 | def get_avg_cost_paths(self, shouldPrintPaths=False): 88 | # Printing the length and the costs of all paths 89 | totalCostList = [] 90 | if shouldPrintPaths: 91 | print( 92 | "*************Printing the length and the costs of all paths************" 93 | ) 94 | p = 0 95 | for path in self.mPathsList: 96 | nSteps = len(path) 97 | totalCost = 0 98 | p += 1 99 | pathString = "Path {}: ".format(p) 100 | for n in range(nSteps - 1): 101 | fromNode = path[n] 102 | toNode = path[n + 1] 103 | totalCost += self.meanCosts[fromNode][toNode] 104 | # edge = " ({}, {}, {:.2f}, {:.2f}) ".format(fromNode,toNode,self.meanCosts[fromNode][toNode],totalCost) 105 | # pathString += edge 106 | pathString += " - {} steps and {:.2f} total mean cost".format( 107 | nSteps, totalCost 108 | ) 109 | totalCostList.append(totalCost) 110 | if shouldPrintPaths: 111 | print(pathString) 112 | avgTotalCost = np.array(totalCostList).mean() 113 | return avgTotalCost 114 | 115 | def construct_network_objects(self, G, filename, start_node, end_node): 116 | size = G.number_of_nodes() 117 | recordList = [] 118 | for fromNode in range(size): 119 | self.vertices.append(fromNode) 120 | 121 | for toNode in G.neighbors(fromNode): 122 | self.neighbors[fromNode].append(toNode) 123 | self.meanCosts[fromNode][toNode] = self.prng.uniform( 124 | self.init_args["costMin"], self.init_args["costMax"] 125 | ) 126 | self.spreads[fromNode][toNode] = self.prng.uniform( 127 | 0, self.init_args["maxSpreadPerc"] 128 | ) 129 
| self.dist[fromNode][toNode] = 1 130 | 131 | record = ( 132 | fromNode, 133 | toNode, 134 | self.meanCosts[fromNode][toNode], 135 | self.spreads[fromNode][toNode], 136 | size, 137 | ) 138 | recordList.append(record) 139 | 140 | if self.init_args["printGraph"]: 141 | headerDf = ["From", "To", "Cost", "Spread", "Graph_size"] 142 | df = pd.DataFrame.from_records(recordList, columns=headerDf) 143 | df.to_excel(filename, sheet_name="Network", index=False) 144 | 145 | self.start_node = start_node 146 | self.end_node = end_node 147 | self.steps = nx.shortest_path_length(G, start_node, end_node) 148 | self.vertexCount = size 149 | self.Horizon = self.vertexCount + 1 150 | 151 | # We need to add the dummy link of cost 0 to the destination node 152 | r = self.end_node 153 | self.spreads[r][r] = 0 154 | self.neighbors[r].append(r) 155 | self.meanCosts[r][r] = 0 156 | self.dist[r][r] = 0 157 | 158 | self.mPathsList = list( 159 | nx.all_simple_paths(G, source=self.start_node, target=self.end_node) 160 | ) 161 | self.nPaths = len(self.mPathsList) 162 | 163 | def createNetworkChance(self): 164 | filename = "Network_Chance.xlsx" 165 | chance = self.init_args["edgeProb"] 166 | size = self.init_args["nNodes"] 167 | 168 | G = nx.DiGraph() 169 | nbIterations = 0 170 | done = 0 171 | 172 | while done == 0: 173 | for i in range(size): 174 | G.add_node(i) 175 | 176 | for i in range(size): 177 | for j in range(size): 178 | if self.prng.uniform() < chance: 179 | if i != j: 180 | meanWeight = 1 181 | G.add_edge(i, j, weight=meanWeight) 182 | 183 | maxLength = 0 184 | mSource = None 185 | mDest = None 186 | mPaths = 0 187 | mPathsList = [] 188 | 189 | breakLoop = False 190 | 191 | for i in range(size): 192 | for j in range(size): 193 | if nx.has_path(G, i, j): 194 | length = nx.shortest_path_length(G, i, j) 195 | if length >= maxLength: 196 | paths = list(nx.all_simple_paths(G, source=i, target=j)) 197 | nPaths = len(paths) 198 | 199 | if nPaths > mPaths: 200 | maxLength = length 201 | mSource = i 202 | mDest = j 203 | mPaths = nPaths 204 | mPathsList = paths 205 | 206 | if length > self.init_args["lengthThreshold"]: 207 | breakLoop = True 208 | break 209 | else: 210 | pass 211 | if breakLoop: 212 | break 213 | 214 | print( 215 | "Iteration {}, Source {}, Dest {}, Length {}, number of paths {}".format( 216 | nbIterations, mSource, mDest, maxLength, mPaths 217 | ) 218 | ) 219 | 220 | if ( 221 | maxLength > self.init_args["lengthThreshold"] 222 | and mPaths > self.init_args["numberPathsThreshold"] 223 | ): 224 | # the graph is good and we will use it and stop the loop 225 | done = 1 226 | self.construct_network_objects(G, filename, mSource, mDest) 227 | else: 228 | G.clear() 229 | nbIterations += 1 230 | 231 | return nbIterations + 1 232 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stochastic Shortest Paths - Dynamic 3 | Static Model 4 | 5 | The code implementing the basic model for the Static 6 | Version. This implements the class, do not try to run 7 | this code. Run the DynamicModel_main instead. 
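In outline: the model keeps smoothed point estimates of the link costs
(update_estimated_costs) and, at every step of a trial, LookaheadPolicy re-solves
a deterministic backward-recursion lookahead in which each link cost is replaced
by the theta-percentile of its uniform cost interval (use_percentile_val in
Policy.py). runTrials then averages total cost, the probability of exceeding the
deadline, and the number of steps over many sample paths for a fixed theta.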
8 | 9 | Author: Andrei Graur 10 | 11 | """ 12 | from collections import namedtuple, defaultdict 13 | 14 | import math 15 | import numpy as np 16 | import pandas as pd 17 | import xlrd 18 | 19 | from Policy import LookaheadPolicy 20 | 21 | 22 | class StaticModel: 23 | """ 24 | Base class for the static model 25 | """ 26 | 27 | def __init__(self, state_names, x_names, s_0, params, G): 28 | """ 29 | Initializes the model 30 | 31 | :param state_names: list(str) - state variable dimension names 32 | :param x_names: list(str) - decision variable dimension names 33 | :param s_0: dict - contains the inital state information 34 | :param s_0[meanCosts]: dict- meanCosts[k][l] is the mean of the cost on the link k-l 35 | :param s_0[spreads]: dict - spreads[k][l] represents the spread of the distribution of 36 | cost on link k-l 37 | :param Horizon: int - the horizon over which we are looking ahead 38 | :param vertexCount - the number of nodes in our network 39 | :param seed: int - seed for random number generator 40 | """ 41 | 42 | self.init_args = params 43 | 44 | self.init_state = s_0 45 | self.state_names = state_names 46 | self.State = namedtuple("State", state_names) 47 | self.state = self.build_state(self.init_state) 48 | 49 | self.x_names = x_names 50 | self.Decision = namedtuple("Decision", x_names) 51 | 52 | self.G = G 53 | 54 | self.theta = 0.5 55 | self.n = 0 56 | self.time = 1 57 | self.obs = 1 58 | self.estimated_costs = defaultdict(dict) 59 | self.prng = np.random.RandomState(params["seed"]) 60 | 61 | def start_new_theta(self, theta): 62 | self.theta = theta 63 | self.estimated_costs = defaultdict(dict) 64 | self.n = 0 65 | self.obs = 1 66 | self.prng = np.random.RandomState(self.init_args["seed"]) 67 | print("*****************Reseting model for theta {:.2f}".format(self.theta)) 68 | 69 | def update_estimated_costs(self): 70 | for k in range(self.G.vertexCount): 71 | for l in self.G.neighbors[k]: 72 | m_hat = self.sample_from_uniform(k, l) 73 | alpha = self.get_step_size() 74 | if alpha < 1: 75 | self.estimated_costs[k][l] = (1 - alpha) * self.estimated_costs[k][ 76 | l 77 | ] + alpha * m_hat 78 | else: 79 | self.estimated_costs[k][l] = m_hat 80 | 81 | self.estimated_costs[self.G.end_node][self.G.end_node] = 0 82 | 83 | def sample_from_uniform(self, fromNode, toNode): 84 | spread = self.G.spreads[fromNode][toNode] 85 | deviation = ( 86 | self.prng.uniform(-spread, spread) * self.G.meanCosts[fromNode][toNode] 87 | ) 88 | m_hat = self.G.meanCosts[fromNode][toNode] + deviation 89 | return m_hat 90 | 91 | def get_step_size(self): 92 | # alpha = 1/self.n 93 | # alpha = 1./self.time 94 | alpha = 1.0 / self.obs 95 | return alpha 96 | 97 | def build_state(self, info): 98 | return self.State(*[info[k] for k in self.state_names]) 99 | 100 | def build_decision(self, info): 101 | return self.Decision(*[info[k] for k in self.x_names]) 102 | 103 | # exog_info_fn: function - returns the real experienced cost of traversing a link 104 | # from 'fromNode' to 'toNode' 105 | def exog_info_fn(self, fromNode, toNode): 106 | cost_hat = self.sample_from_uniform(fromNode, toNode) 107 | return cost_hat 108 | 109 | # transition_fn: function - updates the state within the model and returns new state 110 | def transition_fn(self, decision): 111 | self.state = self.build_state({"node": decision}) 112 | self.time += 1 113 | self.obs += 1 114 | 115 | # :param objective_fn: function - returns the cost we would experience by taking 'decision' 116 | # as our next node from the current state 'state' 117 | def 
objective_fn(self, decision): 118 | cost = self.exog_info_fn(self.state.node, decision) 119 | return cost 120 | 121 | """ 122 | the function for running trials; it simulates solving the problem a bunch of 123 | times (nbTrials times), then takes the squared mean of the costs incurred, 124 | and then returns that mean value 125 | """ 126 | 127 | def runTrials(self, nbTrials, deadline): 128 | # variables to store values along iterations 129 | totalPenalty = 0.0 130 | totalCost = 0.0 131 | totalTime = 0.0 132 | 133 | for i in range(nbTrials): 134 | self.state = self.build_state(self.init_state) 135 | self.time = 1 136 | self.n += 1 137 | cost = 0.0 138 | # print("Theta {:.2f} Iteration {}".format(self.theta,self.n)) 139 | 140 | # Following a path - the policy function is a lookahead 141 | while self.state.node != self.G.end_node: 142 | self.update_estimated_costs() 143 | P = LookaheadPolicy(self) 144 | decision = P.get_decision("PERCENTILE") 145 | # self.build_decision({'nextNode':decision}) 146 | stepCost = self.objective_fn(decision) 147 | cost += stepCost 148 | # print("\t Theta {:.2f}, Iteration {}, Time {}, CurrNode {}, Decision {}, Step Cost {:.2f} Cum Cost {:.2f}".format(self.theta,self.n,self.time,self.state.node,decision,stepCost,cost)) 149 | self.transition_fn(decision) 150 | 151 | # end of path calculations 152 | totalCost += cost 153 | if cost > deadline: 154 | # latenessSquared = (cost - deadline) ** 2 155 | latenessSquared = 1 156 | totalPenalty += latenessSquared 157 | else: 158 | latenessSquared = 0 159 | totalTime += self.time - 1 160 | print( 161 | "End of Theta {:.2f}, Iteration {}. Cost: {:.2f}, Lateness: {:.2f} and number of steps {}".format( 162 | self.theta, self.n, cost, math.sqrt(latenessSquared), self.time - 1 163 | ) 164 | ) 165 | 166 | # end of trials 167 | avgCost = totalCost / nbTrials 168 | avgPenalty = totalPenalty / nbTrials 169 | avgTime = totalTime / nbTrials 170 | 171 | return avgCost, avgPenalty, avgTime 172 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Network_Steps.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/StochasticShortestPath_Dynamic/Network_Steps.xlsx -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/StochasticShortestPath_Dynamic/Parameters.xlsx -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Policy.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | The code for the lookahead policy we use in our 4 | Static Model 5 | 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | # the lookahead policy 12 | class LookaheadPolicy: 13 | def __init__(self, model): 14 | self.model = model 15 | 16 | # function returning the decision x_t from the current state 17 | # and current time. 
The argument decisions is given to 18 | # use a local variable rather than for getting outside information 19 | def get_decision(self, METRIC): 20 | # the matrix with decisions to be made for each node and each time 21 | decisions = [ 22 | ([0] * self.model.G.vertexCount) for row in range(self.model.G.Horizon + 1) 23 | ] 24 | 25 | # initialize the value costs at different nodes at different times to infinity 26 | V = np.ones((self.model.G.Horizon + 1, self.model.G.vertexCount)) * np.inf 27 | # make the costs at the destination 0 28 | for t_prime in range(self.model.G.Horizon + 1): 29 | V[t_prime][self.model.G.end_node] = 0 30 | 31 | # the algortihm that uses the "stepping backwards in time" method 32 | lookAheadTime = self.model.G.Horizon - 1 33 | while lookAheadTime >= 0: 34 | for k in range(self.model.G.vertexCount): 35 | # find the solutions to Bellman's eq. that are shown 36 | # in 5.22 and 5.23 37 | argMin = -1 38 | minVal = np.inf 39 | for l in self.model.G.neighbors[k]: 40 | if METRIC == "PERCENTILE": 41 | spread = self.model.G.spreads[k][l] 42 | mean = self.model.estimated_costs[k][l] 43 | if minVal >= V[lookAheadTime + 1][l] + self.use_percentile_val( 44 | self.model.theta, spread, mean 45 | ): 46 | argMin = l 47 | minVal = V[lookAheadTime + 1][l] + self.use_percentile_val( 48 | self.model.theta, spread, mean 49 | ) 50 | else: 51 | if minVal >= V[lookAheadTime + 1][l] + dist[k][l]: 52 | argMin = l 53 | minVal = V[lookAheadTime + 1][l] + dist[k][l] 54 | 55 | # updating the solutions to the equations 56 | V[lookAheadTime][k] = minVal 57 | decisions[lookAheadTime][k] = argMin 58 | lookAheadTime -= 1 59 | 60 | return decisions[0][self.model.state.node] 61 | 62 | """ 63 | the function that takes as arguments the percentile we are going to 64 | use, theta (espressed as a value in [0,1]), the spread for a link and 65 | the mean cost of that link and returns the value corresponding to 66 | the theta precentile of the interval [(1 - spread) * mean, (1 + spread) * mean] 67 | """ 68 | 69 | def use_percentile_val(self, theta, spread, mean): 70 | point_val = 1 - spread + (2 * spread) * theta 71 | used_cost = mean * point_val 72 | return used_cost 73 | -------------------------------------------------------------------------------- /StochasticShortestPath_Static/SSPStaticModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | import networkx as nx 6 | import numpy as np 7 | from collections import namedtuple 8 | 9 | 10 | class SSPStatic(SDPModel): 11 | def __init__( 12 | self, 13 | seed: int = 42, 14 | G: nx.Graph = None, 15 | origin: int = None, 16 | destination: int = None, 17 | edge_weight: str = "travel_time", 18 | lower_bound: float = 0.8, 19 | upper_bound: float = 2.0, 20 | T: int = 300, 21 | ) -> None: 22 | 23 | # Weighted digraph 24 | self.G = G 25 | self.origin = origin 26 | self.path = [self.origin] 27 | self.actual_costs = [] 28 | self.destination = destination 29 | self.edge_weight = edge_weight 30 | 31 | # Edge weight will follow a triangular distribution 32 | self.lower_bound = lower_bound 33 | self.upper_bound = upper_bound 34 | 35 | state_names = ["CurrentNode", "CurrentNodeLinkCosts"] 36 | decision_names = ["NextNode"] 37 | t0 = 0 38 | 39 | # Create random sample out of origin node 40 | self.prng = np.random.RandomState(seed) 41 | S0 = self.sample_initial_state() 42 | 43 | super().__init__(state_names, decision_names, S0, t0, T, seed) 44 | 45 | 
self.reset_VFA() 46 | 47 | # TODO: catch if meaningless parameters for graph are passed 48 | 49 | def sample_initial_state(self): 50 | # Create random link costs for origin node 51 | Decision = namedtuple("Decision", "NextNode") 52 | start_decision = Decision(self.origin) 53 | S0 = self.exog_info_fn(decision=start_decision) 54 | S0["CurrentNode"] = self.origin 55 | 56 | return S0 57 | 58 | def calc_path_length(self, list_of_nodes): 59 | path_length = 0.0 60 | current_node = list_of_nodes[0] 61 | for i in range(1, len(list_of_nodes)): 62 | path_length += self.G.edges[(current_node, list_of_nodes[i], 0)][self.edge_weight] 63 | current_node = list_of_nodes[i] 64 | return path_length 65 | 66 | def exog_info_fn(self, decision): 67 | cost_dict = {} 68 | i = decision.NextNode 69 | for edge in self.G.out_edges(i): 70 | 71 | # This would be a more realistic stochastic model: 72 | # Travel time is random, but proportional to nominal travel time of the edge. 73 | # edge_data = self.G.edges[edge + (0,)] 74 | # left = edge_data[self.edge_weight] * self.lower_bound 75 | # right = edge_data[self.edge_weight] * self.upper_bound 76 | # mode = edge_data[self.edge_weight] 77 | # if np.abs(left - right) < 1e-4: 78 | # cost_dict[edge] = mode 79 | 80 | # Just choose a random number between 0 and 20s according to a triangular distribution 81 | left = 0 82 | right = 10 83 | mode = 5 84 | 85 | cost_dict[edge] = self.prng.triangular(left=left, mode=mode, right=right) 86 | 87 | return {"CurrentNodeLinkCosts": cost_dict} 88 | 89 | def reset_VFA(self): 90 | # Initialize VFA with deterministic shortest paths from all nodes to target node 91 | shortest_path = nx.shortest_path(self.G, target=self.destination, weight=self.edge_weight) 92 | self.V_t = {} 93 | for node in self.G.nodes: 94 | if node in shortest_path: 95 | self.V_t[node] = self.calc_path_length(shortest_path[node]) 96 | else: 97 | self.V_t[node] = np.inf 98 | 99 | def reset(self, reset_prng: bool = False): 100 | # Note: VFA is not reset 101 | # Sample a new initial state on reset (random costs from starting node) 102 | S0 = self.sample_initial_state() 103 | self.initial_state = self.build_state(S0) 104 | super().reset(reset_prng) 105 | self.path = [self.origin] 106 | self.actual_costs = [] 107 | 108 | def is_finished(self): 109 | """ 110 | Check if the model run (episode) is finished. 111 | This is the case when we reached the destination or the maximum number of nodes have been visited. 112 | 113 | Returns: 114 | bool: True if the run is finished, False otherwise. 
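        Note: when the step budget T is exhausted before the destination is
        reached, the objective of the run is set to np.nan just before the
        method reports the episode as finished (see the check below).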
115 | """ 116 | if self.t == self.T: 117 | self.objective = np.nan 118 | 119 | return self.state.CurrentNode == self.destination or self.t >= self.T 120 | 121 | def transition_fn(self, decision, exog_info: dict): 122 | return {"CurrentNode": decision.NextNode} 123 | 124 | def objective_fn(self, decision, exog_info: dict): 125 | return self.state.CurrentNodeLinkCosts[(self.state.CurrentNode, decision.NextNode)] 126 | 127 | def update_VFA(self, vhat, alpha): 128 | self.V_t[self.state.CurrentNode] = (1 - alpha) * self.V_t[self.state.CurrentNode] + alpha * vhat 129 | return self.V_t[self.state.CurrentNode] 130 | -------------------------------------------------------------------------------- /StochasticShortestPath_Static/SSPStaticPolicy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses import SDPModel 5 | from BaseClasses.SDPModel import SDPModel 6 | from BaseClasses.SDPPolicy import SDPPolicy 7 | 8 | 9 | class SSPStaticPolicy(SDPPolicy): 10 | def __init__(self, model: SDPModel, policy_name: str = "", theta_step: float = 1.0): 11 | self.theta_step = theta_step 12 | super().__init__(model, policy_name) 13 | 14 | def get_decision(self, state, t, T): 15 | i = state.CurrentNode 16 | costs = { 17 | j: state.CurrentNodeLinkCosts[(i, j)] + self.model.V_t[j] for j in self.model.G.successors(i) 18 | } 19 | 20 | next_node = min(costs, key=costs.get) 21 | 22 | return {"NextNode": next_node} 23 | 24 | def train_value_function_paths(self, n_iterations: int = 1): 25 | 26 | V_t_origin = [] 27 | for i in range(n_iterations): 28 | self.model.reset() 29 | 30 | V_t_origin.append(self.model.V_t[self.model.origin]) 31 | 32 | # Create one path with the current value function approximation 33 | self.run_policy(1) 34 | 35 | # Read sampled costs from path 36 | k = len(self.model.path) - 1 37 | vhats = {self.model.path[k]: 0.0} 38 | while k > 0: 39 | k -= 1 40 | vhats[self.model.path[k]] = self.model.actual_costs[k] + vhats[self.model.path[k + 1]] 41 | 42 | # Update value function approximations for nodes on the path 43 | alpha = self.theta_step / (self.theta_step + i) 44 | for node in vhats.keys(): 45 | self.model.V_t[node] = (1 - alpha) * self.model.V_t[node] + alpha * vhats[node] 46 | 47 | def train_value_function(self, n_iterations: int = 1): 48 | 49 | V_t_origin = [] 50 | for i in range(n_iterations): 51 | alpha = self.theta_step / (self.theta_step + i) 52 | self.model.reset() 53 | 54 | V_t_origin.append(self.model.V_t[self.model.origin]) 55 | 56 | while self.model.is_finished() is False: 57 | state_t = self.model.state 58 | decision_t = self.model.build_decision(self.get_decision(state_t, self.model.t, self.model.T)) 59 | 60 | actual_costs = state_t.CurrentNodeLinkCosts[(state_t.CurrentNode, decision_t.NextNode)] 61 | vhat = actual_costs + self.model.V_t[decision_t.NextNode] 62 | 63 | self.model.update_VFA(vhat, alpha) 64 | self.model.step(decision_t) 65 | 66 | return V_t_origin 67 | -------------------------------------------------------------------------------- /TwoNewsvendor/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/TwoNewsvendor/Parameters.xlsx -------------------------------------------------------------------------------- /TwoNewsvendor/TwoNewsvendor.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
3 | This program implements the basic model for the two newsvendor problem. 4 | This code does not belong to the driverscript 5 | 6 | 7 | """ 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import math 13 | import xlrd 14 | 15 | 16 | class Exogenous_Information(): 17 | 18 | def __init__(self, params): 19 | self.init_args = params 20 | self.prng = np.random.RandomState(params['seed']) 21 | self.n=0 22 | self.demand = None 23 | self.estimate_field = None 24 | self.estimate_central = None 25 | 26 | 27 | 28 | def generate_New_Round(self): 29 | self.n = self.n + 1 30 | self.demand = int(round(self.prng.uniform(self.init_args['dem_lower_bound'], self.init_args['dem_upper_bound']))) 31 | 32 | self.estimate_field = max(0,int(round(self.demand + self.prng.normal(loc = self.init_args['est_bias_field'], scale = self.init_args['est_std_field'])))) 33 | self.estimate_central = max(0,int(round(self.demand + self.prng.normal(loc = self.init_args['est_bias_central'], scale = self.init_args['est_std_central'])))) 34 | 35 | 36 | def get_Estimate_Field(self): 37 | return self.estimate_field 38 | 39 | def get_Estimate_Central(self): 40 | return self.estimate_central 41 | 42 | def get_Demand(self): 43 | return self.demand 44 | 45 | def get_Round_Number(self): 46 | return self.n 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | class Model_Field(): 55 | """ 56 | Base class for model 57 | """ 58 | 59 | def __init__(self, state_names, x_names, s_0, params): 60 | """ 61 | Initializes the model 62 | 63 | :param state_names: list(str) - state variable dimension names 64 | :param x_names: list(str) - decision variable dimension names 65 | :param s_0: dict - contains the information needed to populate the state names 66 | with the initial state values 67 | :params params: other initial information such as unit costs 68 | for overage or underage and the smoothing constants 69 | """ 70 | 71 | self.init_args = params 72 | self.prng = np.random.RandomState(params['seed']) 73 | self.init_state = s_0 74 | self.state_names = state_names 75 | self.x_names = x_names 76 | self.State = namedtuple('State', state_names) 77 | self.Decision = namedtuple('Decision', x_names) 78 | self.pen_incurred = 0 79 | 80 | self.state = self.build_state(self.init_state) 81 | self.decision = None 82 | self.n=0 83 | self.exog_info = {} 84 | 85 | 86 | 87 | def resetModel(self,theta): 88 | 89 | self.state = self.build_state(self.init_state) 90 | self.decision = None 91 | self.n=0 92 | self.exog_info = {} 93 | 94 | 95 | 96 | def build_state(self, info): 97 | return self.State(*[info[k] for k in self.state_names]) 98 | 99 | def build_decision(self, info): 100 | self.decision = self.Decision(*[info[k] for k in self.x_names]) 101 | return self.decision 102 | 103 | def exog_info_fn(self, decision_central, demand): 104 | exog_info = [] 105 | exog_info.append(decision_central) 106 | exog_info.append(demand) 107 | return exog_info 108 | 109 | def get_alpha_bias(self): 110 | return self.init_args['alpha_bias'] 111 | 112 | def updateState(self,estimate): 113 | state_dict = self.state._asdict() 114 | state_dict['estimate']=estimate 115 | self.state = self.build_state(state_dict) 116 | 117 | 118 | 119 | 120 | 121 | def transition_fn(self, exog_info): 122 | 123 | self.n +=1 124 | 125 | state_dict = self.state._asdict() 126 | 127 | source_bias = self.state.estimate - exog_info['demand'] 128 | central_bias = exog_info['allocated_quantity'] - self.decision.quantity_requested 129 | 130 | for state_desc in 
['central_bias','source_bias']: 131 | state_dict[state_desc] = (1 - self.get_alpha_bias()) * state_dict[state_desc] + self.get_alpha_bias() * eval(state_desc) 132 | 133 | self.state = self.build_state(state_dict) 134 | 135 | 136 | def objective_fn(self, exog_info): 137 | allocated = exog_info['allocated_quantity'] 138 | demand = exog_info['demand'] 139 | self.pen_incurred = (self.init_args['o_field'] * max(allocated - demand, 0) + 140 | self.init_args['u_field'] * max(demand - allocated, 0)) 141 | return -self.pen_incurred 142 | 143 | def showState(self,state_desc): 144 | return getattr(self.state,state_desc) 145 | 146 | 147 | class Model_Central(): 148 | """ 149 | Base class for model 150 | """ 151 | 152 | def __init__(self, state_names, x_names, s_0, params): 153 | """ 154 | Initializes the model 155 | 156 | :param state_names: list(str) - state variable dimension names 157 | :param x_names: list(str) - decision variable dimension names 158 | :param s_0: dict - contains the information needed to populate the state names 159 | with the initial state values and other initial information such as unit costs 160 | for overage or underage and the smoothing constants 161 | :param seed: int - seed for random number generator 162 | """ 163 | 164 | self.init_args = params 165 | self.prng = np.random.RandomState(self.init_args['seed']) 166 | self.init_state = s_0 167 | self.state_names = state_names 168 | self.x_names = x_names 169 | self.State = namedtuple('State', state_names) 170 | self.Decision = namedtuple('Decision', x_names) 171 | self.pen_incurred =0 172 | 173 | self.state = self.build_state(self.init_state) 174 | self.decision = None 175 | self.n=0 176 | self.beta_field = 0 177 | self.beta_source = 0 178 | self.delta_field = 0 179 | self.delta_source = 0 180 | self.lambda_field = 0 181 | self.lambda_source = 0 182 | 183 | def resetModel(self,theta): 184 | 185 | self.state = self.build_state(self.init_state) 186 | self.decision = None 187 | self.n=0 188 | self.beta_field = 0 189 | self.beta_source = 0 190 | self.delta_field = 0 191 | self.delta_source = 0 192 | self.lambda_field = 0 193 | self.lambda_source = 0 194 | 195 | 196 | 197 | def build_state(self, info): 198 | return self.State(*[info[k] for k in self.state_names]) 199 | 200 | def build_decision(self, info): 201 | self.decision = self.Decision(*[info[k] for k in self.x_names]) 202 | return self.decision 203 | 204 | def exog_info_fn(self, req_quantity, demand): 205 | return demand 206 | 207 | def updateState(self,field_request,estimate): 208 | state_dict = self.state._asdict() 209 | state_dict['field_request']=field_request 210 | state_dict['estimate']=estimate 211 | self.state = self.build_state(state_dict) 212 | 213 | def get_alpha_bias(self): 214 | return self.init_args['alpha_bias'] 215 | 216 | def get_alpha_learning(self): 217 | return self.init_args['alpha_learning'] 218 | 219 | 220 | 221 | def transition_fn(self, exog_info): 222 | 223 | self.n +=1 224 | 225 | state_dict = self.state._asdict() 226 | 227 | 228 | field_bias = self.state.field_request - exog_info['demand'] 229 | source_bias = self.state.estimate - exog_info['demand'] 230 | 231 | 232 | self.beta_field = (1 - self.get_alpha_learning()) * self.beta_field + self.get_alpha_learning() * (field_bias - state_dict['field_bias']) 233 | self.beta_source = (1 - self.get_alpha_learning()) * self.beta_source + self.get_alpha_learning() * (source_bias - state_dict['source_bias']) 234 | 235 | self.delta_field = (1 - self.get_alpha_learning()) * self.delta_field + 
self.get_alpha_learning() * ((field_bias - state_dict['field_bias'])**2)
236 |         self.delta_source = (1 - self.get_alpha_learning()) * self.delta_source + self.get_alpha_learning() * ((source_bias - state_dict['source_bias'])**2)
237 | 
238 |         self.var_field = (self.delta_field-(self.beta_field**2))/(1-self.lambda_field)
239 |         self.var_source = (self.delta_source-(self.beta_source**2))/(1-self.lambda_source)
240 | 
241 |         dem_field = self.var_field + (self.beta_field)**2
242 |         dem_source = self.var_source + (self.beta_source)**2
243 | 
244 |         if dem_field < 0.001:
245 |             field_w = 1
246 |             source_w = 0
247 |         elif dem_source < 0.001:
248 |             field_w = 0
249 |             source_w = 1
250 |         else:
251 |             field_w = 1/dem_field
252 |             source_w = 1/dem_source
253 | 
254 |         sum_w = field_w + source_w
255 | 
256 |         state_dict['field_weight'] = field_w/sum_w
257 |         state_dict['source_weight'] = source_w/sum_w
258 | 
259 |         state_dict['field_bias_hat'] = field_bias
260 | 
261 | 
262 |         if self.n > 1:
263 |             self.lambda_field = ((1 - self.get_alpha_bias())**2)*self.lambda_field + self.get_alpha_bias()**2
264 |             self.lambda_source = ((1 - self.get_alpha_bias())**2)*self.lambda_source + self.get_alpha_bias()**2
265 |         else:
266 |             self.lambda_field = self.get_alpha_bias()
267 |             self.lambda_source = self.get_alpha_bias()
268 | 
269 |         for state_desc in ['field_bias','source_bias']:
270 |             state_dict[state_desc] = (1 - self.get_alpha_bias()) * state_dict[state_desc] + self.get_alpha_bias() * eval(state_desc)
271 | 
272 |         self.state = self.build_state(state_dict)
273 | 
274 | 
275 | 
276 |     def objective_fn(self, exog_info):
277 |         allocated = exog_info['allocated_quantity']
278 |         demand = exog_info['demand']
279 |         self.pen_incurred = (self.init_args['o_central'] * max(allocated - demand, 0) +
280 |                              self.init_args['u_central'] * max(demand - allocated, 0))
281 |         return -self.pen_incurred
282 | 
283 |     def showState(self,state_desc):
284 |         return getattr(self.state,state_desc)
285 | 
286 | 
287 | 
288 | 
289 | 
--------------------------------------------------------------------------------
/TwoNewsvendor/TwoNewsvendorLearning.py:
--------------------------------------------------------------------------------
1 | """
2 | Two Newsvendor as a Learning Problem
3 | 
4 | Author: Andrei Graur
5 | 
6 | This program implements a model for the two newsvendor
7 | problem where the field agent and the central command
8 | both view the problem of choosing the right bias to add
9 | or subtract as a learning problem. Run the code with the
10 | python command, no arguments given.
11 | 
12 | """
13 | 
14 | import numpy as np
15 | import pandas as pd
16 | import math
17 | import xlrd
18 | 
19 | from TwoNewsvendor import Model_Field
20 | from TwoNewsvendor import Model_Central
21 | 
22 | # the class implementing the objects that represent the
23 | # available choices of the two agents, which are the biases to add
24 | class Choice:
25 |     def __init__(self, quantity, util_estimate, W_precision_estimate, theta, nu_bar = 0.5):
26 |         '''
27 |         The function that initializes the choice object
28 | 
29 |         param: quantity - int: the quantity in units equal to the bias
30 |         param: util_estimate - float: the estimate of what the utility will
31 |         be when we use this bias; we initialize it to 0 in main
32 |         param: W_precision_estimate - float: the estimate of what the
33 |         precision of next experiment of using this bias will be
34 |         param: theta - float: the tunable parameter.
It can be for the UCB policy or for the IE polidy 35 | ''' 36 | 37 | self.n = 0 38 | 39 | self.quantity = quantity 40 | self.util_estimate = util_estimate 41 | self.accumulated_precision = W_precision_estimate 42 | 43 | self.theta = theta 44 | 45 | 46 | #Variables to compute the variance of W 47 | self.W_precision = W_precision_estimate 48 | self.W_variance = 1 / float(self.W_precision) 49 | 50 | self.nu_bar = nu_bar 51 | self.W_bar = util_estimate 52 | self.W_beta = 0 53 | self.W_delta = 0 54 | self.W_lambda = 0 55 | self.nu = 1 56 | 57 | 58 | 59 | 60 | 61 | 62 | # the function that uploads the results of the experiment of trying 63 | # this bias and updates the corresponding beliefs about this choice 64 | def upload_results(self, W): 65 | self.n += 1 66 | 67 | self.nu = (self.nu)/(1+self.nu-self.nu_bar) 68 | self.W_beta = (1-self.nu)*self.W_beta + self.nu * (W - self.W_bar) 69 | self.W_delta = (1-self.nu)*self.W_delta + self.nu * ((W - self.W_bar)**2) 70 | 71 | 72 | # update the variance 73 | if self.n > 1: 74 | #self.W_variance = (((self.n - 2.0) / float(self.n - 1)) * self.W_variance +(1.0 / self.n) * ((W - self.util_estimate) ** 2)) 75 | 76 | self.W_variance = (self.W_delta - (self.W_beta**2))/(1+self.W_lambda) 77 | if self.W_variance < 0.0001: 78 | self.W_precision = 10 79 | else: 80 | self.W_precision = 1 / float(self.W_variance) 81 | 82 | alpha = self.W_precision / (self.accumulated_precision + self.W_precision) 83 | 84 | if self.n >1: 85 | self.W_lambda = ((1-alpha)**2)*self.W_lambda + (alpha)**2 86 | else: 87 | self.W_lambda = (alpha)**2 88 | 89 | self.W_bar = (1-alpha)*self.W_bar + alpha*W 90 | 91 | 92 | # update estimate and experiment precision 93 | self.util_estimate = ((self.util_estimate * self.accumulated_precision + 94 | W * self.W_precision) / 95 | (self.accumulated_precision + self.W_precision)) 96 | self.accumulated_precision += self.W_precision 97 | 98 | 99 | # the function that returns the bias attribute of this object 100 | def get_choice_quantity(self): 101 | return self.quantity 102 | 103 | # the cost function approximation for this choice of bias 104 | def get_UCB_value(self, time): 105 | if self.n == 0: 106 | UCB_val = np.inf 107 | 108 | else: 109 | UCB_val = (self.util_estimate + self.theta * math.sqrt(math.log(time) / self.n)) 110 | return UCB_val 111 | 112 | def get_IE_value(self): 113 | 114 | IE_val = (self.util_estimate + self.theta * math.sqrt(1/self.accumulated_precision)) 115 | return IE_val 116 | 117 | def get_nb_experiments(self): 118 | return self.n 119 | 120 | def getAllParametersHeaderList(self): 121 | outL="bias_choice n mu_bar_estimate Beta sigma IE_value UCB_value W_nu W_beta W_delta W_variance alpha W_bar W_lambda W_precision " 122 | return outL.split() 123 | 124 | def getAllParametersList(self,time): 125 | outL="{:.2f} {} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} ".format(self.quantity,self.n,self.util_estimate,self.accumulated_precision,math.sqrt(1/self.accumulated_precision),self.get_IE_value(),self.get_UCB_value(time),self.nu,self.W_beta,self.W_delta,self.W_variance,self.getOSA(),self.W_bar,self.W_lambda,self.W_precision) 126 | return outL.split() 127 | 128 | def getMainParametersHeaderList(self): 129 | outL="bias_choice n mu_bar_estimate Beta sigma W_bar W_precision W_variance " 130 | return outL.split() 131 | 132 | def getMainParametersList(self): 133 | return 
[self.quantity,self.n,self.util_estimate,self.accumulated_precision,math.sqrt(1/self.accumulated_precision),self.W_bar,self.W_precision,self.W_variance] 134 | 135 | def printChoiceParameters(self,n): 136 | valuesList = self.getAllParametersList(n) 137 | headerList = self.getAllParametersHeaderList() 138 | outStr="" 139 | for i in range(len(valuesList)): 140 | outStr += "{}: {}, ".format(headerList[i],valuesList[i]) 141 | return outStr 142 | 143 | # the model for the field agent treating the problem as a 144 | # learning problem 145 | class Learning_model_field(Model_Field): 146 | def __init__(self, theta, *args, **kwargs): 147 | super(Learning_model_field, self).__init__(*args, **kwargs) 148 | range_list = self.init_args['bias_interval_field'].split(",") 149 | range_list = [int(e) for e in range_list] 150 | self.choice_range =range(range_list[0],range_list[1]+1) 151 | self.resetModel(theta) 152 | 153 | def resetModel(self,theta): 154 | self.choices = {} 155 | for value in self.choice_range: 156 | 157 | self.choices[value] = Choice(value, 0, 0.01, theta) 158 | 159 | super(Learning_model_field, self).resetModel(None) 160 | 161 | 162 | 163 | # the new transition function for the learning approach 164 | def transition_fn(self, exog_info): 165 | 166 | # update the results of having tried out the used choice 167 | choice_used = self.choices[self.decision.bias_applied] 168 | 169 | #print("Field Choice state pre update") 170 | #outStr = choice_used.printChoiceParameters(self.n+1) 171 | #print(outStr) 172 | 173 | choice_used.upload_results(-self.pen_incurred) 174 | # update beliefs about the external source 175 | super(Learning_model_field, self).transition_fn(exog_info) 176 | 177 | #print("Field Choice state post update") 178 | #outStr = choice_used.printChoiceParameters(self.n+1) 179 | #print(outStr) 180 | 181 | def getMainParametersList(self): 182 | listPar = [self.choices[x].getMainParametersList() for x in self.choice_range] 183 | listParFlat = [elem for l in listPar for elem in l] 184 | return listParFlat 185 | 186 | def getMainParametersHeaderList(self): 187 | listPar = [self.choices[x].getMainParametersHeaderList() for x in self.choice_range] 188 | listParFlat = [str(x)+"_field_"+elem for x,l in zip(self.choice_range,listPar) for elem in l] 189 | return listParFlat 190 | 191 | def getMainParametersDf(self): 192 | dictPar = {x:self.choices[x].getMainParametersList() for x in self.choice_range} 193 | pdPar = pd.DataFrame(dictPar) 194 | pdPar = pdPar.transpose() 195 | pdPar.columns = self.choices[self.choice_range[0]].getMainParametersHeaderList() 196 | 197 | print(pdPar) 198 | return pdPar 199 | 200 | 201 | 202 | # the model for the central command treating the problem as a 203 | # learning problem 204 | class Learning_model_central(Model_Central): 205 | def __init__(self, theta, *args, **kwargs): 206 | super(Learning_model_central, self).__init__(*args, **kwargs) 207 | range_list = self.init_args['bias_interval_central'].split(",") 208 | range_list = [int(e) for e in range_list] 209 | self.choice_range=range(range_list[0],range_list[1]+1) 210 | self.resetModel(theta) 211 | 212 | def resetModel(self,theta): 213 | self.choices = {} 214 | for value in self.choice_range: 215 | 216 | self.choices[value] = Choice(value, 0, 0.01, theta) 217 | 218 | super(Learning_model_central, self).resetModel(None) 219 | 220 | 221 | def transition_fn(self, exog_info): 222 | # update the results of having tried out the used choice 223 | choice_used = self.choices[self.decision.bias_applied] 224 | 225 | 
#print("Central Choice state pre update") 226 | #outStr = choice_used.printChoiceParameters(self.n+1) 227 | #print(outStr) 228 | 229 | choice_used.upload_results(-self.pen_incurred) 230 | # update beliefs about the external source 231 | super(Learning_model_central, self).transition_fn(exog_info) 232 | 233 | #print("Central Choice state pos update - W = {:.2f}".format(-self.pen_incurred)) 234 | #outStr = choice_used.printChoiceParameters(self.n+1) 235 | #print(outStr) 236 | 237 | def getMainParametersList(self): 238 | listPar = [self.choices[x].getMainParametersList() for x in self.choice_range] 239 | listParFlat = [elem for l in listPar for elem in l] 240 | return listParFlat 241 | 242 | def getMainParametersHeaderList(self): 243 | listPar = [self.choices[x].getMainParametersHeaderList() for x in self.choice_range] 244 | listParFlat = [str(x)+"_central_"+elem for x,l in zip(self.choice_range,listPar) for elem in l] 245 | return listParFlat 246 | 247 | def getMainParametersDf(self): 248 | dictPar = {x:self.choices[x].getMainParametersList() for x in self.choice_range} 249 | pdPar = pd.DataFrame(dictPar) 250 | pdPar = pdPar.transpose() 251 | pdPar.columns = self.choices[self.choice_range[0]].getMainParametersHeaderList() 252 | 253 | print(pdPar) 254 | return pdPar 255 | 256 | 257 | 258 | 259 | -------------------------------------------------------------------------------- /TwoNewsvendor/TwoNewsvendorPolicy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | The policy for the two agent newsvendor game. 4 | 5 | ''' 6 | 7 | from TwoNewsvendor import Model_Field 8 | from TwoNewsvendor import Model_Central 9 | 10 | import numpy as np 11 | import math 12 | 13 | import matplotlib.pyplot as plt 14 | 15 | def printTuple(a): 16 | printStr = "" 17 | for f in a._fields: 18 | printStr += " {}: {:.2f}".format(f,getattr(a, f)) 19 | return printStr 20 | 21 | def printTupleValues(a): 22 | printStr = "" 23 | for f in a._fields: 24 | printStr += "{:.2f} ".format(getattr(a, f)) 25 | return printStr 26 | 27 | 28 | def formatFloatList(L,p): 29 | sFormat = "{{:.{}f}} ".format(p) * len(L) 30 | outL = sFormat.format(*L) 31 | return outL.split() 32 | 33 | 34 | 35 | def plot_heat_map(ax,contribution_dict, params,theta_field_values, theta_central_values,titleString,player_sorted_by_value): 36 | """ 37 | this function plots a heat map 38 | 39 | 40 | """ 41 | 42 | # 43 | textcolors=["black", "white"] 44 | 45 | contribution_values = [contribution_dict[(theta_field,theta_central)] for theta_central in theta_central_values for theta_field in theta_field_values] 46 | contributions = np.array(contribution_values) 47 | increment_count = len(theta_field_values) 48 | contributions = np.reshape(contributions, (-1, increment_count)) 49 | 50 | 51 | 52 | 53 | im = ax.imshow(contributions, cmap='hot',origin='lower',aspect='auto',alpha=.9) 54 | threshold = im.norm(contributions.max())/2 55 | # create colorbar 56 | cbar = ax.figure.colorbar(im, ax=ax) 57 | # cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom") 58 | # we want to show all ticks... 59 | ax.set_xticks(np.arange(len(theta_field_values))) 60 | ax.set_yticks(np.arange(len(theta_central_values))) 61 | # ... and label them with the respective list entries 62 | ax.set_xticklabels(theta_field_values) 63 | ax.set_yticklabels(theta_central_values) 64 | # rotate the tick labels and set their alignment. 
65 | #plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor") 66 | ax.set_title(titleString) 67 | 68 | for rank_player,dict_entry_player in enumerate(player_sorted_by_value): 69 | 70 | y_ind = [i for i,y in enumerate(theta_central_values) if dict_entry_player[0][1] == y] 71 | x_ind = [i for i,x in enumerate(theta_field_values) if dict_entry_player[0][0] == x] 72 | 73 | text = ax.text(x_ind[0], y_ind[0], "{}\n {:.0f}".format(rank_player,dict_entry_player[1][-1]), ha="center", va="center", color=textcolors[im.norm(dict_entry_player[1][-1]) < threshold],fontsize=7) 74 | 75 | #text = ax.text(x_ind[0], y_ind[0], "{}_{}".format(dict_entry_player[0][0], dict_entry_player[0][1]), ha="center", va="center", color=textcolors[im.norm(dict_entry_player[1][-1]) < threshold],fontsize=7) 76 | 77 | if params['policy_central']=='regular' or params['policy_central']=='punishing': 78 | ax.set_ylabel(r'$bias^{central}$',fontsize=14) 79 | elif 'learning' in params['policy_central']: 80 | ax.set_ylabel(r'$\theta^{central}$',fontsize=14) 81 | 82 | 83 | if params['policy_field']=='regular': 84 | ax.set_xlabel(r'$bias^{field}$',fontsize=14) 85 | elif 'learning' in params['policy_field']: 86 | ax.set_xlabel(r'$\theta^{field}$',fontsize=14) 87 | 88 | 89 | 90 | 91 | #fig.tight_layout() 92 | 93 | return True 94 | 95 | 96 | def create_theta_grid(params): 97 | 98 | #Field 99 | if params['policy_field']=='regular': 100 | range_list = params['bias_interval_field'].split(",") 101 | range_list = [int(e) for e in range_list] 102 | theta_field_list=list(range(range_list[0],range_list[1]+1)) 103 | 104 | elif 'learning' in params['policy_field']: 105 | if isinstance(params['theta_set_field'], str): 106 | theta_field_list = params['theta_set_field'].split(",") 107 | theta_field_list = [float(e) for e in theta_field_list] 108 | else: 109 | theta_field_list = [float(params['theta_set_field'])] 110 | 111 | #Central 112 | if params['policy_central']=='regular' or params['policy_central']=='punishing': 113 | range_list = params['bias_interval_central'].split(",") 114 | range_list = [int(e) for e in range_list] 115 | theta_central_list=list(range(range_list[0],range_list[1]+1)) 116 | 117 | elif 'learning' in params['policy_central']: 118 | if isinstance(params['theta_set_central'], str): 119 | theta_central_list = params['theta_set_central'].split(",") 120 | theta_central_list = [float(e) for e in theta_central_list] 121 | else: 122 | theta_central_list = [float(params['theta_set_central'])] 123 | 124 | theta_grid = [] 125 | for theta_field in theta_field_list: 126 | for theta_central in theta_central_list: 127 | theta_grid.append((theta_field,theta_central)) 128 | 129 | return theta_grid,theta_field_list,theta_central_list 130 | 131 | 132 | def run_policies(ite,record_budget,params,exog_info_gen,theta_field,theta_central,M_field,P_field,M_central,P_central): 133 | 134 | cost_ite_field = [] 135 | cost_ite_central = [] 136 | 137 | accum_util_field = 0 138 | accum_util_central = 0 139 | 140 | accum_request_field = 0 141 | accum_allocated_central=0 142 | 143 | record_sample_ite = [params['policy_field'],params['policy_central'],"{}_{}".format(theta_field,theta_central),ite] 144 | 145 | for n in range(params['N']): 146 | #Generate exogenous info - estimates and demand - but we are not observing the demand 147 | exog_info_gen.generate_New_Round() 148 | #print("Round {} - Estimate for the field {}, estimate for central {} and true demand 
{}".format(exog_info_gen.get_Round_Number(),exog_info_gen.get_Estimate_Field(),exog_info_gen.get_Estimate_Central(),exog_info_gen.get_Demand())) 149 | record_sample_t = [n,exog_info_gen.get_Round_Number(),exog_info_gen.get_Estimate_Field(),exog_info_gen.get_Estimate_Central(),exog_info_gen.get_Demand()] 150 | 151 | #Field updates its state variable with an estimate 152 | M_field.updateState(exog_info_gen.get_Estimate_Field()) 153 | #print("Field State {}".format(printTuple(M_field.state))) 154 | record_sample_t += list(M_field.state) 155 | 156 | #Field makes a decision 157 | field_request,bias_field = P_field.getDecision(M_field) 158 | M_field.build_decision({'quantity_requested': field_request,'bias_applied':bias_field}) 159 | accum_request_field += field_request 160 | #print("Field Decision {}".format(printTuple(M_field.decision))) 161 | record_sample_t += list(M_field.decision) 162 | 163 | #Central updates its state with field request and (possibly) an external estimate 164 | M_central.updateState(field_request,exog_info_gen.get_Estimate_Central()) 165 | #print("Central State {}".format(printTuple(M_central.state))) 166 | record_sample_t += list(M_central.state) 167 | 168 | #Central makes a decision 169 | decision_central,bias_central = P_central.getDecision(M_central) 170 | M_central.build_decision({'quantity_allocated': decision_central,'bias_applied':bias_central}) 171 | accum_allocated_central += decision_central 172 | #print("Central Decision {}".format(printTuple(M_central.decision))) 173 | record_sample_t += list(M_central.decision) 174 | 175 | #True demand is revelead 176 | demand = exog_info_gen.get_Demand() 177 | exog_info_pos_dec = {'allocated_quantity': decision_central, 'demand': demand} 178 | 179 | #Costs/penalties for field and central are computed 180 | util_field = M_field.objective_fn(exog_info_pos_dec) 181 | util_central = M_central.objective_fn(exog_info_pos_dec) 182 | #print("Field utility {:.2f} - Central utility {:.2f}".format(util_field,util_central)) 183 | 184 | accum_util_field += util_field 185 | accum_util_central += util_central 186 | 187 | #record_sample_t += formatFloatList([util_field,accum_util_field,util_central,accum_util_central],2) 188 | util_company = util_field + util_central 189 | accum_util_company = accum_util_field + accum_util_central 190 | 191 | record_sample_t += [util_field,accum_util_field,util_central,accum_util_central,util_company,accum_util_company] 192 | 193 | 194 | cost_ite_field.append(accum_util_field) 195 | cost_ite_central.append(accum_util_central) 196 | 197 | 198 | 199 | #Field and Central transition to next round updating all the stats 200 | M_field.transition_fn(exog_info_pos_dec) 201 | M_central.transition_fn(exog_info_pos_dec) 202 | 203 | if "learning" in params['policy_field']: 204 | record_sample_t += M_field.getMainParametersList() 205 | if "learning" in params['policy_central']: 206 | record_sample_t += M_central.getMainParametersList() 207 | 208 | record_budget.append(record_sample_ite+record_sample_t) 209 | 210 | return cost_ite_field,cost_ite_central,record_budget,accum_request_field/params['N'],accum_allocated_central/params['N'] 211 | 212 | 213 | 214 | 215 | class Policy_Field(): 216 | 217 | 218 | def __init__(self, params,theta): 219 | self.init_args = params 220 | self.theta = theta 221 | 222 | 223 | def getDecision(self,model): 224 | decision=getattr(self,self.init_args['policy_field']) 225 | return decision(model) 226 | 227 | def getLearningBias(self,model): 228 | 229 | if ("UCB" in 
model.init_args['policy_field']):
230 |             stats = {x:model.choices[x].get_UCB_value(model.n + 1) for x in model.choice_range}
231 |         else:
232 |             stats = {x:model.choices[x].get_IE_value() for x in model.choice_range}
233 | 
234 |         bias = max(stats,key=stats.get)
235 |         return bias
236 | 
237 | 
238 | 
239 |     def regular(self, model):
240 |         #ATTENTION! In this policy, self.theta is the bias that field is adding - one of the values in the parameter interval "bias_interval_field"
241 |         decision = round(model.state.estimate - model.state.source_bias - model.state.central_bias + self.theta)
242 |         #bias = decision - (model.state.estimate - model.state.source_bias)
243 |         bias = self.theta
244 |         return decision, bias
245 | 
246 | 
247 |     def learning_UCB(self,model):
248 |         bias = self.getLearningBias(model)
249 |         decision = round(model.state.estimate - model.state.source_bias + bias)
250 |         return decision,bias
251 | 
252 | 
253 | 
254 | 
255 |     def learning_IE(self, model):
256 |         # This method implements the Interval Estimation policy
257 | 
258 |         bias = self.getLearningBias(model)
259 |         decision = round(model.state.estimate - model.state.source_bias + bias)
260 | 
261 |         return decision,bias
262 | 
263 | 
264 | 
265 | class Policy_Central():
266 | 
267 |     def __init__(self, params,theta):
268 |         self.init_args = params
269 |         self.theta = theta
270 | 
271 | 
272 |     def getDecision(self,model):
273 |         decision=getattr(self,self.init_args['policy_central'])
274 |         return decision(model)
275 | 
276 |     def getLearningBias(self,model):
277 | 
278 |         if ("UCB" in model.init_args['policy_central']):
279 |             stats = {x:model.choices[x].get_UCB_value(model.n + 1) for x in model.choice_range}
280 |         else:
281 |             stats = {x:model.choices[x].get_IE_value() for x in model.choice_range}
282 | 
283 |         bias = max(stats,key=stats.get)
284 |         return bias
285 | 
286 | 
287 |     def regular(self, model):
288 |         #ATTENTION!
In this policy, self.theta is the bias that central is adding - one of the values in the parameter interval "bias_interval_central" 289 | decision = round(model.state.field_request - model.state.field_bias + self.theta) 290 | decision = max(0,decision) 291 | #bias = decision - model.state.field_request 292 | bias = self.theta 293 | return decision, bias 294 | 295 | def punishing(self, model): 296 | if model.state.field_bias_hat >0: 297 | decision = round(model.state.field_request - 2 * model.state.field_bias_hat) 298 | bias = - 2 * model.state.field_bias_hat 299 | else: 300 | #decision = round(model.state.field_request - model.state.field_bias + self.theta) 301 | decision = round(model.state.field_request + self.theta) 302 | bias = self.theta 303 | 304 | decision = max(0,decision) 305 | #bias = decision - model.state.field_request 306 | return decision, bias 307 | 308 | def learning_UCB(self,model): 309 | bias = self.getLearningBias(model) 310 | decision = round(model.state.field_request + bias) 311 | return max(0,decision),bias 312 | 313 | 314 | def learning_IE(self, model): 315 | # This method implements the Interval Estimation policy 316 | 317 | bias = self.getLearningBias(model) 318 | decision = round(model.state.field_request + bias) 319 | decision = max(0,decision) 320 | return decision,bias 321 | 322 | def learning_IE_two_estimates(self, model): 323 | bias = self.getLearningBias(model) 324 | decision = round(model.state.field_weight * (model.state.field_request) + model.state.source_weight * (model.state.estimate - model.state.source_bias) + bias) 325 | 326 | return max(0,decision),bias 327 | 328 | 329 | 330 | 331 | 332 | --------------------------------------------------------------------------------
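
A note on running the two-newsvendor code: the repository tree lists a TwoNewsvendorDriverScript.py, but its contents are not included in this dump. The sketch below is a minimal, hypothetical driver (not the repository's actual script) showing how Exogenous_Information, Model_Field, Model_Central and the "regular" policies defined above fit together for a single run with run_policies. All values in the params dictionary (demand bounds, estimate noise, overage/underage penalties, smoothing constants, seed) and the two theta biases are illustrative assumptions; the real experiments read these from Parameters.xlsx. Note that importing TwoNewsvendor.py also requires the xlrd package.

from TwoNewsvendor import Exogenous_Information, Model_Field, Model_Central
from TwoNewsvendorPolicy import Policy_Field, Policy_Central, run_policies

# Illustrative parameters (assumed values; the real ones come from Parameters.xlsx).
params = {
    'seed': 1234, 'N': 50,
    'dem_lower_bound': 20, 'dem_upper_bound': 40,
    'est_bias_field': 0.0, 'est_std_field': 3.0,
    'est_bias_central': 0.0, 'est_std_central': 5.0,
    'alpha_bias': 0.2, 'alpha_learning': 0.2,
    'o_field': 1.0, 'u_field': 2.0,          # field overage/underage penalties
    'o_central': 1.0, 'u_central': 2.0,      # central overage/underage penalties
    'policy_field': 'regular', 'policy_central': 'regular',
}

# State and decision variable names expected by the models and policies above;
# all biases and weights start at zero.
field_states = ['estimate', 'source_bias', 'central_bias']
central_states = ['field_request', 'estimate', 'field_bias', 'source_bias',
                  'field_weight', 'source_weight', 'field_bias_hat']

M_field = Model_Field(field_states, ['quantity_requested', 'bias_applied'],
                      {k: 0 for k in field_states}, params)
M_central = Model_Central(central_states, ['quantity_allocated', 'bias_applied'],
                          {k: 0 for k in central_states}, params)

theta_field, theta_central = 2, 0            # fixed biases used by the 'regular' policies
P_field = Policy_Field(params, theta_field)
P_central = Policy_Central(params, theta_central)

# One simulation of N rounds; run_policies returns cumulative utilities per round
# plus the per-round averages of requested and allocated quantities.
exog = Exogenous_Information(params)
costs_field, costs_central, record, avg_request, avg_allocated = run_policies(
    0, [], params, exog, theta_field, theta_central,
    M_field, P_field, M_central, P_central)

print("Cumulative field utility:  ", costs_field[-1])
print("Cumulative central utility:", costs_central[-1])
print("Average request / allocation per round: {:.1f} / {:.1f}".format(avg_request, avg_allocated))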