├── .gitignore ├── AdaptiveMarketPlanning ├── AdaptiveMarketPlanningDriverScript.py ├── AdaptiveMarketPlanningModel.py ├── AdaptiveMarketPlanningPolicy.py ├── Base parameters.xlsx ├── ParametricModel parameters.xlsx ├── ParametricModel.py ├── ParametricModelDriverScript.py └── README.txt ├── AssetSelling ├── AssetSelling.ipynb ├── AssetSellingModel.py └── AssetSellingPolicies.py ├── BaseClasses ├── Dummy.py ├── SDPModel.py ├── SDPPolicy.py └── Util.py ├── BloodManagement ├── BloodManagementDriverScript.py ├── BloodManagementModel.py ├── BloodManagementNetwork.py ├── BloodManagementPolicy.py ├── OutputAll.txt └── Parameters.xlsx ├── ClinicalTrials ├── ClinicalTrialsDriverScript.py ├── ClinicalTrialsDriverScriptSolutionQ4.py ├── ClinicalTrialsDriverScriptSolutionQ5.py ├── ClinicalTrialsDriverScriptSolutionQ6.py ├── ClinicalTrialsModel.py ├── ClinicalTrialsPolicy.py ├── ClinicalTrialsPolicySolutionQ6.py └── Parameters.xlsx ├── EnergyStorage_I ├── BackwardDP.py ├── EnergyStorageDriverScript.py ├── EnergyStorageModel.py ├── EnergyStoragePolicy.py └── Parameters.xlsx ├── LICENSE ├── MedicalDecisionDiabetes ├── MedicalDecisionDiabetes Solution.ipynb ├── MedicalDecisionDiabetes.ipynb ├── MedicalDecisionDiabetesModel.py └── MedicalDecisionDiabetesPolicies.py ├── README.md ├── StochasticShortestPath_Dynamic ├── Driver.py ├── GraphGenerator.py ├── Model.py ├── Network_Steps.xlsx ├── Parameters.xlsx └── Policy.py ├── StochasticShortestPath_Static ├── SSPStatic.ipynb ├── SSPStaticModel.py ├── SSPStaticPolicy.py └── cache │ ├── 68f662685724a6b23632a0c46475a528a84ae228.json │ ├── 77dcc202053a9972d96023e09bd81101008c0f76.json │ ├── 8256670b0dc93e9243acf697fcd93e584be4855b.json │ ├── 95e859f7c6e1b8b135d86a49734c82a9700bac74.json │ ├── 9c6fcf6dda6650e68aaa0cfee53cca9627c262e8.json │ ├── ef512a715bcd7d55d650793744f1d1689bd42fe2.json │ └── f5f5991d3d305d913eeb1881f9a87a92a5bffd02.json └── TwoNewsvendor ├── Parameters.xlsx ├── TwoNewsvendor.py ├── TwoNewsvendorDriverScript.py ├── TwoNewsvendorLearning.py └── TwoNewsvendorPolicy.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | .vscode/launch.json 106 | .conda/vcruntime140.dll 107 | AssetSelling/~$asset_selling_policy_parameters.xlsx 108 | MedicalDecisionDiabetes/~$MDDMparameters.xlsx 109 | stash.ipynb 110 | SSPStatic/cache 111 | AssetSelling/AssetSelling_solution.ipynb 112 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Driver Script 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 8 | from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == "__main__": 15 | # this is an example of creating a model and running a simulation for a certain trial size 16 | 17 | # define state variables 18 | state_names = ['order_quantity', 'counter'] 19 | init_state = {'order_quantity': 0, 'counter': 0} 20 | decision_names = ['step_size'] 21 | 22 | # read in variables from excel file 23 | file = 'Base parameters.xlsx' 24 | raw_data = pd.ExcelFile(file) 25 | data = raw_data.parse('parameters') 26 | cost = data.iat[0, 2] 27 | trial_size = np.rint(data.iat[1, 2]).astype(int) 28 | price = data.iat[2, 2] 29 | theta_step = data.iat[3, 2] 30 | T = data.iat[4, 2] 31 | reward_type = data.iat[5, 2] 32 | 33 | # initialize model and store ordered quantities in an array 34 | M = AdaptiveMarketPlanningModel(state_names, decision_names, init_state, T,reward_type, price, cost) 35 | P = AdaptiveMarketPlanningPolicy(M, theta_step) 36 | 37 | rewards_per_iteration = [] 38 | learning_list_per_iteration = [] 39 | for ite in list(range(trial_size)): 40 | print("Starting iteration ", ite) 41 | reward,learning_list = P.run_policy() 42 | M.learning_list=[] 43 | #print(learning_list) 44 | rewards_per_iteration.append(reward) 45 | learning_list_per_iteration.append(learning_list) 46 | print("Ending iteration ", ite," Reward ",reward) 47 | 48 | 49 | nElem = np.arange(1,trial_size+1) 50 | 51 | rewards_per_iteration = np.array(rewards_per_iteration) 52 | rewards_per_iteration_sum = rewards_per_iteration.cumsum() 53 | rewards_per_iteration_cum_avg = rewards_per_iteration_sum/nElem 54 | 55 | if (reward_type=="Cumulative"): 56 | 
rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg/T 57 | rewards_per_iteration = rewards_per_iteration/T 58 | 59 | optimal_order_quantity = -np.log(cost/price) * 100 60 | print("Optimal order_quantity for price {} and cost {} is {}".format(price,cost,optimal_order_quantity)) 61 | print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1])) 62 | 63 | ite = np.random.randint(0,trial_size) 64 | order_quantity = learning_list_per_iteration[ite] 65 | print("Order quantity for iteration {}".format(ite)) 66 | print(order_quantity) 67 | 68 | #Ploting the reward 69 | fig1, axsubs = plt.subplots(1,2,sharex=True,sharey=True) 70 | fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type,theta_step,T) ) 71 | 72 | axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g') 73 | axsubs[0].set_title('Cum_average reward') 74 | 75 | axsubs[1].plot(nElem, rewards_per_iteration, 'g') 76 | axsubs[1].set_title('Reward per iteration') 77 | #Create a big subplot 78 | ax = fig1.add_subplot(111, frameon=False) 79 | # hide tick and tick label of the big axes 80 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) 81 | ax.set_ylabel('USD', labelpad=0) # Use argument `labelpad` to move label downwards. 82 | ax.set_xlabel('Iterations', labelpad=10) 83 | plt.show() 84 | 85 | 86 | 87 | 88 | # ploting the analytical sol 89 | plt.xlabel("Time") 90 | plt.ylabel("Order quantity") 91 | plt.title("Analytical vs learned ordered quantity - (iteration {})".format(ite)) 92 | time = np.arange(0, len(order_quantity)) 93 | plt.plot(time, time * 0 - np.log(cost/price) * 100, label = "Analytical solution") 94 | plt.plot(time, order_quantity, label = "Kesten's Rule for theta_step {}".format(theta_step)) 95 | plt.legend() 96 | plt.show() 97 | 98 | 99 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Model class 3 | 4 | Adapted from code by Donghun Lee (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | class AdaptiveMarketPlanningModel(): 14 | """ 15 | Base class for model 16 | """ 17 | 18 | def __init__(self, state_names, x_names, s_0, T,reward_type,price = 1.0, cost = 1.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613): 19 | """ 20 | Initializes the model 21 | 22 | :param state_names: list(str) - state variable dimension names 23 | :param x_names: list(str) - decision variable dimension names 24 | :param s_0: dict - need to contain at least information to populate initial state using s_names 25 | :param price: float - price p 26 | :param cost: float - cost c 27 | :param exog_info_fn: function - calculates relevant exogenous information 28 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 29 | evolves 30 | :param objective_fn: function - calculates contribution at time t 31 | :param seed: int - seed for random number generator 32 | """ 33 | 34 | self.init_args = {seed: seed} 35 | self.prng = np.random.RandomState(seed) 36 | self.init_state = s_0 37 | self.T = T 38 | self.reward_type = reward_type 39 | self.state_names = state_names 40 | self.x_names = x_names 41 | self.State = 
namedtuple('State', state_names) 42 | self.state = self.build_state(s_0) 43 | self.Decision = namedtuple('Decision', x_names) 44 | self.obj = 0.0 45 | self.past_derivative = 0.0 46 | self.cost = cost 47 | self.price = price 48 | self.t = 0 49 | self.learning_list=[] 50 | 51 | 52 | 53 | # this function gives a state containing all the state information needed 54 | def build_state(self, info): 55 | return self.State(*[info[k] for k in self.state_names]) 56 | 57 | # this function gives a decision 58 | def build_decision(self, info): 59 | return self.Decision(*[info[k] for k in self.x_names]) 60 | 61 | # this function gives the exogenous information that is dependent on a random process 62 | # computes the f_hat, chnage in the forecast over the horizon 63 | def exog_info_fn(self, decision): 64 | # return new demand based on a given distribution 65 | return {"demand": self.prng.exponential(100)} 66 | 67 | # this function takes in the decision and exogenous information to return 68 | # new state 69 | def transition_fn(self, decision, exog_info): 70 | 71 | self.learning_list.append(self.state.order_quantity) 72 | 73 | # compute derivative 74 | derivative = self.price - self.cost if self.state.order_quantity < exog_info['demand'] else - self.cost 75 | # update order quantity 76 | new_order_quantity = max(0, self.state.order_quantity + decision.step_size * derivative) 77 | print(' step ', decision.step_size) 78 | print(' derivative ', derivative) 79 | # count number of times derivative changes sign 80 | new_counter = self.state.counter + 1 if self.past_derivative * derivative < 0 else self.state.counter 81 | self.past_derivative = derivative 82 | 83 | 84 | 85 | return {"order_quantity": new_order_quantity, "counter": new_counter} 86 | 87 | # this function calculates how much money we make 88 | def objective_fn(self, decision, exog_info): 89 | self.order_quantity=self.state.order_quantity 90 | obj_part = self.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.state.order_quantity 91 | return obj_part 92 | 93 | # this method steps the process forward by one time increment by updating the sum of the contributions, the 94 | # exogenous information and the state variable 95 | def step(self, decision): 96 | self.t_update() 97 | exog_info = self.exog_info_fn(decision) 98 | onestep_contribution = self.objective_fn(decision, exog_info) 99 | 100 | print("t {}, Price {}, Demand {}, order_quantity {}, contribution {}".format(self.t,self.price,exog_info['demand'],self.order_quantity,onestep_contribution)) 101 | 102 | #Check if cumulative or terminal reward 103 | if (self.reward_type == 'Cumulative'): 104 | self.obj += onestep_contribution 105 | else: 106 | if (self.t == self.T): 107 | self.obj = onestep_contribution 108 | 109 | 110 | transition_info = self.transition_fn(decision, exog_info) 111 | self.state = self.build_state(transition_info) 112 | 113 | 114 | 115 | # Update method for time counter 116 | def t_update(self): 117 | self.t += 1 118 | return self.t -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/AdaptiveMarketPlanningPolicy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Policy class 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | 8 | import numpy as np 9 | from copy import copy 10 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 11 | 12 | class AdaptiveMarketPlanningPolicy(): 13 | """ 14 | Base class for policy 15 | 
""" 16 | 17 | def __init__(self, AdaptiveMarketPlanningModel, theta_step): 18 | """ 19 | Initializes the model 20 | 21 | :param AdaptiveMarketPlanningModel: AdaptiveMarketPlanningModel - model to construct decision for 22 | :param theta_step: float - theta step variable 23 | """ 24 | 25 | self.M = AdaptiveMarketPlanningModel 26 | self.theta_step = theta_step 27 | 28 | # returns decision based on harmonic step size policy 29 | def harmonic_rule(self): 30 | return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.t - 1)}) 31 | 32 | # returns decision based on Kesten's rule policy 33 | def kesten_rule(self): 34 | return self.M.build_decision({'step_size': self.theta_step / (self.theta_step + self.M.state.counter - 1)}) 35 | 36 | # returns decision based on a constant rule policy 37 | def constant_rule(self): 38 | return self.M.build_decision({'step_size': self.theta_step}) 39 | 40 | # returns decision based on a constant rule policy 41 | def run_policy(self): 42 | model_copy = copy(self.M) 43 | 44 | for t in range(model_copy.T): 45 | model_copy.step(AdaptiveMarketPlanningPolicy(model_copy, self.theta_step).kesten_rule()) 46 | 47 | 48 | 49 | return (model_copy.obj,model_copy.learning_list.copy()) 50 | 51 | 52 | -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/Base parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/AdaptiveMarketPlanning/Base parameters.xlsx -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModel parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/AdaptiveMarketPlanning/ParametricModel parameters.xlsx -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adaptive Market Planning Model for variable price subclass 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from AdaptiveMarketPlanningModel import AdaptiveMarketPlanningModel 8 | 9 | import numpy as np 10 | 11 | class ParametricModel(AdaptiveMarketPlanningModel): 12 | """ 13 | Subclass for Adaptive Market Planning 14 | """ 15 | 16 | def __init__(self, state_names, x_names, s_0, T, reward_type, cost = 1.0, price_low = 1.0, price_high = 10.0, exog_info_fn=None, transition_fn=None, objective_fn=None, seed=20180613): 17 | """ 18 | Initializes the model 19 | 20 | See Adaptive Market Planning Model for more details 21 | """ 22 | super().__init__(state_names, x_names, s_0, T, reward_type,cost = cost, exog_info_fn=exog_info_fn, transition_fn=transition_fn, objective_fn=objective_fn, seed=seed) 23 | self.past_derivative = np.array([0, 0, 0]) 24 | self.low = price_low 25 | self.high = price_high 26 | self.PRICE_PROCESS ='RW' 27 | 28 | # returns order quantity for a given price and theta vector 29 | def order_quantity_fn(self, price, theta): 30 | return max(0,theta[0] + theta[1] * price + theta[2] * price ** (-2)) 31 | 32 | # returns derivative for a given price and theta vector 33 | def derivative_fn(self, price, theta): 34 | return np.array([1, price, price ** (-2)]) 35 | 36 | # this function takes in the decision 
and exogenous information to return 37 | # new state 38 | def transition_fn(self, decision, exog_info): 39 | 40 | self.learning_list.append(self.state.theta) 41 | print(' theta ',self.state.theta) 42 | 43 | # compute derivative and update theta 44 | derivative = np.array([0, 0, 0]) 45 | if self.order_quantity_fn(self.state.price, self.state.theta) < exog_info['demand']: 46 | derivative = (self.state.price - self.cost) * self.derivative_fn(self.state.price, self.state.theta) 47 | else: 48 | derivative = (- self.cost) * self.derivative_fn(self.state.price, self.state.theta) 49 | 50 | new_theta = self.state.theta + decision.step_size * derivative 51 | 52 | new_counter = self.state.counter + 1 if np.dot(self.past_derivative, derivative) < 0 else self.state.counter 53 | print(' step ', decision.step_size) 54 | print(' derivative ', derivative) 55 | print('new theta ',new_theta) 56 | 57 | 58 | self.past_derivative = derivative 59 | 60 | # generate random price 61 | if (self.PRICE_PROCESS == 'RW'): 62 | coin = self.prng.uniform() 63 | delta = 0 64 | if coin < .2: 65 | delta = -1 66 | elif coin >.8: 67 | delta = 1 68 | 69 | new_price = min(self.high,max(self.low,self.state.price + delta)) 70 | else: 71 | new_price = self.prng.uniform(self.low, self.high) 72 | 73 | 74 | 75 | return {"counter": new_counter, "price": new_price, "theta": new_theta} 76 | 77 | # this function calculates how much money we make 78 | def objective_fn(self, decision, exog_info): 79 | self.price = self.state.price 80 | self.order_quantity=self.order_quantity_fn(self.state.price, self.state.theta) 81 | obj_part = self.state.price * min(self.order_quantity, exog_info['demand']) - self.cost * self.order_quantity 82 | return obj_part -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/ParametricModelDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parametric Model Driver Script 3 | 4 | """ 5 | 6 | from collections import namedtuple 7 | from ParametricModel import ParametricModel 8 | from AdaptiveMarketPlanningPolicy import AdaptiveMarketPlanningPolicy 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == "__main__": 15 | # this is an example of creating a model and running a simulation for a certain trial size 16 | 17 | # define state variables 18 | state_names = ['counter', 'price', 'theta'] 19 | init_state = {'counter': 0, 'price': 26, 'theta': np.array([1, 1, 1])} 20 | decision_names = ['step_size'] 21 | 22 | # read in variables from excel file 23 | file = 'ParametricModel parameters.xlsx' 24 | raw_data = pd.ExcelFile(file) 25 | data = raw_data.parse('parameters') 26 | cost = data.iat[0, 2] 27 | trial_size = np.rint(data.iat[1, 2]).astype(int) 28 | price_low = data.iat[2, 2] 29 | price_high = data.iat[3, 2] 30 | theta_step = data.iat[4, 2] 31 | T = data.iat[5, 2] 32 | reward_type = data.iat[6, 2] 33 | 34 | # initialize model and run simulations 35 | M = ParametricModel(state_names, decision_names, init_state, T, reward_type,cost, price_low = price_low, price_high = price_high) 36 | print("Theta_step ",theta_step) 37 | P = AdaptiveMarketPlanningPolicy(M, theta_step) 38 | 39 | rewards_per_iteration = [] 40 | learning_list_per_iteration = [] 41 | for ite in list(range(trial_size)): 42 | print("Starting iteration ", ite) 43 | reward,learning_list = P.run_policy() 44 | M.learning_list=[] 45 | #print(learning_list) 46 | rewards_per_iteration.append(reward) 47 
| learning_list_per_iteration.append(learning_list) 48 | print("Ending iteration ", ite," Reward ",reward) 49 | 50 | 51 | nElem = np.arange(1,trial_size+1) 52 | 53 | rewards_per_iteration = np.array(rewards_per_iteration) 54 | rewards_per_iteration_sum = rewards_per_iteration.cumsum() 55 | rewards_per_iteration_cum_avg = rewards_per_iteration_sum/nElem 56 | 57 | if (reward_type=="Cumulative"): 58 | rewards_per_iteration_cum_avg = rewards_per_iteration_cum_avg/T 59 | rewards_per_iteration = rewards_per_iteration/T 60 | 61 | 62 | print("Reward type: {}, theta_step: {}, T: {} - Average reward over {} iteratios is: {}".format(reward_type,theta_step,T,trial_size,rewards_per_iteration_cum_avg[-1])) 63 | 64 | price = np.arange(price_low, price_high, 1) 65 | optimal = -np.log(cost/price) * 100 66 | df = pd.DataFrame({'Price' : price, 'OptOrderQuantity' : optimal}) 67 | print(df) 68 | 69 | ite = np.random.randint(0,trial_size) 70 | theta_ite = learning_list_per_iteration[ite] 71 | #print("Thetas for iteration {}".format(ite)) 72 | #print(theta_ite) 73 | 74 | #Ploting the reward 75 | fig1, axsubs = plt.subplots(1,2,sharex=True,sharey=True) 76 | fig1.suptitle("Reward type: {}, theta_step: {}, T: {}".format(reward_type,theta_step,T) ) 77 | 78 | axsubs[0].plot(nElem, rewards_per_iteration_cum_avg, 'g') 79 | axsubs[0].set_title('Cum_average reward') 80 | 81 | axsubs[1].plot(nElem, rewards_per_iteration, 'g') 82 | axsubs[1].set_title('Reward per iteration') 83 | #Create a big subplot 84 | ax = fig1.add_subplot(111, frameon=False) 85 | # hide tick and tick label of the big axes 86 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) 87 | ax.set_ylabel('USD', labelpad=0) # Use argument `labelpad` to move label downwards. 88 | ax.set_xlabel('Iterations', labelpad=10) 89 | plt.show() 90 | 91 | 92 | if (False): 93 | for i in range(trial_size): 94 | M.step(AdaptiveMarketPlanningPolicy(M, theta_step).kesten_rule()) 95 | 96 | # plot results 97 | price = np.arange(price_low, price_high, 0.1) 98 | optimal = -np.log(cost/price) * 100 99 | plt.plot(price, optimal, color = 'green', label = "analytical solution") 100 | order_quantity = [M.order_quantity_fn(k, M.state.theta) for k in price] 101 | plt.plot(price, order_quantity, color = 'blue', label = "parametrized solution") 102 | plt.legend() 103 | plt.show() -------------------------------------------------------------------------------- /AdaptiveMarketPlanning/README.txt: -------------------------------------------------------------------------------- 1 | In order to run the code for question 2, set the parameters on the file "Base parameters.xlsx” sheet “parameters” and then run "python AdaptiveMarketPlanningDriverScript.py”. 2 | 3 | In order to run the code for the parametric model, set the parameters on the file “ParametricModel parameters.xlsx” sheet “parameters” and then run "python ParametricModelDriverScript.py” -------------------------------------------------------------------------------- /AssetSelling/AssetSelling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# An Asset Selling Model\n", 8 | "In this notebook you will optimize some simple parametric policies for the asset selling problem.\n", 9 | "\n", 10 | "We start by creating an instance of the model and an instance of a policy. Let's start with the Sell-Low-policy. 
First, we instantiate a model and specify the initial value for the state (price) and the length of the time horizon T in the constructor. It would also be possible to exert more control on the exogenous information process by specifying parameters for it that are used inside the process." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "import plotly.express as px\n", 22 | "import AssetSellingModel as asm\n", 23 | "import AssetSellingPolicies as asp\n", 24 | "import BaseClasses.Util as util\n", 25 | "\n", 26 | "model = asm.AssetSellingModel(S0={\"price\": 20}, T=30)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Next, we create a policy for this model. The high-low policy has two tunable parameters, namely `theta_low` and `theta_high`.\n", 34 | "Then, we run the policy for 100 iterations/episodes. The `run_policy` method returns the average objective function value over all episodes." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "high_low_policy = asp.HighLowPolicy(model=model, theta_low=10, theta_high=30)\n", 44 | "high_low_policy.run_policy(n_iterations=100)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Exercise 1\n", 52 | "Execute the cell several times. How do you explain that the result is different every time? Do you notice any difference in this behavior if you change the number of iterations to 10, 1000, 10000?\n", 53 | "\n", 54 | "---" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "We investigate the results in more detail. The results of a policy run are stored in a DataFrame called `results`. Every row corresponds to one timestep of one iteration/episode." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "high_low_policy.results" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Next, we plot a few of the 100 paths using plotly. We notice that if the price never drops below `theta_low`, the asset is sold at the end of the time horizon." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "sample_paths = np.random.choice(100, size=5, replace=False)\n", 87 | "df = high_low_policy.results.loc[high_low_policy.results.N.isin(sample_paths), :]\n", 88 | "px.line(data_frame=df, x=\"t\", y=\"price\", facet_row=\"N\", height=800)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "The average amount of money that we make selling the asset depends, of course, on the values of `theta_low` and `theta_high`. If we, for example, set `theta_low` to a higher value, it seems that the average profit is higher (note that it will be slightly different every time we execute the cell)."
96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "high_low_policy.theta_low = 19\n", 105 | "high_low_policy.run_policy(n_iterations=100)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Next, we will try to find the best values for `theta_low` and `theta_high`. This is called *parameter tuning*. To do this, we just systematically try out different combinations of values for both. This strategy is called a *grid search*, and there is a simple convenience method to automate it." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Define a grid for combinations of theta_low and theta_high.\n", 122 | "# theta_low should not be larger than the starting price (20) and theta_high should not be smaller than the starting price.\n", 123 | "grid = {\"theta_low\": np.linspace(10, 20, 11), \"theta_high\": np.linspace(20, 30, 11)}\n", 124 | "result = util.grid_search(grid, high_low_policy, n_iterations=10, ordered=True)\n", 125 | "\n", 126 | "print(f\"Best parameters: {result['best_parameters']} with an objective of {result['best_performance']}.\")" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "The result object gives us the best parameters and the corresponding performance, but it also gives information about all the runs. We transform them into matrix form and visualize them with a heatmap." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "res_grid = result[\"all_runs\"].pivot(index=\"theta_low\", columns=\"theta_high\", values=\"performance\")\n", 143 | "px.imshow(res_grid)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Apparently, with the given uncertainty model and the high-low policy, the best profit is only slightly above the start price." 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Exercise 2\n", 158 | "1. Create an instance of the tracking policy that is implemented in the class `TrackPolicy` in the module `AssetSellingPolicies` and run the policy for 100 iterations. Describe in your own words how this policy makes a decision. \n", 159 | "2. The policy has one tunable parameter `theta`. Run a grid search to find the best value for `theta`. Is the tracking policy better than the high-low policy?" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "---" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "In the first version of our sequential decision model, we used a stochastic model to generate observations. We now introduce a new version, where we draw sample observations $W_{t+1}$ from historical data. Consider the following version of our problem:\n", 174 | "\n", 175 | "*You own a share of a company at the beginning of the month. Every day, you need to decide if you sell it (for the closing price of this day) or not. 
If by the end of the month the stock is still in your possession, it is sold at the closing price of the last day of the month.*\n", 176 | "\n", 177 | "To generate different observations for one month, we will use 10 years of historical data, where we scale the data so each month starts at zero. This gives us 120 observations in total that we will use to tune our policy.\n", 178 | " \n", 179 | "As an example, we download data for the SAP stock using the package `yfinance` and reshape it to match our needs." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "import yfinance as yf\n", 189 | "\n", 190 | "def get_historical_monthly_paths(stock_name, start=\"2014-01-01\", end=\"2023-12-31\"):\n", 191 | " stock = yf.Ticker(stock_name)\n", 192 | "\n", 193 | " # Get historical market data (this makes an API call to Yahoo Finance)\n", 194 | " hist = stock.history(start=start, end=end, interval=\"1d\")\n", 195 | "\n", 196 | " # We just keep the \"Close\" column\n", 197 | " hist = hist.drop([\"Open\", \"High\", \"Low\", \"Volume\", \"Dividends\", \"Stock Splits\"], axis=1)\n", 198 | "\n", 199 | " # Enumerate the months from the start and store as a separate column\n", 200 | " hist[\"N\"] = hist.index.tz_convert(None).to_period('M')\n", 201 | " hist[\"N\"] = hist[\"N\"].apply(lambda x: x.ordinal) - hist[\"N\"].iloc[0].ordinal\n", 202 | "\n", 203 | " # Get the Close price at the beginning of every month and subtract it from the Close value \n", 204 | " hist_month_start = hist.groupby(\"N\").head(1).rename({\"Close\": \"Close_Month_Start\"}, axis=1)\n", 205 | " hist_month_start = pd.merge(hist, hist_month_start, on=\"N\")\n", 206 | " hist_month_start[\"price\"] = (hist_month_start[\"Close\"] - hist_month_start[\"Close_Month_Start\"])\n", 207 | " \n", 208 | " return hist_month_start.drop([\"Close\", \"Close_Month_Start\"], axis=1)\n", 209 | "\n", 210 | "hist_prices = get_historical_monthly_paths(stock_name=\"SAP\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "Let's have a look at our historical sample paths $W_{t+1}$:" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "hist_prices" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "We now create a new model where the exogenous information process is modified so that at each iteration, one month of the historical data is selected. We also create an instance of the high-low policy and run it for 120 iterations." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "model_hist = asm.AssetSellingModelHistorical(hist_data=hist_prices)\n", 243 | "policy_hist = asp.HighLowPolicy(model=model_hist, theta_low=-10, theta_high=10)\n", 244 | "policy_hist.run_policy(n_iterations=120)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [ 251 | "Next, we need to optimize `theta_high` and `theta_low`. To get a feeling for which values we might try, we first have a look at the distribution of monthly price deviations."
252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "px.histogram(hist_prices, x=\"price\")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "We see that during one month, the difference of the stock price to the price at the beginning of the month is usually between -30 and +30 (with some outliers). We do a grid search on a 16x16 grid to find the best combination of `theta_low` and `theta_high`." 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "grid = {\"theta_low\": np.linspace(-30, 0, 16), \"theta_high\": np.linspace(0, 30, 16)}\n", 277 | "result = util.grid_search(grid, policy_hist, n_iterations=120, ordered=True)\n", 278 | "\n", 279 | "print(f\"Best parameters: {result['best_parameters']} with an objective of {result['best_performance']}.\")" 280 | ] 281 | }, 282 | { 283 | "cell_type": "markdown", 284 | "metadata": {}, 285 | "source": [ 286 | "## Exercise 3\n", 287 | "Go to https://finance.yahoo.com/ to look up names and historical charts of stocks. Repeat the steps above with a stock of your choice (Tesla? Wirecard? ...?). You should be able to use the function `get_historical_monthly_paths` from above to get the data in the appropriate format.Try to find a policy, i.e. \"sell-low\", \"high-low\", or \"track\", with corresponding parameters that maximizes the expected profit.\n", 288 | "\n", 289 | "---" 290 | ] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "sda", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.10.12" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 2 314 | } 315 | -------------------------------------------------------------------------------- /AssetSelling/AssetSellingModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | import pandas as pd 6 | 7 | 8 | class AssetSellingModel(SDPModel): 9 | def __init__( 10 | self, 11 | S0: dict, 12 | t0: float = 0, 13 | T: float = 1, 14 | seed: int = 42, 15 | alpha: float = 0.7, 16 | var: float = 2, 17 | bias_df: pd.DataFrame = None, 18 | upstep: float = 1, 19 | downstep: float = -1, 20 | ) -> None: 21 | state_names = ["price", "bias", "price_smoothed", "resource"] 22 | 23 | # Set default values for helper states 24 | if "bias" not in S0: 25 | S0["bias"] = "Neutral" 26 | if "price_smoothed" not in S0: 27 | S0["price_smoothed"] = S0["price"] 28 | if "resource" not in S0: 29 | S0["resource"] = 1 30 | 31 | decision_names = ["sell"] 32 | super().__init__(state_names, decision_names, S0, t0, T, seed) 33 | self.alpha = alpha 34 | self.var = var 35 | if bias_df is None: 36 | self.bias_df = pd.DataFrame( 37 | {"Up": [0.9, 0.1, 0], "Neutral": [0.2, 0.6, 0.2], "Down": [0, 0.1, 0.9]} 38 | ) 39 | self.bias_df.index = ["Up", "Neutral", "Down"] 40 | else: 41 | self.bias_df = bias_df 42 | self.upstep = upstep 43 | self.downstep = downstep 44 | 45 | def is_finished(self): 46 | """ 47 | Check if the model run (episode) is 
finished. 48 | This is either the case when the time is over or we no longer hold the asset. 49 | 50 | Returns: 51 | bool: True if the run is finished, False otherwise. 52 | """ 53 | hold_asset = self.state.resource 54 | return super().is_finished() or not hold_asset 55 | 56 | def exog_info_fn(self, decision): 57 | """ 58 | Generates exogenous information for the asset selling model. 59 | 60 | Args: 61 | decision: The decision made (not used). 62 | 63 | Returns: 64 | A dictionary containing the updated price and bias. 65 | 66 | Notes: 67 | - The change in price is assumed to be normally distributed with mean bias and given variance. 68 | - The bias changes in every step according to given parameters. 69 | - The new price is set to 0 whenever the random process gives a negative price. 70 | """ 71 | biasprob = self.bias_df[self.state.bias] 72 | 73 | coin = self.prng.uniform() 74 | if coin < biasprob["Up"]: 75 | new_bias = "Up" 76 | bias = self.upstep 77 | elif coin >= biasprob["Up"] and coin < biasprob["Neutral"] + biasprob["Up"]: 78 | new_bias = "Neutral" 79 | bias = 0 80 | else: 81 | new_bias = "Down" 82 | bias = self.downstep 83 | 84 | price_delta = self.prng.normal(bias, self.var) 85 | updated_price = self.state.price + price_delta 86 | new_price = 0.0 if updated_price < 0.0 else updated_price 87 | 88 | return { 89 | "price": new_price, 90 | "bias": new_bias, 91 | } 92 | 93 | def transition_fn(self, decision, exog_info): 94 | alpha = self.alpha 95 | new_resource = 0 if decision.sell == 1 else self.state.resource 96 | new_price_smoothed = (1 - alpha) * self.state.price_smoothed + alpha * exog_info["price"] 97 | 98 | return {"resource": new_resource, "price_smoothed": new_price_smoothed} 99 | 100 | def objective_fn(self, decision, exog_info): 101 | sell_size = 1 if decision.sell == 1 and self.state.resource != 0 else 0 102 | return self.state.price * sell_size 103 | 104 | 105 | class AssetSellingModelHistorical(AssetSellingModel): 106 | def __init__( 107 | self, 108 | hist_data: pd.DataFrame, 109 | alpha: float = 0.7, 110 | ) -> None: 111 | super().__init__(S0={"price": 0.0}, alpha=alpha) 112 | self.T = 100 113 | self.hist_data = hist_data 114 | 115 | def reset(self, reset_prng: bool = False): 116 | # Get the subset of the historical data that corresponds to the current episode 117 | self.episode_data = self.hist_data.loc[self.hist_data["N"] == self.episode_counter, :] 118 | self.episode_data = self.episode_data["price"].tolist() 119 | self.episode_data.pop(0) 120 | self.T = len(self.episode_data) 121 | super().reset(reset_prng) 122 | 123 | def exog_info_fn(self, decision): 124 | return {"price": self.episode_data.pop(0), "bias": "Neutral"} 125 | -------------------------------------------------------------------------------- /AssetSelling/AssetSellingPolicies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | from BaseClasses.SDPPolicy import SDPPolicy 6 | 7 | 8 | class SellLowPolicy(SDPPolicy): 9 | def __init__(self, model: SDPModel, policy_name: str = "SellLow", theta_low: float = 10): 10 | super().__init__(model, policy_name) 11 | self.theta_low = theta_low 12 | 13 | def get_decision(self, state, t, T): 14 | new_decision = {"sell": 1, "hold": 0} if state.price < self.theta_low else {"sell": 0, "hold": 1} 15 | 16 | if t == T - 1: 17 | new_decision = {"sell": 1, "hold": 0} 18 | return new_decision 19 | 20 | 21 | class HighLowPolicy(SDPPolicy): 22 | def 
__init__( 23 | self, model: SDPModel, policy_name: str = "HighLow", theta_low: float = 10, theta_high: float = 30 24 | ): 25 | super().__init__(model, policy_name) 26 | self.theta_low = theta_low 27 | self.theta_high = theta_high 28 | 29 | def get_decision(self, state, t, T): 30 | new_decision = ( 31 | {"sell": 1, "hold": 0} 32 | if state.price < self.theta_low or state.price > self.theta_high 33 | else {"sell": 0, "hold": 1} 34 | ) 35 | 36 | if t == T - 1: 37 | new_decision = {"sell": 1, "hold": 0} 38 | 39 | return new_decision 40 | 41 | 42 | class TrackPolicy(SDPPolicy): 43 | def __init__(self, model: SDPModel, policy_name: str = "Track", theta: float = 10): 44 | super().__init__(model, policy_name) 45 | self.theta = theta 46 | 47 | def get_decision(self, state, t, T): 48 | new_decision = ( 49 | {"sell": 1, "hold": 0} 50 | if state.price >= state.price_smoothed + self.theta 51 | or state.price <= state.price_smoothed - self.theta 52 | else {"sell": 0, "hold": 1} 53 | ) 54 | 55 | if t == T - 1: 56 | new_decision = {"sell": 1, "hold": 0} 57 | return new_decision 58 | -------------------------------------------------------------------------------- /BaseClasses/Dummy.py: -------------------------------------------------------------------------------- 1 | from SDPModel import SDPModel 2 | from SDPPolicy import SDPPolicy 3 | from copy import deepcopy, copy 4 | 5 | 6 | class DummyModel(SDPModel): 7 | def exog_info_fn(self, decision): 8 | return {"W": self.prng.random()} 9 | 10 | def transition_fn(self, decision, exog_info): 11 | return {"S": exog_info["W"]} 12 | 13 | def objective_fn(self, decision, exog_info): 14 | return 0.0 15 | 16 | 17 | class DummyPolicy(SDPPolicy): 18 | def get_decision(self, state): 19 | return {"x": 0} 20 | 21 | 22 | model = DummyModel(state_names=["S"], decision_names=["x"], S0={"S": 0.0}, T=10) 23 | # Initialize different policies (different thetas) with a deep copy of the model to guarantee 24 | # that both are run with the same random values from the prng. 25 | policy = DummyPolicy(model=deepcopy(model), policy_name="dummy policy") 26 | policy2 = DummyPolicy(model=deepcopy(model), policy_name="dummy policy2") 27 | print(policy.policy_name) 28 | policy.run_policy(n_iterations=3) 29 | print(policy2.policy_name) 30 | policy2.run_policy(n_iterations=2) 31 | -------------------------------------------------------------------------------- /BaseClasses/SDPModel.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | from abc import ABC, abstractmethod 4 | 5 | 6 | class SDPModel(ABC): 7 | """ 8 | Sequential decision problem base class 9 | 10 | This class represents a base class for sequential decision problems. 11 | It provides methods for initializing the problem, resetting the state, 12 | performing a single step in the problem, and updating the time index. 13 | 14 | Attributes: 15 | State (namedtuple): Named tuple representing the state variables. 16 | Decision (namedtuple): Named tuple representing the decision variables. 17 | state_names (list): List of state variable names. 18 | decision_names (list): List of decision variable names. 19 | initial_state (State): Initial state of the problem. 20 | state (State): Current state of the problem. 21 | objective (float): Objective value of the problem. 22 | t (float): Current time index. 23 | T (float): Terminal time. 24 | prng (RandomState): Random number generator. 
25 | episode_counter (int): Which set of historical data (episode) to be used. 26 | 27 | Methods: 28 | __init__: Initializes an instance of the SDPModel class. 29 | reset: Resets the SDPModel to its initial state. 30 | build_state: Sets the new state values using the provided information. 31 | build_decision: Builds a decision object using the provided information. 32 | exog_info_fn: Abstract method for generating exogenous information. 33 | transition_fn: Abstract method for computing the state transition. 34 | objective_fn: Abstract method for computing the objective value. 35 | update_t: Updates the value of the time index. 36 | step: Performs a single step in the sequential decision problem. 37 | """ 38 | 39 | def __init__( 40 | self, 41 | state_names: list, 42 | decision_names: list, 43 | S0: dict, 44 | t0: float = 0, 45 | T: float = 1, 46 | seed: int = 42, 47 | ) -> None: 48 | """ 49 | Initializes an instance of the SDPModel class. 50 | 51 | Args: 52 | state_names (list): List of state variable names. 53 | decision_names (list): List of decision variable names. 54 | S0 (dict): Initial state values. 55 | t0 (float, optional): Initial time. Defaults to 0. 56 | T (float, optional): Terminal time. Defaults to 1. 57 | seed (int, optional): Seed for random number generation. Defaults to 42. 58 | exog_params (dict, optional): (Static) parameters to be used by the exogenuous information process. 59 | state_params (dict, optional): (Static) parameters to be used by the state transition function. 60 | """ 61 | self.State = namedtuple("State", state_names) 62 | self.Decision = namedtuple("Decision", decision_names) 63 | 64 | self.state_names = state_names 65 | self.decision_names = decision_names 66 | 67 | self.initial_state = self.build_state(S0) 68 | self.state = self.build_state(S0) 69 | 70 | self.objective = 0.0 71 | self.t0 = t0 72 | self.t = t0 73 | self.T = T 74 | self.seed = seed 75 | self.prng = np.random.RandomState(seed) 76 | self.episode_counter = 0 77 | 78 | def reset(self, reset_prng: bool = False): 79 | """ 80 | Resets the SDPModel to its initial state. 81 | 82 | This method resets the state, objective, and time variables of the SDPModel 83 | to their initial values. 84 | 85 | Parameters: 86 | None 87 | 88 | Returns: 89 | None 90 | """ 91 | self.state = self.initial_state 92 | self.objective = 0.0 93 | self.t = self.t0 94 | if reset_prng is True: 95 | self.prng = np.random.RandomState(self.seed) 96 | 97 | def build_state(self, info: dict): 98 | """ 99 | Sets the new state values using the provided information. 100 | 101 | Args: 102 | info (dict): A dictionary containing the new values for all state variables. 103 | 104 | Returns: 105 | State: The updated state object. 106 | """ 107 | return self.State(*[info[k] for k in self.state_names]) 108 | 109 | def build_decision(self, info: dict): 110 | """ 111 | Builds a decision object using the provided information. 112 | 113 | Args: 114 | info (dict): A dictionary containing the new values for all decision variables. 115 | 116 | Returns: 117 | Decision: The decision object. 118 | """ 119 | return self.Decision(*[info[k] for k in self.decision_names]) 120 | 121 | @abstractmethod 122 | def exog_info_fn(self, decision): 123 | """ 124 | Abstract method for generating exogenous information. 125 | 126 | This method should be implemented in the derived classes to generate 127 | the exogenous information based on the current decision. 128 | 129 | Args: 130 | decision (namedtuple): The current decision. 
131 | 132 | Returns: 133 | dict: A dictionary containing the exogenous information. 134 | """ 135 | pass 136 | 137 | @abstractmethod 138 | def transition_fn(self, decision, exog_info: dict): 139 | """ 140 | Abstract method for computing the state transition. 141 | 142 | This method should be implemented in the derived classes to compute 143 | the state transition based on the current state, decision, and exogenous information. 144 | 145 | Args: 146 | decision (namedtuple): The current decision. 147 | exog_info (dict): The exogenous information. 148 | 149 | Returns: 150 | dict: A dictionary containing the updated state variables. 151 | """ 152 | pass 153 | 154 | @abstractmethod 155 | def objective_fn(self, decision, exog_info: dict): 156 | """ 157 | Abstract method for computing the objective value. 158 | 159 | This method should be implemented in the derived classes to compute 160 | the objective value contribution based on the current state, decision, 161 | and exogenous information. 162 | 163 | Args: 164 | decision (namedtuple): The current decision. 165 | exog_info (dict): The exogenous information. 166 | 167 | Returns: 168 | float: The contribution to the objective. 169 | """ 170 | pass 171 | 172 | def is_finished(self): 173 | """ 174 | Check if the model is finished. By default, the model runs until the end of the time horizon 175 | but the method can be overwritten to model episodic tasks where the time horizon ends earlier. 176 | 177 | Returns: 178 | bool: True if the run is finished, False otherwise. 179 | """ 180 | if self.t >= self.T: 181 | return True 182 | else: 183 | return False 184 | 185 | def update_t(self): 186 | """ 187 | Update the value of the time index t. 188 | """ 189 | self.t += 1 190 | 191 | return self.t 192 | 193 | def step(self, decision): 194 | """ 195 | Performs a single step in the sequential decision problem. 196 | 197 | Args: 198 | decision (namedtuple): The decision made at the current state. 199 | 200 | Returns: 201 | The new state after the step and a flag indicating if the episode is finished. 202 | """ 203 | # Generate new exogenous information W_t+1 204 | exog_info = self.exog_info_fn(decision) 205 | 206 | # Compute objective C_t based on W_t+1, x_t, S_t (state is not updated yet) 207 | self.objective += self.objective_fn(decision, exog_info) 208 | 209 | # Execute transition function and add new state to exog_info dict 210 | exog_info.update(self.transition_fn(decision, exog_info)) 211 | 212 | # Build new state from state variables and (optionally) exog_info variables. 213 | # This is convenient if some of the exogenous variables are also state variables. 214 | self.state = self.build_state(exog_info) 215 | 216 | # Update time counter 217 | self.update_t() 218 | 219 | # From the returned state S_t+1, the policy generates a new decision 220 | return self.state 221 | -------------------------------------------------------------------------------- /BaseClasses/SDPPolicy.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from abc import ABC, abstractmethod 3 | import pandas as pd 4 | from . import SDPModel 5 | 6 | 7 | class SDPPolicy(ABC): 8 | def __init__(self, model: SDPModel, policy_name: str = ""): 9 | self.model = model 10 | self.policy_name = policy_name 11 | self.results = pd.DataFrame() 12 | self.performance = pd.NA 13 | 14 | @abstractmethod 15 | def get_decision(self, state, t, T): 16 | """ 17 | Returns the decision made by the policy based on the given state. 
18 | 19 | Args: 20 | state (namedtuple): The current state of the system. 21 | t (float): The current time step. 22 | T (float): The end of the time horizon / total number of time steps. 23 | 24 | Returns: 25 | dict: The decision made by the policy. 26 | """ 27 | pass 28 | 29 | def run_policy(self, n_iterations: int = 1): 30 | """ 31 | Runs the policy over the time horizon [0,T] for a specified number of iterations and return the mean performance. 32 | 33 | Args: 34 | n_iterations (int): The number of iterations to run the policy. Default is 1. 35 | 36 | Returns: 37 | None 38 | """ 39 | result_list = [] 40 | # Note: the random number generator is not reset when calling copy(). 41 | # When calling deepcopy(), it is reset (then all iterations are exactly the same). 42 | for i in range(n_iterations): 43 | model_copy = copy(self.model) 44 | model_copy.episode_counter = i 45 | model_copy.reset(reset_prng=False) 46 | state_t_plus_1 = None 47 | while model_copy.is_finished() is False: 48 | state_t = model_copy.state 49 | decision_t = model_copy.build_decision(self.get_decision(state_t, model_copy.t, model_copy.T)) 50 | 51 | # Logging 52 | results_dict = {"N": i, "t": model_copy.t, "C_t sum": model_copy.objective} 53 | results_dict.update(state_t._asdict()) 54 | results_dict.update(decision_t._asdict()) 55 | result_list.append(results_dict) 56 | 57 | state_t_plus_1 = model_copy.step(decision_t) 58 | 59 | results_dict = {"N": i, "t": model_copy.t, "C_t sum": model_copy.objective} 60 | if state_t_plus_1 is not None: 61 | results_dict.update(state_t_plus_1._asdict()) 62 | result_list.append(results_dict) 63 | 64 | # Logging 65 | self.results = pd.DataFrame.from_dict(result_list) 66 | # t_end per iteration 67 | self.results["t_end"] = self.results.groupby("N")["t"].transform("max") 68 | 69 | # performance of one iteration is the cumulative objective at t_end 70 | self.performance = self.results.loc[self.results["t"] == self.results["t_end"], ["N", "C_t sum"]] 71 | self.performance = self.performance.set_index("N") 72 | 73 | # For reporting, convert cumulative objective to contribution per time 74 | self.results["C_t"] = self.results.groupby("N")["C_t sum"].diff().shift(-1) 75 | 76 | if self.results["C_t sum"].isna().sum() > 0: 77 | print(f"Warning! For {self.results['C_t sum'].isna().sum()} iterations the performance was NaN.") 78 | 79 | return self.performance.mean().iloc[0] 80 | -------------------------------------------------------------------------------- /BaseClasses/Util.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from itertools import product 3 | from copy import deepcopy 4 | from . 
import SDPPolicy 5 | 6 | 7 | def grid_search(grid: dict, policy: SDPPolicy.SDPPolicy, n_iterations: int, ordered: bool = False): 8 | if len(grid) != 2 and ordered: 9 | ordered = False 10 | print("Warning: Grid search for ordered parameters only works if there are exactly two parameters.") 11 | best_performance = 0.0 12 | best_parameters = None 13 | rows = [] 14 | params = grid.keys() 15 | for v in product(*grid.values()): 16 | if ordered: 17 | if v[0] >= v[1]: 18 | continue 19 | 20 | # Do a deep copy so all parameter sets get the same random numbers 21 | policy_copy = deepcopy(policy) 22 | 23 | for param, value in zip(params, v): 24 | setattr(policy_copy, param, value) 25 | 26 | performance = policy_copy.run_policy(n_iterations=n_iterations) 27 | 28 | row = dict(zip(params, v)) 29 | row["performance"] = performance 30 | rows.append(row) 31 | if performance > best_performance: 32 | best_performance = performance 33 | best_parameters = dict(zip(params, v)) 34 | 35 | return { 36 | "best_parameters": best_parameters, 37 | "best_performance": best_performance, 38 | "all_runs": pd.DataFrame(rows), 39 | } 40 | -------------------------------------------------------------------------------- /BloodManagement/BloodManagementModel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import (namedtuple, defaultdict) 3 | 4 | class Model(): 5 | """ 6 | Base class for model 7 | """ 8 | 9 | def __init__(self, state_names, x_names, init_state, Bld_Net,params): 10 | 11 | self.params = params 12 | self.init_state = init_state 13 | self.state_names = state_names 14 | self.x_names = x_names 15 | self.State = namedtuple('State', state_names) 16 | self.state = self.build_state(init_state) 17 | self.Decision = namedtuple('Decision', x_names) 18 | self.obj = 0.0 19 | self.Bld_Net = Bld_Net 20 | self.bld_inv = init_state['BloodInventory'] 21 | self.demand = init_state['Demand'] 22 | self.donation = init_state['Donation'] 23 | 24 | # include initial inventory into the network 25 | for i in range(self.params['NUM_BLD_NODES']): 26 | self.Bld_Net.bloodamount[i] = self.bld_inv[i] 27 | 28 | # include initial demand into the network 29 | for i in range(self.params['NUM_DEM_NODES']): 30 | self.Bld_Net.demandamount[i] = self.demand[i] 31 | 32 | 33 | def build_state(self, info): 34 | return self.State(*[info[k] for k in self.state_names]) 35 | 36 | def build_decision(self, info): 37 | return self.Decision(*[info[k] for k in self.x_names]) 38 | 39 | # exogenous information = demand from t-1 to t and new donated blood 40 | def exog_info_fn(self, exog_info): 41 | self.demand = exog_info.demand 42 | # update the demand nodes 43 | for i in range(self.params['NUM_DEM_NODES']): 44 | self.Bld_Net.demandamount[i] = exog_info.demand[i] 45 | # save the donation vector to the model 46 | self.donation = exog_info.donation 47 | return exog_info 48 | 49 | def transition_fn(self, decision): 50 | # iterate through hold vector 51 | hold = decision[0] 52 | for i in range(self.params['NUM_BLD_NODES']): 53 | self.Bld_Net.holdamount[i] = hold[i] 54 | 55 | 56 | rev_don = list(reversed(self.donation)) 57 | rev_hld = list(reversed(self.Bld_Net.holdamount)) 58 | # age the blood at hold node and add in the donations 59 | 60 | for i in range(self.params['NUM_BLD_NODES']): 61 | if (i % self.params['MAX_AGE'] == self.params['MAX_AGE']-1): 62 | # add donation 63 | rev_hld[i] = rev_don[i // self.params['MAX_AGE']] 64 | else: 65 | # age 66 | rev_hld[i] = rev_hld[i+1] 67 | 68 | 
rev_hld = list(reversed(rev_hld)) 69 | # amount at blood node = amount at hold node 70 | for i in range(self.params['NUM_BLD_NODES']): 71 | self.Bld_Net.bloodamount[i] = rev_hld[i] 72 | 73 | # updating obj value 74 | self.obj += decision[1] 75 | 76 | # update current state 77 | self.bld_inv = self.Bld_Net.bloodamount 78 | return self.state 79 | 80 | def objective_fn(self): 81 | return self.obj 82 | 83 | ######################################################################################################## 84 | 85 | class Exog_Info(): 86 | def __init__(self, demand, donation): 87 | # list consisting of blood demand objects 88 | self.demand = demand 89 | # list consisting of blood unit objects donated to the blood inventory 90 | self.donation = donation 91 | 92 | 93 | # function to generate random exogenous information dependent on blood type and time t 94 | def generate_exog_info_by_bloodtype(t, Bld_Net, params): 95 | # demand 96 | demand= [] 97 | if (t in params['TIME_PERIODS_SURGE'] and np.random.uniform(0, 1) < params['SURGE_PROB']): 98 | factor = params['SURGE_FACTOR'] 99 | else: 100 | factor = 0 101 | demand = [round(np.random.uniform(0, params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) + factor*int(np.random.poisson(params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) for dmd in Bld_Net.demandnodes] 102 | 103 | # donation 104 | donation = [round(np.random.uniform(0, params['MAX_DON_BY_BLOOD'][i])) for i in params['Bloodtypes']] 105 | return Exog_Info(demand, donation) 106 | 107 | 108 | # function to generate random exogenous information dependent on blood type and time t 109 | def generate_exog_info_by_bloodtype_p(t, Bld_Net, params): 110 | # demand 111 | if (t in params['TIME_PERIODS_SURGE'] and np.random.uniform(0, 1) < params['SURGE_PROB']): 112 | factor = params['SURGE_FACTOR'] 113 | else: 114 | factor = 1 115 | 116 | demand = [int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]])) for dmd in Bld_Net.demandnodes] 117 | 118 | if False: 119 | demand=[] 120 | for dmd in Bld_Net.demandnodes: 121 | if dmd[0]=="O-": 122 | if dmd[1]=="Urgent": 123 | demand.append(1) 124 | else: 125 | eleDem=max(0,int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]-1))-1) 126 | demand.append(eleDem) 127 | 128 | else: 129 | demand.append(int(np.random.poisson(factor*params['MAX_DEM_BY_BLOOD'][dmd[0]]*params['SURGERYTYPES_PROP'][dmd[1]]*params['SUBSTITUTION_PROP'][dmd[2]]))) 130 | 131 | 132 | 133 | #donation 134 | donation = [int(np.random.poisson(params['MAX_DON_BY_BLOOD'][i])) for i in params['Bloodtypes']] 135 | 136 | return Exog_Info(demand, donation) 137 | 138 | 139 | 140 | ########################################################################################################## 141 | # function to calculate one step contribution 142 | def contribution(params,bloodnode, demandnode): 143 | 144 | # if substutition is not allowed 145 | if (demandnode[2] == False and bloodnode[0] != demandnode[0]) or (demandnode[2] == True and params['SubMatrix'][(bloodnode[0], demandnode[0])] == False): 146 | value=params['INFEASIABLE_SUBSTITUTION_PENALTY'] 147 | else: 148 | # start giving a bonus depending on the age of the blood 149 | #value = params['AGE_BONUS'][int(bloodnode[1])] 150 | value=0 151 | # no substitution 152 | if bloodnode[0] == demandnode[0]: 153 | value += params['NO_SUBSTITUTION_BONUS'] 154 | # 
filling urgent demand 155 | if demandnode[1] == 'Urgent': 156 | value += params['URGENT_DEMAND_BONUS'] 157 | # filling elective demand 158 | else: 159 | value += params['ELECTIVE_DEMAND_BONUS'] 160 | 161 | if demandnode[1] == 'Elective': 162 | value += params['BLOOD_FOR_ELECTIVE_PENALTY'] 163 | 164 | 165 | 166 | return(value) -------------------------------------------------------------------------------- /BloodManagement/BloodManagementNetwork.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import (namedtuple, defaultdict) 3 | 4 | from BloodManagementModel import contribution 5 | 6 | 7 | class Graph: 8 | def __init__(self): 9 | self.bloodnodes = list() 10 | self.bloodamount = [] 11 | 12 | self.demandnodes = list() 13 | self.demandamount = [] 14 | self.demcontrib = {} 15 | 16 | self.demedges = defaultdict(list) 17 | self.demweights = {} 18 | 19 | 20 | 21 | self.supersink = None 22 | 23 | self.holdnodes = list() 24 | self.holdamount = [] 25 | self.holdedges = defaultdict(list) 26 | self.holdweights = {} 27 | self.holdvbar = [] 28 | 29 | self.parallelarr = {} 30 | self.varr = {} 31 | 32 | self.sqGrad = {} #this will store the sum of the squared gradients when using AdaGrad stepsizes. 33 | 34 | 35 | # supersink_node 36 | def add_supersinknode(self, name): 37 | self.supersink = name 38 | self.amount[name] = 0 39 | 40 | # node of type (bloodtype, age) for current blood inventory 41 | def add_bloodnode(self, name): 42 | self.bloodnodes.append(name) 43 | self.bloodamount.append(0) 44 | 45 | # node - (bloodtype, age) 46 | def add_demandnode(self, name): 47 | self.demandnodes.append(name) 48 | self.demandamount.append(0) 49 | 50 | # node - (bloodtype, age) 51 | def add_holdnode(self, name): 52 | self.holdnodes.append(name) 53 | self.holdamount.append(0) 54 | 55 | # create an edge between two nodes 56 | def add_demedge(self, from_node, to_node, weight): 57 | self.demedges[from_node].append(to_node) 58 | self.demweights[(from_node, to_node)] = weight 59 | 60 | # create an edge between two nodes 61 | def add_holdedge(self, from_node, to_node, weight): 62 | self.holdedges[from_node].append(to_node) 63 | self.holdweights[(from_node, to_node)] = weight 64 | 65 | def add_parallel(self, t, from_node, to_node, parallelarray): 66 | self.parallelarr[(t, from_node, to_node)] = parallelarray 67 | 68 | def add_varr(self, t, from_node, to_node, varr): 69 | self.varr[(t, from_node, to_node)] = varr 70 | 71 | def add_demcontribArr(self, bldnode,demcontribArr): 72 | self.demcontrib[bldnode] = demcontribArr 73 | 74 | def add_sqGradArr(self, t, bldnode,sqGradArr): 75 | self.sqGrad[(t,bldnode)] = sqGradArr 76 | 77 | 78 | def create_bld_net(params): 79 | # create the network 80 | Bl_Net = Graph() 81 | Bl_Net.supersink = ('supersink', np.inf) 82 | # (BloodUnit, Age) pairs and respective hold nodes 83 | for i in params['Bloodtypes']: 84 | for j in params['Ages']: 85 | Bl_Net.add_bloodnode((i, str(j))) 86 | Bl_Net.add_holdnode((i, str(j))) 87 | 88 | # all possible demand nodes 89 | for i in params['Bloodtypes']: 90 | for j in params['Surgerytypes']: 91 | for k in params['Substitution']: 92 | Bl_Net.add_demandnode((i, j, k)) 93 | 94 | #add edges from (bloodunit, age) pairs to suitable demand nodes 95 | for bld in Bl_Net.bloodnodes: 96 | for dmd in Bl_Net.demandnodes: 97 | weight = contribution(params,bld, dmd) 98 | Bl_Net.add_demedge(bld, dmd, weight) 99 | 100 | for bld in Bl_Net.bloodnodes: 101 | demcontribArr = [contribution(params,bld, dmd) 
for dmd in Bl_Net.demandnodes] 102 | Bl_Net.add_demcontribArr(bld,demcontribArr) 103 | 104 | 105 | # add edges from blood nodes to hold nodes 106 | for bld in Bl_Net.bloodnodes: 107 | for hld in Bl_Net.holdnodes: 108 | if bld[0] == hld[0] and bld[1] == hld[1]: 109 | Bl_Net.add_holdedge(bld, hld, 0) 110 | 111 | # add parallel edges from hold nodes to supersink 112 | for t in params['Times']: 113 | for hld in Bl_Net.holdnodes: 114 | parArr = np.zeros(params['NUM_PARALLEL_LINKS']) 115 | vArr = np.zeros(params['NUM_PARALLEL_LINKS']) 116 | Bl_Net.add_parallel(t, hld, Bl_Net.supersink, parArr) 117 | Bl_Net.add_varr(t, hld, Bl_Net.supersink, vArr) 118 | 119 | sqGradArr = np.zeros(params['NUM_PARALLEL_LINKS']) 120 | Bl_Net.add_sqGradArr(t, hld, sqGradArr) 121 | 122 | 123 | 124 | 125 | return(Bl_Net) 126 | 127 | -------------------------------------------------------------------------------- /BloodManagement/BloodManagementPolicy.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import cvxopt 4 | from collections import (namedtuple, defaultdict) 5 | 6 | 7 | def initLPMatrices(params,Bld_Net): 8 | #Initializing the matrix for the LP 9 | A = np.zeros((params['NUM_BLD_NODES'], params['NUM_BLD_NODES']*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']))) 10 | for i in range(params['NUM_BLD_NODES']): 11 | for j in range(params['NUM_BLD_NODES']*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])): 12 | if (j < (i+1)*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])) and (j >= i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])): 13 | #Checking for feasibility 14 | k=j-i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']) 15 | if (k= 0 36 | for i in range(params['NUM_BLD_NODES']): 37 | for j in range(params['NUM_DEM_NODES']): 38 | G[params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'] + i*params['NUM_DEM_NODES'] + j, (params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])*i + j] = -1 39 | 40 | 41 | # ineq constr for x_parallel >= 0 42 | for i in range(params['NUM_BLD_NODES']): 43 | for j in range(params['NUM_PARALLEL_LINKS']): 44 | G[params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'] + params['NUM_DEM_NODES']*params['NUM_BLD_NODES'] + i*params['NUM_PARALLEL_LINKS'] + j,(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])*i + params['NUM_DEM_NODES'] + j] = -1 45 | 46 | 47 | h = np.ones(params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS']) 48 | h[params['NUM_DEM_NODES']::params['NUM_PARALLEL_LINKS']]= params['SLOPE_CAPAC_LAST'] 49 | h = np.append(h, np.zeros(params['NUM_BLD_NODES']*params['NUM_DEM_NODES'] + params['NUM_BLD_NODES']*params['NUM_PARALLEL_LINKS'])) 50 | 51 | A = cvxopt.matrix(A) 52 | G = cvxopt.matrix(G) 53 | 54 | coeff = [np.concatenate((np.array(Bld_Net.demcontrib[bld]),np.zeros(params['NUM_PARALLEL_LINKS']))) if int(bld[1])< params['MAX_AGE']-1 else np.concatenate((np.array(Bld_Net.demcontrib[bld]),np.add(np.zeros(params['NUM_PARALLEL_LINKS']),params['DISCARD_BLOOD_PENALTY']))) for bld in Bld_Net.bloodnodes] 55 | coeff = [ai for a in coeff for ai in a ] 56 | coeff = np.array(coeff) 57 | 58 | return (A,G,h,coeff) 59 | 60 | class Policy(): 61 | """ 62 | Base class for Static Stochastic Shortest Path Model policy 63 | """ 64 | 65 | def __init__(self,params,Bld_Net): 66 | """ 67 | Initializes the policy 68 | """ 69 | 70 | self.A,self.G,self.h,self.coeff = initLPMatrices(params,Bld_Net) 71 | 72 | 73 | def 
getLPSol(self,params,M,iteration,t,solDemList,solHoldList,IS_TRAINING): 74 | 75 | c_t = [np.concatenate((np.multiply(np.array(M.Bld_Net.demcontrib[bld]),-1),np.multiply(M.Bld_Net.parallelarr[(t, bld, M.Bld_Net.supersink)],-params['DISCOUNT_FACTOR']))) if int(bld[1])< params['MAX_AGE']-1 else np.concatenate((np.multiply(np.array(M.Bld_Net.demcontrib[bld]),-1),np.add(np.multiply(M.Bld_Net.parallelarr[(t, bld, M.Bld_Net.supersink)],-params['DISCOUNT_FACTOR']),-params['DISCARD_BLOOD_PENALTY']))) for bld in M.Bld_Net.bloodnodes] 76 | c = [ai for a in c_t for ai in a ] 77 | b = np.array(M.Bld_Net.bloodamount) 78 | self.h[:params['NUM_DEM_NODES']] = M.Bld_Net.demandamount 79 | 80 | 81 | c = cvxopt.matrix(c) 82 | b = cvxopt.matrix(b,size=(params['NUM_BLD_NODES'],1),tc='d') 83 | h = cvxopt.matrix(self.h) 84 | 85 | cvxopt.solvers.options['show_progress'] = False 86 | sol = cvxopt.solvers.lp(c, self.G, h, self.A, b,solver='glpk',options={'glpk':{'msg_lev':'GLP_MSG_OFF'}}) 87 | #sol = cvxopt.solvers.lp(c, self.G, h, self.A, b) 88 | 89 | x = sol['x'] 90 | 91 | x = np.array(x) 92 | x = np.squeeze(x) 93 | 94 | val = np.dot(x, self.coeff) 95 | 96 | 97 | 98 | xDem = [x[i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS']):i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])+params['NUM_DEM_NODES']] for i in list(range(params['NUM_BLD_NODES']))] 99 | xDemFlat = [xij for xi in xDem for xij in xi] 100 | solDemRec=(iteration,t,xDem.copy()) 101 | solDemList.append(solDemRec) 102 | 103 | hld=[np.sum(x[i*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])+params['NUM_DEM_NODES']:(i+1)*(params['NUM_DEM_NODES']+params['NUM_PARALLEL_LINKS'])]) for i in list(range(params['NUM_BLD_NODES']))] 104 | solHoldRecord = (iteration,t,hld.copy()) 105 | solHoldList.append(solHoldRecord) 106 | hld = np.array(hld) 107 | 108 | invByBlood = [np.sum(M.bld_inv[i*params['MAX_AGE']:(i+1)*params['MAX_AGE']]) for i in list(range(len(params['Bloodtypes']))) ] 109 | demByBlood = [np.sum(M.Bld_Net.demandamount[i*(len(params['Surgerytypes'])*len(params['Substitution'])):(i+1)*(len(params['Surgerytypes'])*len(params['Substitution']))]) for i in list(range(len(params['Bloodtypes']))) ] 110 | 111 | xDemFlat = [xij for xi in xDem for xij in xi] 112 | xDemMat = np.array(xDemFlat).reshape(params['NUM_BLD_NODES'],params['NUM_DEM_NODES']) 113 | xDemMatColSum = xDemMat.sum(axis=0) 114 | covByBlood = [ np.sum(xDemMatColSum[i*(len(params['Surgerytypes'])*len(params['Substitution'])):(i+1)*(len(params['Surgerytypes'])*len(params['Substitution']))]) for i in list(range(len(params['Bloodtypes']))) ] 115 | covByBlood = np.array(covByBlood).astype(int) 116 | 117 | hldByBlood = [int(np.sum(hld[i*params['MAX_AGE']:(i+1)*params['MAX_AGE']])) for i in list(range(len(params['Bloodtypes']))) ] 118 | disByBlood = hld[params['MAX_AGE']-1::params['MAX_AGE']] 119 | disByBlood = np.array(disByBlood) 120 | disByBlood = disByBlood.astype(int) 121 | 122 | if False: 123 | print('Iteration = ', iteration) 124 | print('Time period = ', t) 125 | print('Demand = ', np.sum(M.Bld_Net.demandamount)) 126 | print('Supply = ', np.sum(M.Bld_Net.bloodamount)) 127 | print('Blood Used = ', np.sum(M.bld_inv) - np.sum(hld)) 128 | print('Blood Held = ', np.sum(hld)) 129 | print('Inventory by BloodType ',invByBlood) 130 | print('Demand By BloodType ',demByBlood) 131 | print('Used By BloodType ', list(covByBlood)) 132 | print('Hold By BloodType ',hldByBlood) 133 | print('Discard By BloodType ', list(disByBlood)) 134 | print('Contribution = ', val) 135 | print('Donation = ', 
np.sum(M.donation)) 136 | print('\n') 137 | 138 | hld = hld.astype(int) 139 | 140 | if IS_TRAINING and params['IS_PERTUB']: 141 | epsilon = PERTUB_GEN.poisson(LAMBDA_PERTUB, params['NUM_BLD_NODES']) 142 | signE = PERTUB_GEN.choice([-1,1], size=params['NUM_BLD_NODES'], replace=True, p=None) 143 | hld = hld+epsilon*signE 144 | hld = np.maximum(np.zeros(params['NUM_BLD_NODES']),hld) 145 | hld = hld.astype(int) 146 | 147 | # dual variables 148 | d = sol['y'] 149 | 150 | return sol,val,x,hld,d,solDemList,solHoldList 151 | 152 | def updateVFAs(self,params,M,iteration,t,d, slopesList,updateVfaList): 153 | alpha = 0 154 | 155 | # set the dual variables to respective parallel arcs 156 | for i in range(params['NUM_BLD_NODES']): 157 | # put the value of the dual varible d[i+1] in the parallel arc, associated 158 | # with the amount of resource in the inventory associated with holdnode[i] 159 | # the holdnodes with the oldest age do not get updated 160 | 161 | recordSlopes = (iteration,t,M.Bld_Net.parallelarr[(t, M.Bld_Net.holdnodes[i], M.Bld_Net.supersink)].copy()) 162 | slopesList.append(recordSlopes) 163 | 164 | index = M.bld_inv[i] 165 | if index>=0: 166 | if (t>0 and M.Bld_Net.holdnodes[i][1]= params['NUM_PARALLEL_LINKS'] - 1: 170 | index = params['NUM_PARALLEL_LINKS'] - 1 171 | 172 | arr = M.Bld_Net.varr[(t-1,M.Bld_Net.holdnodes[i], M.Bld_Net.supersink)] 173 | sqGradArr = M.Bld_Net.sqGrad[(t-1,M.Bld_Net.holdnodes[i])] 174 | 175 | 176 | if iteration < params['NUM_ITER_STEP_ONE']: 177 | alpha = 1 178 | else: 179 | if (params['STEPSIZE_RULE'] == 'C'): 180 | alpha = params['ALPHA'] 181 | elif (params['STEPSIZE_RULE'] == 'A'): 182 | sqGradArr[index] += np.power(vhat-arr[index],2) 183 | alpha = params['ETA']/(np.sqrt(sqGradArr[index]+params['STEP_EPS'])) 184 | 185 | vbar = arr[index] 186 | vnew = alpha*vhat +(1-alpha)*vbar 187 | arr[index] = vnew 188 | 189 | recordUpdateVfa = (iteration,t-1,M.Bld_Net.holdnodes[i][0],M.Bld_Net.holdnodes[i][1],index,vhat,vbar,sqGradArr[index],alpha,vnew) 190 | updateVfaList.append(recordUpdateVfa) 191 | 192 | #Projecting back in case the vfa is not concave anymore 193 | if (vnew>vbar): #Look to the left 194 | indSetL=[i for i in list(range(0,index+1)) if arr[i]<=vnew] 195 | if (len(indSetL)>0): 196 | if params['PROJECTION_ALGO'] == 'Avg': 197 | avg = np.mean(arr[indSetL]) 198 | arr[indSetL]=avg 199 | elif params['PROJECTION_ALGO'] == 'Copy': 200 | arr[indSetL]=vnew 201 | else: 202 | if index > 0: 203 | j=index-1 204 | while (j>=0 and arr[j] < arr[j+1]): 205 | arr[j]= alpha*vhat +(1-alpha)*arr[j] 206 | j-=1 207 | else: 208 | arr[index]=vnew 209 | 210 | 211 | 212 | elif (vnew=vnew] 214 | if (len(indSetR)>0): 215 | if params['PROJECTION_ALGO'] == 'Avg': 216 | avg = np.mean(arr[indSetR]) 217 | arr[indSetR]=avg 218 | elif params['PROJECTION_ALGO'] == 'Copy': 219 | arr[indSetR]=vnew 220 | else: 221 | if index < params['NUM_PARALLEL_LINKS']-1: 222 | j=index+1 223 | while (j arr[j-1]): 224 | arr[j] = alpha*vhat +(1-alpha)*arr[j] 225 | j+=1 226 | else: 227 | arr[index]=vnew 228 | 229 | return alpha,slopesList,updateVfaList 230 | 231 | -------------------------------------------------------------------------------- /BloodManagement/OutputAll.txt: -------------------------------------------------------------------------------- 1 | Instance AvgContrib UrgentCoverage ElectiveCoverage AvgCoverage DiscardProportion Utility 2 | 3 | PolicyMYOPIC_SURGE_0.5_PEN_-4.0_ALPHA_0.00 21447.15 0.91 0.69 0.81 0.00 978.12 4 | PolicyMYOPIC_SURGE_0.5_PEN_-9.0_ALPHA_0.00 20447.25 0.92 0.64 0.79 0.00 
981.20 5 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 6 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 7 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 8 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 981.18 9 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 985.89 10 | PolicyMYOPIC_SURGE_0.5_PEN_-14.0_ALPHA_0.00 23924.20 0.97 0.25 0.61 0.03 986.00 11 | PolicyMYOPIC_SURGE_0.5_PEN_-9.0_ALPHA_0.00 20447.25 0.92 0.64 0.79 0.00 984.00 12 | -------------------------------------------------------------------------------- /BloodManagement/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/BloodManagement/Parameters.xlsx -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ######################################################################### 53 | #HINT!!!!! Insert the loop here for questions 4 and 5 54 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 55 | P = ClinicalTrialsPolicy(M, policy_names) 56 | t = 0 57 | stop = False 58 | policy_info = {'model_A': [-1, stop], 59 | 'model_B': [-1, stop], 60 | 'model_C': [-1, stop], 61 | 'model_C_extension': [-1, stop]} 62 | 63 | policy_value=P.run_policy(policy_info, model_name, t) 64 | #End HINT! 
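# A minimal, commented-out sketch of the loop the HINT above asks for (questions 4 and 5), assuming the
# numIterations value read from Parameters.xlsx; the fully worked versions are in
# ClinicalTrialsDriverScriptSolutionQ4.py and ClinicalTrialsDriverScriptSolutionQ5.py below.
# avg_policy_value = 0
# for i in range(numIterations):
#     M = ClinicalTrialsModel(state_names, decision_names, initial_state, False)
#     P = ClinicalTrialsPolicy(M, policy_names)
#     policy_info = {name: [-1, False] for name in policy_names}
#     avg_policy_value += P.run_policy(policy_info, model_name, 0)
# avg_policy_value /= numIterations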
65 | ####################################################################### 66 | 67 | 68 | 69 | 70 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 71 | 72 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ############################################################ 53 | #Solution Q4 54 | avg_policy_value = 0 55 | for i in range(0,numIterations): 56 | 57 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 58 | P = ClinicalTrialsPolicy(M, policy_names) 59 | t = 0 60 | stop = False 61 | policy_info = {'model_A': [-1, stop], 62 | 'model_B': [-1, stop], 63 | 'model_C': [-1, stop], 64 | 'model_C_extension': [-1, stop]} 65 | policy_value = P.run_policy(policy_info, model_name, t) 66 | avg_policy_value += policy_value 67 | print("Finished run policy for iteration {} - Value: {} and Avg_value: {:,}".format(i,policy_value,avg_policy_value/(i+1))) 68 | avg_policy_value = avg_policy_value/numIterations 69 | print("Average values after {} iterations is {:,}".format(numIterations,avg_policy_value)) 70 | #End Solution Q4 71 | ############################################################ 72 | 73 | 74 | 75 | 76 | 77 | 78 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 79 | 80 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ5.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 
| """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import ClinicalTrialsModel 13 | from ClinicalTrialsPolicy import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | ################################################################ 53 | #Solution Q5 54 | theta_list = list(np.arange(.77,.79,0.005)) 55 | theta_avg=[] 56 | for theta in theta_list: 57 | initial_state.update({'theta_stop_low':theta}) 58 | avg_policy_value = 0 59 | for i in range(0,numIterations): 60 | 61 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 62 | P = ClinicalTrialsPolicy(M, policy_names) 63 | t = 0 64 | stop = False 65 | policy_info = {'model_A': [-1, stop], 66 | 'model_B': [-1, stop], 67 | 'model_C': [-1, stop], 68 | 'model_C_extension': [-1, stop]} 69 | policy_value = P.run_policy(policy_info, model_name, t) 70 | avg_policy_value += policy_value 71 | print("Finished run policy for iteration {} - Value: {} and Avg_value: {:,}".format(i,policy_value,avg_policy_value/(i+1))) 72 | 73 | avg_policy_value = avg_policy_value/numIterations 74 | print("Theta {} - Average values after {} iterations is {:,}".format(initial_state['theta_stop_low'],numIterations,avg_policy_value)) 75 | theta_avg.append(avg_policy_value) 76 | 77 | print(theta_list) 78 | print(theta_avg) 79 | plt.plot(theta_list,theta_avg,'bo') 80 | plt.show() 81 | #End Solution Q5 82 | ############################################################### 83 | 84 | 85 | 86 | 87 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 88 | 89 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsDriverScriptSolutionQ6.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Driver Script class 3 | 4 | Raluca Cobzaru (c) 2018 5 | 6 | """ 7 | 8 | from collections import namedtuple 9 | import numpy as np 10 | import scipy 11 | import pandas as pd 12 | from ClinicalTrialsModel import 
ClinicalTrialsModel 13 | from ClinicalTrialsPolicySolutionQ6 import ClinicalTrialsPolicy 14 | import matplotlib.pyplot as plt 15 | import time 16 | 17 | if __name__ == "__main__": 18 | time_total = time.time() 19 | np.random.seed(2345678173) 20 | # initializes a policy object and a model object, then runs the policy on the model 21 | policy_names = ['model_A', 'model_B', 'model_C', 'model_C_extension'] 22 | state_names = ['potential_pop', 'success', 'failure', 'l_response'] 23 | # extracts data from given data set; defines initial state 24 | file = 'Parameters.xlsx' 25 | raw_data = pd.ExcelFile(file) 26 | data = raw_data.parse('Parameters') 27 | initial_state = {'potential_pop': float(data.iat[0, 0]), 28 | 'success': data.iat[1, 0], 29 | 'failure': float(data.iat[2, 0]), 30 | 'l_response': float(data.iat[3, 0]), 31 | 'theta_stop_low': data.iat[4, 0], 32 | 'theta_stop_high': data.iat[5, 0], 33 | 'alpha': data.iat[6, 0], 34 | 'K': int(data.iat[7, 0]), 35 | 'N': int(data.iat[8, 0]), 36 | 'trial_size': int(data.iat[9, 0]), 37 | 'patient_cost': data.iat[10, 0], 38 | 'program_cost': data.iat[11, 0], 39 | 'success_rev': data.iat[12, 0], 40 | 'sampling_size': int(data.iat[13, 0]), 41 | 'enroll_min': int(data.iat[14, 0]), 42 | 'enroll_max': int(data.iat[15, 0]), 43 | 'enroll_step': int(data.iat[16, 0]), 44 | 'H': int(data.iat[17, 0]), 45 | 'true_l_response': data.iat[18, 0], 46 | 'true_succ_rate': data.iat[19, 0]} 47 | model_name = data.iat[20, 0] 48 | numIterations = int(data.iat[21,0]) 49 | 50 | decision_names = ['enroll', 'prog_continue', 'drug_success'] 51 | 52 | 53 | M = ClinicalTrialsModel(state_names, decision_names, initial_state, False) 54 | P = ClinicalTrialsPolicy(M, policy_names) 55 | t = 0 56 | stop = False 57 | policy_info = {'model_A': [-1, stop], 58 | 'model_B': [-1, stop], 59 | 'model_C': [-1, stop], 60 | 'model_C_extension': [-1, stop]} 61 | 62 | policy_value=P.run_policy(policy_info, model_name, t) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | print("Total elapsed time {:.2f} secs".format(time.time()-time_total)) 72 | 73 | pass -------------------------------------------------------------------------------- /ClinicalTrials/ClinicalTrialsModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clinical Trials Model class 3 | 4 | Raluca Cobzaru (c) 2018 5 | Adapted from code by Donghun Lee (c) 2018 6 | 7 | """ 8 | from collections import namedtuple 9 | import numpy as np 10 | from scipy.stats import binom 11 | import math 12 | import pandas as pd 13 | 14 | def trunc_poisson_fn(count, mean): 15 | """ 16 | returns list of truncated Poisson distribution with given mean and values count 17 | 18 | :param count: int - maximal value considered by the distribution 19 | :param mean: float - mean of Poisson distribution 20 | :return list(float) - vector of truncated Poisson pmfs 21 | """ 22 | trunc_probs = [] 23 | sum = 0.0 24 | for r in range(0, count): 25 | trunc_probs.insert(r, 1/math.factorial(r)*(mean**r)*np.exp(-mean)) 26 | sum += trunc_probs[r] 27 | trunc_probs.insert(count, 1-sum) 28 | return trunc_probs 29 | 30 | def mc_success_fn(count, mean, samples, N, K): 31 | """ 32 | simulates enrollment and success process using Monte Carlo sampling 33 | 34 | :param count: int - count of new potential patients 35 | :param mean: float - mean of the truncated Poisson distribution 36 | :param samples: list(float) - samples for the true success rate (assuming sampled distribution) 37 | :param N: int - number of Monte Carlo samples 38 | :param K: int - 
number of samples for the true success rate 39 | :return: dict - number of enrollments and successes 40 | """ 41 | enrollment_samples = [] 42 | success_samples = [] 43 | trunc_probs = trunc_poisson_fn(count, mean) 44 | # simulates enrollment process using truncated Poisson probabilities 45 | for n in range(N): 46 | success_samples.append(0) 47 | MC_r_sample = np.random.choice(range(count+1), size=None, replace=True, p=trunc_probs) 48 | enrollment_samples.append(MC_r_sample) 49 | MC_prob_sample = samples[np.random.randint(0, K)] 50 | # simulates success count using sample probability 51 | for k in range(0, MC_r_sample): 52 | bernoulli_sim = np.random.uniform(0, 1) 53 | if bernoulli_sim < MC_prob_sample: 54 | success_samples[n] += 1 55 | # uniformly chooses enrollments and successes from MC samples 56 | enrolled = np.random.choice(enrollment_samples) 57 | return {"mc_enroll": enrolled, 58 | "mc_success": success_samples[enrollment_samples.index(enrolled)]} 59 | 60 | class ClinicalTrialsModel(): 61 | """ 62 | Base class for model 63 | """ 64 | 65 | def __init__(self, state_variables, decision_variables, s_0, simulation, exog_info_fn=None, transition_fn=None, 66 | objective_fn=None, seed=20180529): 67 | """ 68 | Initializes the model 69 | 70 | :param state_variables: list(str) - state variable dimension names 71 | :param decision_variables: list(str) - decision variable dimension names 72 | :param s_0: dict - needs to contain at least the information to populate initial state using state_names 73 | :param simulation: bool - if True, simulates exogenous data; if False, uses data from given dataset 74 | :param exog_info_fn: function - calculates relevant exogenous information 75 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 76 | evolves 77 | :param objective_fn: function - calculates contribution at time t 78 | :param seed: int - seed for random number generator 79 | """ 80 | 81 | self.init_args = {seed: seed} 82 | self.prng = np.random.RandomState(seed) 83 | self.initial_state = s_0 84 | self.state_variables = state_variables 85 | self.State = namedtuple('State', state_variables) 86 | self.state = self.build_state(s_0) 87 | self.simulation = simulation 88 | self.decision_variables = decision_variables 89 | self.Decision = namedtuple('Decision', decision_variables) 90 | self.objective = 0.0 91 | 92 | def build_state(self, info): 93 | """ 94 | returns a state containing all the given state information 95 | 96 | :param info: dict - all state information 97 | :return: namedtuple - a state object 98 | """ 99 | return self.State(*[info[k] for k in self.state_variables]) 100 | 101 | def build_decision(self, info): 102 | """ 103 | returns a decision containing all the given deicison information 104 | 105 | :param info: dict - all decision info 106 | :return: namedtuple - a decision object 107 | """ 108 | return self.Decision(*[info[k] for k in self.decision_variables]) 109 | 110 | 111 | 112 | def exog_info_fn(self, decision): 113 | """ 114 | returns the exogenous information dependent on a random process 115 | :param decision: int - number of new potential patients 116 | :return: dict - new enrollments and the number of successes among them 117 | """ 118 | if self.simulation == False: 119 | exog_patients = math.floor(np.random.poisson(lam=self.initial_state['true_l_response'] * (self.state.potential_pop + decision.enroll), size=None)) 120 | exog_succ = math.floor(np.random.binomial(exog_patients, self.initial_state['true_succ_rate'], 
size=None)) 121 | 122 | 123 | #exog_patients = math.floor(self.initial_state['true_l_response'] * (self.state.potential_pop + decision.enroll)) 124 | #exog_succ = math.floor(self.initial_state['true_succ_rate'] * exog_patients) 125 | 126 | return {"new_patients": exog_patients, 127 | "succ_count": exog_succ} 128 | else: 129 | r_bar = math.floor(self.state.l_response * (self.state.potential_pop + decision.enroll)) 130 | # implements new patients and success process using Monte Carlo sampling 131 | p_true_samples = np.random.beta(self.state.success, self.state.failure, self.initial_state['K']) 132 | MC_samples = mc_success_fn(decision.enroll, r_bar, p_true_samples, self.initial_state['N'], self.initial_state['K']) 133 | return {"new_patients": MC_samples['mc_enroll'], 134 | "succ_count": MC_samples['mc_success']} 135 | 136 | 137 | def transition_fn(self, decision, exog_info): 138 | """ 139 | updates the state given the decision and exogenous information 140 | :param decision: namedtuple - contains all decision info 141 | :param exog_info: contains all exogenous information 142 | :return: dict - updated state 143 | """ 144 | enroll_pop = decision.prog_continue * (self.state.potential_pop + decision.enroll) 145 | new_lambda = (1-self.initial_state['alpha']) * self.state.l_response + self.initial_state['alpha'] * exog_info['new_patients']/(self.state.potential_pop + decision.enroll) 146 | new_succ = self.state.success + exog_info['succ_count'] 147 | new_fail = self.state.failure + (exog_info['new_patients'] - exog_info['succ_count']) 148 | return {"potential_pop": enroll_pop, 149 | "success": new_succ, 150 | "failure": new_fail, 151 | "l_response": new_lambda} 152 | 153 | def objective_fn(self, decision): 154 | """ 155 | computes contribution of enrollments 156 | :param decision: namedtuple - contains all decision info 157 | :param exog_info: contains all exogenous info 158 | :return: float - calculated contribution 159 | """ 160 | obj_part = (1-decision.prog_continue) * decision.drug_success * self.initial_state['success_rev'] - decision.prog_continue * (self.initial_state['program_cost'] + self.initial_state['patient_cost'] * decision.enroll) 161 | return obj_part 162 | 163 | def step(self, decision): 164 | """ 165 | steps the process forward by one time increment by updating the sum of the contributions, the 166 | exogenous information, and the state variable 167 | :param decision: namedtuple - contains all decision info 168 | :return: none 169 | """ 170 | exog_info = self.exog_info_fn(decision) 171 | self.objective += self.objective_fn(decision) 172 | exog_info.update(self.transition_fn(decision, exog_info)) 173 | self.state = self.build_state(exog_info) 174 | 175 | if __name__ == "__main__": 176 | # this is an example of creating a model, using a random policy, and running until the drug is declared a success/failure or 177 | # we reach the maximum number of trials 178 | t = 0 179 | stop = False 180 | # extracts data from given data set; defines initial state 181 | file = 'Trials Parameters.xlsx' 182 | raw_data = pd.ExcelFile(file) 183 | data = raw_data.parse('Exogenous Data') 184 | state_variables = ['potential_pop', 'success', 'failure', 'l_response'] 185 | initial_state = {'potential_pop': float(data.iat[0, 0]), 186 | 'success': data.iat[1, 0], 187 | 'failure': float(data.iat[2, 0]), 188 | 'l_response': float(data.iat[3, 0]), 189 | 'theta_stop_low': data.iat[4, 0], 190 | 'theta_stop_high': data.iat[5, 0], 191 | 'alpha': data.iat[6, 0], 192 | 'K': int(data.iat[7, 0]), 193 | 'N': 
int(data.iat[8, 0]), 194 | 'trial_size': int(data.iat[9, 0]), 195 | 'patient_cost': data.iat[10, 0], 196 | 'program_cost': data.iat[11, 0], 197 | 'success_rev': data.iat[12, 0], 198 | 'sampling_size': int(data.iat[13, 0]), 199 | 'enroll_min': int(data.iat[14, 0]), 200 | 'enroll_max': int(data.iat[15, 0]), 201 | 'enroll_step': int(data.iat[16, 0]), 202 | 'H': int(data.iat[17, 0]), 203 | 'true_l_response': data.iat[18, 0], 204 | 'true_succ_rate': data.iat[19, 0]} 205 | decision_variables = ['enroll', 'prog_continue', 'drug_success'] 206 | M = ClinicalTrialsModel(state_variables, decision_variables, initial_state, False) 207 | 208 | while t <= initial_state['trial_size'] and stop == False: 209 | p_belief = M.state.success / (M.state.success + M.state.failure) 210 | # drug_success = 1 if successful, 0 if failure, -1 if continue trial 211 | if p_belief > initial_state['theta_stop_high']: 212 | decision = {'prog_continue': 0, 'drug_success': 1} 213 | stop = True 214 | elif p_belief < initial_state['theta_stop_low']: 215 | decision = {'prog_continue': 0, 'drug_success': 0} 216 | stop = True 217 | else: 218 | decision = {'prog_continue': 1, 'drug_success': -1} 219 | decision['enroll'] = np.random.choice(range(initial_state['enroll_min'], initial_state['enroll_max']+initial_state['enroll_step'], initial_state['enroll_step'])) if stop == False else 0 220 | x = M.build_decision(decision) 221 | print("t={}, obj={}, state.potential_pop={}, state.success={}, state.failure={}, x={}".format(t, M.objective, M.state.potential_pop, M.state.success, M.state.failure, x)) 222 | M.step(x) 223 | t += 1 224 | 225 | print("\nStopping state: ") 226 | print("t={}, obj={}, state.potential_pop={}, state.success={}, state.failure={}, x={}".format(t, M.objective, M.state.potential_pop, M.state.success, M.state.failure, x)) 227 | 228 | pass -------------------------------------------------------------------------------- /ClinicalTrials/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/ClinicalTrials/Parameters.xlsx -------------------------------------------------------------------------------- /EnergyStorage_I/BackwardDP.py: -------------------------------------------------------------------------------- 1 | """ 2 | Backward dynamic programming class 3 | """ 4 | from EnergyStorageModel import EnergyStorageModel as ESM 5 | import numpy as np 6 | import pandas as pd 7 | from bisect import bisect 8 | import matplotlib.pyplot as plt 9 | import math 10 | import time 11 | from collections import namedtuple,defaultdict 12 | 13 | class BDP(): 14 | """ 15 | Base class to implement backward dynamic programming 16 | """ 17 | 18 | def __init__(self, discrete_prices, discrete_energy, price_changes, discrete_price_changes, 19 | f_p, stop_time, model): 20 | """ 21 | Initializes the model 22 | 23 | :param discrete_prices: list - list of discretized prices 24 | :param discrete_energy: list - list of discretized energy amounts 25 | :param price_changes: list - list of price changes 26 | :param discrete_price_changes: list - list of discretized price changes 27 | :param f_p: ndarray - contains f(p) values 28 | :param stop_time: int - time at which loop terminates 29 | :param model: energy storage model 30 | 31 | """ 32 | self.discrete_energy = discrete_energy 33 | self.discrete_prices = discrete_prices 34 | self.price_changes = price_changes 35 | self.discrete_price_changes = 
discrete_price_changes 36 | self.f_p = f_p 37 | self.time = stop_time - 1 38 | self.model = model 39 | self.terminal_contribution = 0 40 | self.values_dict = None #this will store the vfas - it will be computed by the method bellman_2D or bellman_3D 41 | 42 | 43 | 44 | def state_transition(self, state, decision, exog_info): 45 | """ 46 | this function tells us what state we transition to if we are in some state and make a decision 47 | (restricted to states in possible_states) 48 | 49 | :param state: namedtuple - the state of the model at a given time 50 | :param decision: namedtuple - contains all decision info 51 | :param exog_info: any exogenous info 52 | :return: new state object 53 | """ 54 | 55 | 56 | new_energy = state.energy_amount + (self.model.init_args['eta'] * decision.buy) - decision.sell 57 | adjusted_new_energy = math.ceil(new_energy) 58 | 59 | 60 | if len(state) == 2: 61 | new_price = state.price + exog_info 62 | elif len(state) == 3: 63 | new_price = 0.5*state.prev_price + 0.5*state.price + exog_info 64 | 65 | if new_price <= min(self.discrete_prices): 66 | adjusted_new_price = min(self.discrete_prices) 67 | elif new_price >= max(self.discrete_prices): 68 | adjusted_new_price = max(self.discrete_prices) 69 | else: 70 | index = bisect(self.discrete_prices, new_price) 71 | adjusted_new_price = self.discrete_prices[index] 72 | 73 | 74 | if len(state) == 2: 75 | new_state = self.model.build_state({'energy_amount': adjusted_new_energy, 'price': adjusted_new_price}) 76 | 77 | elif len(state) == 3: 78 | prev_price = state.price 79 | if prev_price <= min(self.discrete_prices): 80 | adjusted_prev_price = min(self.discrete_prices) 81 | elif prev_price >= max(self.discrete_prices): 82 | adjusted_prev_price = max(self.discrete_prices) 83 | else: 84 | index = bisect(self.discrete_prices, prev_price) 85 | adjusted_prev_price = self.discrete_prices[index] 86 | 87 | new_state = self.model.build_state({'energy_amount': adjusted_new_energy, 88 | 'price': adjusted_new_price, 89 | 'prev_price': adjusted_prev_price}) 90 | 91 | 92 | return new_state 93 | 94 | def bellman(self): 95 | """ 96 | this function computes the value function using Bellman's equation for a 2D state variable 97 | 98 | :return: list - list of contribution values 99 | """ 100 | 101 | # make list of all possible 2D states using discretized prices and discretized energy values 102 | 103 | self.possible_states = [] 104 | if len(self.model.state_variable) == 2: 105 | for price in self.discrete_prices: 106 | for energy in self.discrete_energy: 107 | state = self.model.build_state({'energy_amount': energy,'price': price}) 108 | self.possible_states.append(state) 109 | else: 110 | for p in self.discrete_prices: 111 | for prev_p in self.discrete_prices: 112 | for energy in self.discrete_energy: 113 | state = self.model.build_state({'energy_amount': energy,'price': p,'prev_price': prev_p}) 114 | self.possible_states.append(state) 115 | 116 | print("State dimension: {}. State space size: {}. 
Exogenous info size: {}".format(len(self.model.state_variable),len(self.possible_states),len(self.discrete_price_changes))) 117 | 118 | 119 | time = self.time 120 | values = defaultdict(dict) 121 | 122 | while time != -1: 123 | max_list = {} 124 | for state in self.possible_states: 125 | price = state.price 126 | energy = state.energy_amount 127 | v_list = [] 128 | for d in self.model.possible_decisions: 129 | x = self.model.build_decision(d, energy) 130 | contribution = price * (self.model.init_args['eta']*x.sell - x.buy) 131 | sum_w = 0 132 | w_index = 0 133 | for w in self.discrete_price_changes: 134 | f = self.f_p[w_index] if w_index == 0 else self.f_p[w_index] - self.f_p[w_index - 1] 135 | next_state = self.state_transition(state, x, w) 136 | next_v = values[time + 1][next_state] if time < self.time \ 137 | else self.terminal_contribution 138 | sum_w += f * next_v 139 | w_index += 1 140 | 141 | v = contribution + sum_w 142 | v_list.append(v) 143 | 144 | max_value = max(v_list) 145 | decList=["Buy","Sell","Hold"] 146 | #print("Time: {} State: price={:.2f}, energy={:.2f} - Buy: {:.2f} Sell: {:.2f} Hold: {:.2f} - Max_value {:.2f} - maxDec {} ".format(time,price, energy,v_list[0],v_list[1],v_list[2],max_value,decList[v_list.index(max(v_list))])) 147 | max_list.update({state: max_value}) 148 | values[time]=max_list 149 | time -= 1 150 | pass 151 | 152 | self.values_dict=values 153 | return values 154 | 155 | 156 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStorageDriverScript.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage driver script 3 | 4 | """ 5 | import time 6 | from collections import namedtuple 7 | import pandas as pd 8 | import numpy as np 9 | from EnergyStorageModel import EnergyStorageModel as ESM 10 | from EnergyStoragePolicy import EnergyStoragePolicy 11 | from BackwardDP import BDP 12 | import matplotlib.pyplot as plt 13 | from copy import copy 14 | from scipy.ndimage.interpolation import shift 15 | import pickle 16 | from bisect import bisect 17 | 18 | 19 | def process_raw_price_data(file, params): 20 | DISC_TYPE = "FROM_CUM" 21 | # DISC_TYPE = "OTHER" 22 | 23 | print( 24 | "Processing raw price data. 
Constructing price change list and cdf using {}".format( 25 | DISC_TYPE 26 | ) 27 | ) 28 | tS = time.time() 29 | 30 | # load energy price data from the Excel spreadsheet 31 | raw_data = pd.read_excel(file, sheet_name="Raw Data") 32 | 33 | # look at data spanning a week 34 | data_selection = raw_data.iloc[0 : params["T"], 0:5] 35 | 36 | # rename columns to remove spaces (otherwise we can't access them) 37 | cols = data_selection.columns 38 | cols = cols.map(lambda x: x.replace(" ", "_") if isinstance(x, str) else x) 39 | data_selection.columns = cols 40 | 41 | # sort prices in ascending order 42 | sort_by_price = data_selection.sort_values("PJM_RT_LMP") 43 | # print(sort_by_price.head()) 44 | 45 | hist_price = np.array(data_selection["PJM_RT_LMP"].tolist()) 46 | # print(hist_price[0]) 47 | 48 | max_price = sort_by_price["PJM_RT_LMP"].max() 49 | min_price = sort_by_price["PJM_RT_LMP"].min() 50 | print("Min price {:.2f} and Max price {:.2f}".format(min_price, max_price)) 51 | 52 | # sort prices in ascending order 53 | sort_by_price = data_selection.sort_values("PJM_RT_LMP") 54 | 55 | # calculate change in price and sort values of change in price in ascending order 56 | data_selection["Price_Shift"] = data_selection.PJM_RT_LMP.shift(1) 57 | data_selection["Price_Change"] = ( 58 | data_selection["PJM_RT_LMP"] - data_selection["Price_Shift"] 59 | ) 60 | sort_price_change = data_selection.sort_values("Price_Change") 61 | 62 | # discretize change in price and obtain f(p) for each price change 63 | max_price_change = sort_price_change["Price_Change"].max() 64 | min_price_change = sort_price_change["Price_Change"].min() 65 | print( 66 | "Min price change {:.2f} and Max price change {:.2f}".format( 67 | min_price_change, max_price_change 68 | ) 69 | ) 70 | 71 | # there are 191 values for price change 72 | price_changes_sorted = sort_price_change["Price_Change"].tolist() 73 | # remove the last NaN value 74 | price_changes_sorted.pop() 75 | 76 | if DISC_TYPE == "FROM_CUM": 77 | # discretize price change by interpolating from cumulative distribution 78 | xp = price_changes_sorted 79 | fp = np.arange(len(price_changes_sorted) - 1) / (len(price_changes_sorted) - 1) 80 | cum_fn = np.append(fp, 1) 81 | 82 | # obtain 30 discrete prices 83 | discrete_price_change_cdf = np.linspace(0, 1, params["nPriceChangeInc"]) 84 | discrete_price_change_list = [] 85 | for i in discrete_price_change_cdf: 86 | interpolated_point = np.interp(i, cum_fn, xp) 87 | discrete_price_change_list.append(interpolated_point) 88 | else: 89 | price_change_range = max_price_change - min_price_change 90 | price_change_increment = price_change_range / params["nPriceChangeInc"] 91 | discrete_price_change = np.arange( 92 | min_price_change, max_price_change, price_change_increment 93 | ) 94 | discrete_price_change_list = list( 95 | np.append(discrete_price_change, max_price_change) 96 | ) 97 | 98 | f_p = np.arange(len(price_changes_sorted) - 1) / (len(price_changes_sorted) - 1) 99 | cum_fn = np.append(f_p, 1) 100 | discrete_price_change_cdf = [] 101 | for c in discrete_price_change_list: 102 | interpolated_point = np.interp(c, price_changes_sorted, cum_fn) 103 | discrete_price_change_cdf.append(interpolated_point) 104 | 105 | price_changes_sorted = np.array(price_changes_sorted) 106 | discrete_price_change_list = np.array(discrete_price_change_list) 107 | discrete_price_change_cdf = np.array(discrete_price_change_cdf) 108 | discrete_price_change_pdf = discrete_price_change_cdf - shift( 109 | discrete_price_change_cdf, 1, cval=0 110 | ) 111 | 
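    # the discrete pdf is the first difference of the interpolated cdf (the probability mass assigned to each
    # discretized price change); dotting it with the discrete price-change values below gives the expected price change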
112 | mean_price_change = np.dot(discrete_price_change_list, discrete_price_change_pdf) 113 | 114 | # print("discrete_price_change_list ",discrete_price_change_list) 115 | # print("discrete_price_change_cdf",discrete_price_change_cdf) 116 | # print("discrete_price_change_pdf",discrete_price_change_pdf) 117 | 118 | print( 119 | "Finishing processing raw price data in {:.2f} secs. Expected price change is {:.2f}. Hist_price len is {}".format( 120 | time.time() - tS, mean_price_change, len(hist_price) 121 | ) 122 | ) 123 | # input("enter any key to continue...") 124 | 125 | exog_params = { 126 | "hist_price": hist_price, 127 | "price_changes_sorted": price_changes_sorted, 128 | "discrete_price_change_list": discrete_price_change_list, 129 | "discrete_price_change_cdf": discrete_price_change_cdf, 130 | } 131 | 132 | return exog_params 133 | 134 | 135 | if __name__ == "__main__": 136 | file = "Parameters.xlsx" 137 | seed = 189654913 138 | 139 | # Reading the algorithm pars 140 | parDf = pd.read_excel(file, sheet_name="ParamsModel") 141 | parDict = parDf.set_index("Index").T.to_dict("list") 142 | params = {key: v for key, value in parDict.items() for v in value} 143 | params["seed"] = seed 144 | params["T"] = min(params["T"], 192) 145 | 146 | parDf = pd.read_excel(file, sheet_name="GridSearch") 147 | parDict = parDf.set_index("Index").T.to_dict("list") 148 | paramsPolicy = {key: v for key, value in parDict.items() for v in value} 149 | params.update(paramsPolicy) 150 | 151 | parDf = pd.read_excel(file, sheet_name="BackwardDP") 152 | parDict = parDf.set_index("Index").T.to_dict("list") 153 | paramsPolicy = {key: v for key, value in parDict.items() for v in value} 154 | params.update(paramsPolicy) 155 | 156 | if isinstance(params["priceDiscSet"], str): 157 | price_disc_list = params["priceDiscSet"].split(",") 158 | price_disc_list = [float(e) for e in price_disc_list] 159 | else: 160 | price_disc_list = [float(params["priceDiscSet"])] 161 | params["price_disc_list"] = price_disc_list 162 | 163 | print("Parameters ", params) 164 | # input("enter any key to continue...") 165 | 166 | # exog_params is a dictionary with three lists: hist_price, price_changes_list, discrete_price_change_cdf 167 | exog_params = process_raw_price_data(file, params) 168 | 169 | # create a model and a policy 170 | policy_names = ["buy_low_sell_high_policy", "bellman_policy"] 171 | state_variable = ["price", "energy_amount"] 172 | initial_state = { 173 | "price": exog_params["hist_price"][0], 174 | "energy_amount": params["R0"], 175 | } 176 | decision_variable = ["buy", "hold", "sell"] 177 | possible_decisions = [ 178 | {"buy": 1, "hold": 0, "sell": 0}, 179 | {"buy": 0, "hold": 0, "sell": 1}, 180 | {"buy": 0, "hold": 1, "sell": 0}, 181 | ] 182 | M = ESM( 183 | state_variable, 184 | decision_variable, 185 | initial_state, 186 | params, 187 | exog_params, 188 | possible_decisions, 189 | ) 190 | P = EnergyStoragePolicy(M, policy_names) 191 | 192 | ########################################################################## 193 | # GridSearch 194 | if params["Algorithm"] == "GridSearch": 195 | # obtain the theta values to carry out a full grid search 196 | grid_search_theta_values = P.grid_search_theta_values(params) 197 | print(grid_search_theta_values) 198 | # input("enter any key to continue...") 199 | 200 | # use those theta values to calculate corresponding contribution values 201 | contribution_values_dict = P.perform_grid_search( 202 | params, grid_search_theta_values[0] 203 | ) 204 | 205 | # plot those contribution values 
on a heat map, with theta_buy on the horizontal axis and theta_sell on the 206 | # vertical axis 207 | P.plot_heat_map( 208 | contribution_values_dict, 209 | grid_search_theta_values[1], 210 | grid_search_theta_values[2], 211 | ) 212 | ################################################################################## 213 | 214 | ################################################################################# 215 | # BackwardDP 216 | if params["Algorithm"] == "BackwardDP": 217 | # Constructing the state space 218 | # make list of possible energy amount stored at a time 219 | discrete_energy = np.array([0.0, 1.0]) 220 | 221 | # make list of prices with different increments 222 | min_price = np.min(exog_params["hist_price"]) 223 | max_price = np.max(exog_params["hist_price"]) 224 | 225 | for inc in params["price_disc_list"]: 226 | discrete_prices = np.arange(min_price, max_price + inc, inc) 227 | 228 | print("\nStarting BackwardDP 2D") 229 | test_2D = BDP( 230 | discrete_prices, 231 | discrete_energy, 232 | exog_params["price_changes_sorted"], 233 | exog_params["discrete_price_change_list"], 234 | exog_params["discrete_price_change_cdf"], 235 | params["T"], 236 | copy(M), 237 | ) 238 | 239 | # 2D states - time the process with a 2D state variable 240 | t0 = time.time() 241 | value_dict = test_2D.bellman() 242 | t1 = time.time() 243 | time_elapsed = t1 - t0 244 | print("Time_elapsed_2D_model={:.2f} secs.".format(time_elapsed)) 245 | 246 | print("Starting policy evaluation for the actual sample path") 247 | tS = time.time() 248 | contribution = P.run_policy(test_2D, "bellman_policy", params["T"]) 249 | print( 250 | "Contribution using BackwardDP 2D is {:.2f}. Finished in {:.2f}s".format( 251 | contribution, time.time() - tS 252 | ) 253 | ) 254 | 255 | if params["run3D"]: 256 | print("\nStarting BackwardDP 3D") 257 | 258 | state_variable_3 = ["price", "energy_amount", "prev_price"] 259 | 260 | index = bisect(discrete_prices, exog_params["hist_price"][1]) 261 | adjusted_p1 = discrete_prices[index] 262 | index = bisect(discrete_prices, exog_params["hist_price"][0]) 263 | adjusted_p0 = discrete_prices[index] 264 | initial_state_3 = { 265 | "price": adjusted_p1, 266 | "energy_amount": params["R0"], 267 | "prev_price": adjusted_p0, 268 | } 269 | 270 | M3 = ESM( 271 | state_variable_3, 272 | decision_variable, 273 | initial_state_3, 274 | params, 275 | exog_params, 276 | possible_decisions, 277 | ) 278 | P3 = EnergyStoragePolicy(M3, policy_names) 279 | 280 | test_3D = BDP( 281 | discrete_prices, 282 | discrete_energy, 283 | exog_params["price_changes_sorted"], 284 | exog_params["discrete_price_change_list"], 285 | exog_params["discrete_price_change_cdf"], 286 | params["T"], 287 | copy(M3), 288 | ) 289 | 290 | t0 = time.time() 291 | value_dict = test_3D.bellman() 292 | t1 = time.time() 293 | time_elapsed = t1 - t0 294 | print("Time_elapsed_3D_model={:.2f} secs.".format(time_elapsed)) 295 | 296 | print("Starting policy evaluation for the actual sample path") 297 | tS = time.time() 298 | contribution = P3.run_policy(test_3D, "bellman_policy", params["T"]) 299 | print( 300 | "Contribution using BackwardDP 3D is {:.2f}. 
Finished in {:.2f}s".format( 301 | contribution, time.time() - tS 302 | ) 303 | ) 304 | 305 | ######################################################################### 306 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStorageModel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage model class 3 | Adapted from code by Donghun Lee (c) 2018 4 | 5 | """ 6 | from collections import namedtuple 7 | import numpy as np 8 | import pandas as pd 9 | 10 | 11 | class EnergyStorageModel: 12 | """ 13 | Base class for energy storage model 14 | """ 15 | 16 | def __init__( 17 | self, 18 | state_variable, 19 | decision_variable, 20 | state_0, 21 | params, 22 | exog_params, 23 | possible_decisions, 24 | exog_info_fn=None, 25 | transition_fn=None, 26 | objective_fn=None, 27 | ): 28 | """ 29 | Initializes the model 30 | 31 | :param state_variable: list(str) - state variable dimension names 32 | :param decision_variable: list(str) - decision variable dimension names 33 | :param state_0: dict - contains the information to populate initial state, including eta (the fraction of 34 | energy maintained when charging or discharging the battery) and battery capacity 35 | :param params: all the parameters including DataFrame (exog_data) containning the price information 36 | :param possible_decisions: list - list of possible decisions we could make 37 | :param exog_info_fn: function - calculates relevant exogenous information 38 | :param transition_fn: function - takes in decision variables and exogenous information to describe how the state 39 | evolves 40 | :param objective_fn: function - calculates contribution at time t 41 | """ 42 | 43 | self.init_args = params 44 | self.prng = np.random.RandomState(params["seed"]) 45 | self.exog_params = exog_params 46 | 47 | self.initial_state = state_0 48 | self.state_variable = state_variable 49 | self.decision_variable = decision_variable 50 | 51 | self.possible_decisions = possible_decisions 52 | self.State = namedtuple("State", state_variable) 53 | self.state = self.build_state(self.initial_state) 54 | self.Decision = namedtuple("Decision", decision_variable) 55 | self.objective = 0.0 56 | 57 | # This will keep a list of states visited 58 | self.states = [self.state] 59 | 60 | def reset(self): 61 | self.objective = 0.0 62 | self.state = self.build_state(self.initial_state) 63 | self.states = [self.state] 64 | 65 | def build_state(self, info): 66 | """ 67 | this function returns a state containing all the state information needed 68 | 69 | :param info: dict - contains all state information 70 | :return: namedtuple - a state object 71 | """ 72 | return self.State(*[info[k] for k in self.state_variable]) 73 | 74 | def build_decision(self, info, energy_amount): 75 | """ 76 | this function returns a decision 77 | 78 | :param info: dict - contains all decision info 79 | :param energy_amount: float - amount of energy 80 | :return: namedtuple - a decision object 81 | 82 | """ 83 | info_copy = {"buy": 0, "hold": 0, "sell": 0} 84 | # the amount of power that can be bought or sold is limited by constraints 85 | for k in self.decision_variable: 86 | if k == "buy" and info[k] > 0: 87 | info_copy[k] = ( 88 | self.init_args["Rmax"] - energy_amount 89 | ) / self.init_args["eta"] 90 | elif k == "sell" and info[k] > energy_amount: 91 | info_copy[k] = energy_amount 92 | else: 93 | info_copy[k] = info[k] 94 | return self.Decision(*[info_copy[k] for k in 
self.decision_variable]) 95 | 96 | def exog_info_fn(self, time): 97 | next_price = self.exog_params["hist_price"][time] 98 | 99 | return next_price 100 | 101 | def transition_fn(self, time, decision): 102 | """ 103 | this function takes in the decision and exogenous information to update the state 104 | 105 | :param time: int - time at which the state is at 106 | :param decision: namedtuple - contains all decision info 107 | :return: updated state 108 | """ 109 | new_price = self.exog_info_fn(time) 110 | new_energy_amount = ( 111 | self.state.energy_amount 112 | + (self.init_args["eta"] * decision.buy) 113 | - decision.sell 114 | ) 115 | 116 | if len(self.state_variable) == 2: 117 | state = self.build_state( 118 | {"energy_amount": new_energy_amount, "price": new_price} 119 | ) 120 | 121 | elif len(self.state_variable) == 3: 122 | state = self.build_state( 123 | { 124 | "energy_amount": new_energy_amount, 125 | "price": new_price, 126 | "prev_price": self.state.price, 127 | } 128 | ) 129 | 130 | return state 131 | 132 | def objective_fn(self, decision): 133 | """ 134 | this function calculates the contribution, which depends on the decision and the price 135 | 136 | :param decision: namedtuple - contains all decision info 137 | :return: float - calculated contribution 138 | """ 139 | obj_part = self.state.price * ( 140 | self.init_args["eta"] * decision.sell - decision.buy 141 | ) 142 | return obj_part 143 | 144 | def step(self, time, decision): 145 | """ 146 | this function steps the process forward by one time increment by updating the sum of the contributions 147 | and the state variable 148 | 149 | :param time: int - time at which the state is at 150 | :param decision: decision: namedtuple - contains all decision info 151 | :return: none 152 | """ 153 | self.objective += self.objective_fn(decision) 154 | self.state = self.transition_fn(time, decision) 155 | self.states.append(self.state) 156 | -------------------------------------------------------------------------------- /EnergyStorage_I/EnergyStoragePolicy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Energy storage policy class 3 | 4 | """ 5 | from collections import namedtuple 6 | import pandas as pd 7 | import numpy as np 8 | from EnergyStorageModel import EnergyStorageModel as ESM 9 | import matplotlib.pyplot as plt 10 | from copy import copy 11 | import time 12 | 13 | 14 | class EnergyStoragePolicy: 15 | """ 16 | Base class for decision policy 17 | """ 18 | 19 | def __init__(self, model, policy_names): 20 | """ 21 | Initializes the policy 22 | 23 | :param model: EnergyStorageModel - the model that the policy is being implemented on 24 | :param policy_names: list(str) - list of policies 25 | """ 26 | 27 | self.model = model 28 | self.policy_names = policy_names 29 | self.Policy = namedtuple("Policy", policy_names) 30 | 31 | def buy_low_sell_high_policy(self, time, state, theta): 32 | """ 33 | this function implements the buy low, sell high policy for the ESM 34 | 35 | :param state: namedtuple - the state of the model at a given time 36 | :param theta: tuple - contains the parameters needed to run the policy 37 | :return: a decision made based on the policy 38 | """ 39 | lower_limit = theta[0] 40 | upper_limit = theta[1] 41 | if state.price <= lower_limit: 42 | new_decision = self.model.possible_decisions[0] 43 | elif state.price >= upper_limit: 44 | new_decision = self.model.possible_decisions[1] 45 | else: 46 | new_decision = self.model.possible_decisions[2] 47 | return 
new_decision 48 | 49 | def bellman_policy(self, time, state, bellman_model): 50 | price = state.price 51 | energy = state.energy_amount 52 | 53 | maxValue = -np.inf 54 | maxDec = None 55 | for d in self.model.possible_decisions: 56 | x = self.model.build_decision(d, energy) 57 | contribution = price * (x.sell - x.buy) 58 | 59 | sum_w = 0 60 | w_index = 0 61 | for w in bellman_model.discrete_price_changes: 62 | f = ( 63 | bellman_model.f_p[w_index] 64 | if w_index == 0 65 | else bellman_model.f_p[w_index] - bellman_model.f_p[w_index - 1] 66 | ) 67 | next_state = bellman_model.state_transition(state, x, w) 68 | next_v = ( 69 | bellman_model.values_dict[time + 1][next_state] 70 | if time < bellman_model.time 71 | else bellman_model.terminal_contribution 72 | ) 73 | sum_w += f * next_v 74 | 75 | w_index += 1 76 | # print("w_index={}".format(w_index)) 77 | v = contribution + sum_w 78 | if v > maxValue: 79 | maxValue = v 80 | maxDec = d 81 | return maxDec 82 | 83 | def run_policy(self, policy_info, policy, stop_time): 84 | """ 85 | this function runs the model with a selected policy 86 | 87 | :param policy_info: dict - dictionary of policies and their associated parameters 88 | :param policy: str - the name of the chosen policy 89 | :param stop_time: float - stop time 90 | :return: float - calculated contribution 91 | """ 92 | time = 0 93 | model_copy = copy(self.model) 94 | nTrades = {"buy": 0, "sell": 0, "hold": 0} 95 | buy_list = [] 96 | sell_list = [] 97 | 98 | while time != model_copy.init_args["T"]: 99 | decision = getattr(self, policy)(time, model_copy.state, policy_info) 100 | 101 | # Last time period - we are going to sell energy 102 | if time == model_copy.init_args["T"] - 1: 103 | decision = {"buy": 0, "hold": 0, "sell": 1} 104 | 105 | x = model_copy.build_decision(decision, model_copy.state.energy_amount) 106 | 107 | nTrades["buy"] += x.buy 108 | nTrades["sell"] += x.sell 109 | nTrades["hold"] += model_copy.state.energy_amount 110 | if x.buy > 0: 111 | buy_list.append((time, model_copy.state.price)) 112 | elif x.sell > 0: 113 | sell_list.append((time, model_copy.state.price)) 114 | 115 | # print("time={}, obj={}, state.energy_amount={}, state.price={}, x={}".format(time, model_copy.objective,model_copy.state.energy_amount, model_copy.state.price, x)) 116 | 117 | # step the model forward one iteration 118 | model_copy.step(time, x) 119 | # increment time 120 | time += 1 121 | contribution = model_copy.objective 122 | 123 | print( 124 | "Energy traded - Sell: {:.2f} - Buy: {:.2f} - Hold % : {:.2f}".format( 125 | nTrades["sell"], 126 | nTrades["buy"], 127 | nTrades["hold"] / model_copy.init_args["T"], 128 | ) 129 | ) 130 | print("Sell times and prices ") 131 | for i in range(len(sell_list)): 132 | print( 133 | "t = {:.2f} and price = {:.2f}".format(sell_list[i][0], sell_list[i][1]) 134 | ) 135 | print("Buy times and prices ") 136 | for i in range(len(buy_list)): 137 | print( 138 | "t = {:.2f} and price = {:.2f}".format(buy_list[i][0], buy_list[i][1]) 139 | ) 140 | 141 | return contribution 142 | 143 | def perform_grid_search(self, params, theta_values): 144 | """ 145 | this function calculates the contribution for each theta value in a list 146 | 147 | :param policy_info: dict - dictionary of policies and their associated parameters 148 | :param policy: str - the name of the chosen policy 149 | :param stop_time: float - stop time 150 | :param theta_values: list - list of all possible thetas to be tested 151 | :return: list - list of contribution values corresponding to each theta 152 | 
""" 153 | 154 | tS = time.time() 155 | contribution_values_dict = {} 156 | 157 | bestTheta = None 158 | bestContribution = -np.inf 159 | 160 | for theta in theta_values: 161 | # print("Starting theta {}".format(theta)) 162 | if theta[0] >= theta[1]: 163 | contribution_values_dict[theta] = 0 164 | else: 165 | contribution = self.run_policy( 166 | theta, "buy_low_sell_high_policy", params["T"] 167 | ) 168 | contribution_values_dict[theta] = contribution 169 | best_theta = max( 170 | contribution_values_dict, key=contribution_values_dict.get 171 | ) 172 | print( 173 | "Finishing theta {} with contribution {:.2f}. Best theta so far {}. Best contribution {:.2f}".format( 174 | theta, 175 | contribution, 176 | best_theta, 177 | contribution_values_dict[best_theta], 178 | ) 179 | ) 180 | 181 | print("Finishing GridSearch in {:.2f} secs".format(time.time() - tS)) 182 | return contribution_values_dict 183 | 184 | def grid_search_theta_values(self, params): 185 | """ 186 | this function gives a list of theta values needed to run a full grid search 187 | 188 | """ 189 | theta_buy_values = np.arange( 190 | params["theta_buy_min"], params["theta_buy_max"], params["theta_inc"] 191 | ) 192 | theta_sell_values = np.arange( 193 | params["theta_sell_min"], params["theta_sell_max"], params["theta_inc"] 194 | ) 195 | 196 | theta_values = [(x, y) for x in theta_buy_values for y in theta_sell_values] 197 | 198 | return theta_values, theta_buy_values, theta_sell_values 199 | 200 | def plot_heat_map(self, contribution_dict, theta_buy_values, theta_sell_values): 201 | """ 202 | this function plots a heat map 203 | 204 | :param contribution_dict: dict of contribution values 205 | :param theta_buy_values: list - list of theta_buy_values 206 | :param theta_sell_values: list - list of theta_sell_values 207 | :return: none (plots a heat map) 208 | """ 209 | 210 | contribution_values = [ 211 | contribution_dict[(theta_buy, theta_sell)] 212 | for theta_sell in theta_sell_values 213 | for theta_buy in theta_buy_values 214 | ] 215 | contributions = np.array(contribution_values) 216 | increment_count = len(theta_buy_values) 217 | contributions = np.reshape(contributions, (-1, increment_count)) 218 | 219 | fig, ax = plt.subplots() 220 | im = ax.imshow(contributions, cmap="hot", origin="lower", aspect="auto") 221 | # create colorbar 222 | cbar = ax.figure.colorbar(im, ax=ax) 223 | # cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom") 224 | # we want to show all ticks... 225 | ax.set_xticks(np.arange(0, len(theta_buy_values), 5)) 226 | ax.set_yticks(np.arange(0, len(theta_sell_values), 5)) 227 | # ... and label them with the respective list entries 228 | ax.set_xticklabels(theta_buy_values[::5]) 229 | ax.set_yticklabels(theta_sell_values[::5]) 230 | # rotate the tick labels and set their alignment. 
231 | # plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor") 232 | ax.set_title("Heatmap of contribution values across different values of theta") 233 | 234 | ax.set_ylabel("Theta sell high values") 235 | ax.set_xlabel("Theta buy low values") 236 | 237 | # fig.tight_layout() 238 | plt.show() 239 | return True 240 | -------------------------------------------------------------------------------- /EnergyStorage_I/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/EnergyStorage_I/Parameters.xlsx -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 donghun2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Learning the Best Diabetes Medication\n", 8 | "\n", 9 | "Here we implement the Bayesian belief model from Chapter 4 to find the best Diabetes medication. The following inputs are needed to fully specify the statistical model:\n", 10 | "\n", 11 | "1. `S0`: For every drug, we maintain a *belief* about its A1C reduction. The beliefs are modelled as a set of normal distributions (i.e., two parameters per drug) that evolve as we make observations. The initial belief is specified with a mean and standard deviation derived from, e.g., the efficacy of each drug over an entire population (many individuals).\n", 12 | "2. `mu_truth`: When simulating the model, the true (but unknown) value for the A1C reduction of every drug must be simulated as well. We do that by directly passing a `scipy.stats.uniform` object, from which the model can draw samples. Caution: The two arguments of `scipy.stats.uniform`, `loc` and `scale` are not the upper and lower bound, but the lower and upper bounds will be `[loc, loc+scale]`. It is also possible to select fixed values for `mu_truth`. In this case, select a uniform distribution where the lower and upper bound are equal. \n", 13 | "3. 
`sigma_W`: This is the standard deviation of an observation. Observations are sampled from a normal distribution with mean `mu_truth` and standard deviation `sigma_W`.\n", 14 | "\n", 15 | "We first create a model where `mu_truth` is fixed, that means we have only one random process (observational uncertainty)." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "import scipy.stats\n", 25 | "import numpy as np\n", 26 | "import plotly.express as px\n", 27 | "import plotly.graph_objects as go\n", 28 | "import MedicalDecisionDiabetesModel as mddm\n", 29 | "import MedicalDecisionDiabetesPolicies as mddp\n", 30 | "import BaseClasses.Util as util\n", 31 | "\n", 32 | "S0 = {\n", 33 | " \"M\": [0.32, 0.12],\n", 34 | " \"Sens\": [0.28, 0.09],\n", 35 | " \"Secr\": [0.3, 0.17],\n", 36 | " \"AGI\": [0.26, 0.15],\n", 37 | " \"PA\": [0.21, 0.11],\n", 38 | "}\n", 39 | "\n", 40 | "mu_truth = {\n", 41 | " \"M\": scipy.stats.uniform(loc=0.3, scale=0.0),\n", 42 | " \"Sens\": scipy.stats.uniform(loc=0.2, scale=0.0),\n", 43 | " \"Secr\": scipy.stats.uniform(loc=0.4, scale=0.0),\n", 44 | " \"AGI\": scipy.stats.uniform(loc=0.33, scale=0.0),\n", 45 | " \"PA\": scipy.stats.uniform(loc=0.35, scale=0.0),\n", 46 | "}\n", 47 | "\n", 48 | "model = mddm.MedicalDecisionDiabetesModel(S0=S0, mu_truth=mu_truth, sigma_W=0.05, T=20)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Next, we create an upper confidence bound policy and run it for 1000 iterations. In each iteration, we have 20 observations/trials. The objective is to maximize the total level of A1C reduction with these 20 trials. " 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "policy = mddp.UCB(model, theta=1)\n", 65 | "policy.run_policy(n_iterations=1000)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "We have a closer look at one sample run. The state variable for every drug are given as triples. We create one column for mean, standard deviation, and $N_x$. " 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "for drug in S0.keys():\n", 82 | " policy.results[drug + \"_mu\"] = policy.results[drug].apply(lambda x: x[0])\n", 83 | " policy.results[drug + \"_sigma\"] = 1.0/policy.results[drug].apply(lambda x: np.sqrt(x[1]))\n", 84 | " policy.results[drug + \"_N\"] = policy.results[drug].apply(lambda x: x[2])\n", 85 | " policy.results[drug + \"_chosen\"] = policy.results.groupby(\"N\")[drug + \"_N\"].diff()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Now we plot a random iteration of 20 trials. We plot\n", 93 | "- the current belief $\\mu^n_x$ for every drug\n", 94 | "- the current uncertainty in the belief $\\sigma^n_x$ as errorbars\n", 95 | "\n", 96 | "Only when a drug is chosen, $\\mu^n_x$ and $\\sigma^n_x$ will change. $\\sigma^n_x$ will be monotonically decreasing (we are getting more certain the more often we try a drug)." 
97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "sample_paths = np.random.choice(1000, size=1, replace=False)\n", 106 | "df = policy.results.loc[policy.results.N.isin(sample_paths), :]\n", 107 | "\n", 108 | "long_df = df.melt(id_vars=[\"t\",\"N\"], value_vars=[\"M_mu\", \"Sens_mu\", \"Secr_mu\", \"AGI_mu\", \"PA_mu\"], value_name=\"mu\", var_name=\"drug\")\n", 109 | "long_df[\"sigma\"] = df[[\"M_sigma\", \"Sens_sigma\", \"Secr_sigma\", \"AGI_sigma\", \"PA_sigma\"]].unstack().values\n", 110 | "long_df[\"chosen\"] = df[[\"M_chosen\", \"Sens_chosen\", \"Secr_chosen\", \"AGI_chosen\", \"PA_chosen\"]].unstack().values\n", 111 | "\n", 112 | "px.line(data_frame=long_df, x=\"t\", y=\"mu\", color=\"drug\", error_y=\"sigma\", facet_row=\"N\", hover_data=\"chosen\", markers=True)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Exercise 1\n", 120 | "Perform a grid search for the UCB policy with values $\\theta=0.0,0.2,\\dots,2.0$ and plot the performance for each value of $\\theta$. What do you learn from this plot?" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "---" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Exercise 2\n", 135 | "In this exercise we investigate the interval estimation policy. We are going to evaluate it for three different sets of thruths. For each of the three cases run a grid search for $\\theta=0.0,0.2,\\dots,2.0$ with 10000 iterations and plot the average performance against the value of $\\theta$.\n", 136 | "\n", 137 | "1. Use the values for `S0` and `mu_truth` just as given at the beginning of the notebook (you can even reuse the model object). \n", 138 | "2. Let $\\mu_x^0$ be your initial belief about the performance of drug $x$. Use $\\mu_x^0$ as given in `S0` above but simulate the truth by taking a sample of a uniform distribution on the interval $[0.5\\mu_x^0, 1.5\\mu_x^0]$. This is an example of having a prior distribution of belief (in this case, that is normally distributed) but sampling the truth from a different distribution (that is uniformly distributed around the mean).\n", 139 | "3. Set `S0` such that the prior is $\\mu_x^0=0.3$ for all five drugs $x$ with initial standard deviation $\\sigma_x^0=0.1$. Sample `mu_truth` for all five drugs uniformly from the interval $[0.15,0.45]$.\n", 140 | "\n", 141 | "What conclusions can you draw from each of the plots?" 
142 | ] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "sda", 148 | "language": "python", 149 | "name": "python3" 150 | }, 151 | "language_info": { 152 | "codemirror_mode": { 153 | "name": "ipython", 154 | "version": 3 155 | }, 156 | "file_extension": ".py", 157 | "mimetype": "text/x-python", 158 | "name": "python", 159 | "nbconvert_exporter": "python", 160 | "pygments_lexer": "ipython3", 161 | "version": "3.10.12" 162 | } 163 | }, 164 | "nbformat": 4, 165 | "nbformat_minor": 2 166 | } 167 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetesModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | 6 | 7 | class MedicalDecisionDiabetesModel(SDPModel): 8 | def __init__( 9 | self, 10 | mu_truth: dict, 11 | sigma_W: float, 12 | S0: dict, 13 | t0: float = 0, 14 | T: float = 1, 15 | seed: int = 42, 16 | ) -> None: 17 | state_names = list(S0.keys()) 18 | self.sigma_W = sigma_W 19 | self.beta_W = 1 / self.sigma_W**2 20 | self.mu_truth = mu_truth 21 | 22 | # For each drug, add the number of times the drug has been prescribed as a state 23 | for state in S0: 24 | if len(S0[state]) < 3: 25 | S0[state].append(0) 26 | if len(S0[state]) != 3: 27 | print(f"Need to provide prior mu and sigma for drug {state}!") 28 | # TODO: proper error handling 29 | return 30 | else: 31 | # S0 contains mu and sigma, but we model the states as mu and beta 32 | mu, sigma, N = S0[state] 33 | S0[state] = [mu, 1 / sigma**2, N] 34 | 35 | # Create one sample of the truth 36 | self.mu_truth_sample = {} 37 | for state in self.mu_truth: 38 | self.mu_truth_sample[state] = self.mu_truth[state].rvs() 39 | 40 | decision_names = ["choice"] 41 | 42 | super().__init__(state_names, decision_names, S0, t0, T, seed) 43 | 44 | def reset(self, reset_prng: bool = False): 45 | super().reset(reset_prng) 46 | 47 | # When the model is reset, create a new sample of the truth 48 | for state in self.mu_truth: 49 | self.mu_truth_sample[state] = self.mu_truth[state].rvs() 50 | 51 | # this function gives the exogenous information that is dependent on a random process 52 | # In our case, exogeneous information: W^(n+1) = mu_x + eps^(n+1), 53 | # Where eps^(n+1) is normally distributed with mean 0 and known variance (here s.d. 0.05) 54 | # W^(n+1)_x : reduction in A1C level 55 | # self.prng.normal takes two values, mu and sigma. 56 | def exog_info_fn(self, decision): 57 | x = decision.choice 58 | W = self.prng.normal(self.mu_truth_sample[x], self.sigma_W) 59 | 60 | return {"reduction": W} 61 | 62 | # this function takes in the decision and exogenous information to return\ 63 | # the new mu and beta values corresponding to the decision. 64 | def transition_fn(self, decision, exog_info): 65 | # For all states except one the state values do not change. 66 | new_state = {state: getattr(self.state, state) for state in self.state_names} 67 | 68 | # Update the state for the drug that was prescribed in this step 69 | x = decision.choice 70 | mu_x, beta_x, N_x = getattr(self.state, x) 71 | mu_x = (beta_x * mu_x + self.beta_W * exog_info["reduction"]) / (beta_x + self.beta_W) 72 | beta_x = beta_x + self.beta_W 73 | N_x += 1 # count of no. times drug x was given. 
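        # Descriptive note: the two updates above are the conjugate Bayesian update for a
        # normal belief, written in precision form (beta = 1/sigma^2):
        #     mu_x^(n+1)   = (beta_x^n * mu_x^n + beta_W * W^(n+1)) / (beta_x^n + beta_W)
        #     beta_x^(n+1) = beta_x^n + beta_W
        # The posterior mean is a precision-weighted average of the prior mean and the new
        # observation W^(n+1), and the precision of the belief grows with every trial.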
74 | 75 | new_state[x] = [mu_x, beta_x, N_x] 76 | 77 | return new_state 78 | 79 | # contribution is W (reduction in A1C level) 80 | def objective_fn(self, decision, exog_info): 81 | W = exog_info["reduction"] 82 | return W 83 | -------------------------------------------------------------------------------- /MedicalDecisionDiabetes/MedicalDecisionDiabetesPolicies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | from BaseClasses.SDPPolicy import SDPPolicy 6 | from math import sqrt, log 7 | import numpy as np 8 | 9 | 10 | class UCB(SDPPolicy): 11 | def __init__(self, model: SDPModel, policy_name: str = "UCB", theta: float = 1): 12 | self.theta = theta 13 | super().__init__(model, policy_name) 14 | 15 | def get_decision(self, state, t, T): 16 | # this method implements the Upper Confidence Bound policy 17 | # N.B: can't implement this at time t=0 (from t=1 at least). 18 | # Also can't divide by zero, which means we need each drug to have been tested at least once. 19 | # 20 | # Note that state has a list of 3 entries, for each key(type of drug) in the dictionary 21 | # {"drug" : [mu_empirical, beta, number of times drug given to patient]} 22 | obj_approx = {} 23 | for s in state._fields: 24 | mu, beta, N = getattr(state, s) 25 | obj_approx[s] = mu + self.theta * sqrt(log(t + 1) / (N + 1)) 26 | 27 | optimal_decision = max(obj_approx, key=obj_approx.get) 28 | 29 | return {"choice": optimal_decision} 30 | 31 | 32 | class IE(SDPPolicy): 33 | def __init__(self, model: SDPModel, policy_name: str = "IE", theta: float = 1): 34 | self.theta = theta 35 | super().__init__(model, policy_name) 36 | 37 | def get_decision(self, state, t, T): 38 | obj_approx = {} 39 | for s in state._fields: 40 | mu, beta, N = getattr(state, s) 41 | sigma = 1 / sqrt(beta) 42 | obj_approx[s] = mu + self.theta * sigma 43 | 44 | optimal_decision = max(obj_approx, key=obj_approx.get) 45 | 46 | return {"choice": optimal_decision} 47 | 48 | 49 | class PureExploitation(SDPPolicy): 50 | def __init__(self, model: SDPModel, policy_name: str = "PureExploitation"): 51 | super().__init__(model, policy_name) 52 | 53 | def get_decision(self, state, t, T): 54 | obj_approx = {} 55 | for s in state._fields: 56 | mu, beta, N = getattr(state, s) 57 | obj_approx[s] = mu 58 | 59 | optimal_decision = max(obj_approx, key=obj_approx.get) 60 | 61 | return {"choice": optimal_decision} 62 | 63 | 64 | class PureExploration(SDPPolicy): 65 | def __init__(self, model: SDPModel, policy_name: str = "PureExploration", seed: int = 42): 66 | self.prng = np.random.RandomState(seed) 67 | super().__init__(model, policy_name) 68 | 69 | def get_decision(self, state, t, T): 70 | optimal_decision = self.prng.choice(state._fields) 71 | 72 | return {"choice": optimal_decision} 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sequential Decision Problem Modeling Library 2 | 3 | This is a refactoring and evolution of the Sequential Decision Problem Modeling Library from Castle Lab, Princeton Univ. The goal is to make the problem code more structured, easily extendable, and more readable. 
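For orientation, every refactored problem follows the same pattern: a problem-specific model class is paired with one or more policy classes, and a policy is evaluated by simulating it over many iterations. A minimal sketch of this pattern, taken from the MedicalDecisionDiabetes notebook (parameter values are illustrative; run from within the problem folder):

```python
import scipy.stats
import MedicalDecisionDiabetesModel as mddm
import MedicalDecisionDiabetesPolicies as mddp

# Prior belief (mean, standard deviation) about the A1C reduction of each drug
S0 = {"M": [0.32, 0.12], "Sens": [0.28, 0.09], "Secr": [0.3, 0.17],
      "AGI": [0.26, 0.15], "PA": [0.21, 0.11]}

# Fixed truths, modeled as degenerate uniform distributions
mu_truth = {drug: scipy.stats.uniform(loc=loc, scale=0.0)
            for drug, loc in [("M", 0.3), ("Sens", 0.2), ("Secr", 0.4),
                              ("AGI", 0.33), ("PA", 0.35)]}

model = mddm.MedicalDecisionDiabetesModel(S0=S0, mu_truth=mu_truth, sigma_W=0.05, T=20)
policy = mddp.UCB(model, theta=1)     # upper confidence bound policy
policy.run_policy(n_iterations=1000)  # simulate 1000 episodes of 20 trials each
```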
4 | 5 | The major changes are: 6 | - Introduction of abstract base classes `SDPModel` and `SDPPolicy` from which all sequential decision problems and policies inherit 7 | - Jupyter Notebook with plotly as frontend 8 | 9 | Furthermore, the code was cleaned up for readability and exercises were added to some of the Notebooks. 10 | 11 | ## Installation 12 | 13 | Requires Python 3 and the following packages: 14 | - numpy 15 | - scipy 16 | - pandas 17 | - plotly.express 18 | - yfinance (for AssetSelling) 19 | - osmnx (for StochasticShortestPath) 20 | - networkx (for StochasticShortestPath) 21 | 22 | ## Included Problem Models 23 | 24 | This is work in progress. For now, new models exist for 25 | - `AssetSelling` 26 | - `MedicalDecisionDiabetes` 27 | - `StochasticShortestPath_static` 28 | 29 | Further models will be added in the future. The other folders contain the models from the original repository [https://github.com/wbpowell328/stochastic-optimization]. 30 | 31 | There is an `ipynb`-file in each problem folder which is the starting point for running the models. 32 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Driver.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stochastic Shortest Paths - Learning the costs 3 | Dynamic Model - search for the parameter theta, 4 | which represents the percentile of the distribution 5 | of each cost to use to make sure we get a penalty as 6 | small as possible. Run it using python command. 7 | 8 | Author: Andrei Graur 9 | """ 10 | 11 | from collections import namedtuple 12 | import math 13 | from copy import copy 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | import pandas as pd 17 | import xlrd 18 | 19 | from Model import StaticModel 20 | from GraphGenerator import GraphGenerator 21 | from Policy import LookaheadPolicy 22 | 23 | 24 | if __name__ == "__main__": 25 | file = "Parameters.xlsx" 26 | seed = 189654913 27 | METRIC = "PERCENTILE" 28 | 29 | # Reading the algorithm pars 30 | parDf = pd.read_excel(file, sheet_name="Parameters") 31 | parDict = parDf.set_index("Index").T.to_dict("list") 32 | params = {key: v for key, value in parDict.items() for v in value} 33 | params["seed"] = seed 34 | theta_list = params["theta_cost_set"].split() 35 | 36 | print("Parameters ", params) 37 | 38 | # Initializing the network 39 | G = GraphGenerator(params) 40 | if params["networkType"] == "Steps": 41 | nTries = G.createNetworkSteps() 42 | else: 43 | nTries = G.createNetworkChance() 44 | 45 | print( 46 | "Created network in {} tries. From origin {} to destination {}. Number of steps is {} and the average cost is {:.2f}. 
The deadline to define lateness will be {:.2f}".format( 47 | nTries, 48 | G.start_node, 49 | G.end_node, 50 | G.steps, 51 | G.get_avg_cost_paths(), 52 | G.get_deadline(), 53 | ) 54 | ) 55 | # input("Press Enter to continue...") 56 | 57 | # Initializing the model 58 | state_names = ["node"] 59 | init_state = {"node": G.start_node} 60 | decision_names = ["nextNode"] 61 | 62 | M = StaticModel(state_names, decision_names, init_state, params, G) 63 | 64 | # Initialing the lists that will hold the results 65 | x = [] 66 | avgCostList = [] 67 | avgPenaltyList = [] 68 | avgStepsList = [] 69 | 70 | # Iterating over theta 71 | for theta in theta_list: 72 | theta = float(theta) 73 | M.start_new_theta(theta) 74 | x.append(theta) 75 | 76 | cost, penalty, steps = M.runTrials(params["nIterations"], G.get_deadline()) 77 | 78 | avgCostList.append(cost) 79 | avgPenaltyList.append(penalty) 80 | avgStepsList.append(steps) 81 | 82 | print( 83 | "Avg total cost with parameter {0} is {1:.3f}. Probability of being late is {2:.2f} and avg number of steps is {3:.2f}\n ".format( 84 | theta, cost, penalty, steps 85 | ) 86 | ) 87 | 88 | print("ThetaCost ", x) 89 | print("AvgCost ", avgCostList) 90 | print("ProbLateness ", avgPenaltyList) 91 | print("AvgSteps ", avgStepsList) 92 | 93 | # Ploting the results 94 | fig1, axsubs = plt.subplots(1, 2) 95 | fig1.suptitle( 96 | "Comparison of theta^cost - origin {}, destination {}, dist {} - deadline {} and number of iterations {}".format( 97 | M.G.start_node, 98 | M.G.end_node, 99 | M.G.steps, 100 | G.get_deadline(), 101 | params["nIterations"], 102 | ) 103 | ) 104 | 105 | axsubs[0].plot(x, avgCostList) 106 | axsubs[0].set_title("Average Cost") 107 | axsubs[0].set_xlabel("Percentile") 108 | axsubs[0].set_ylabel("$") 109 | 110 | axsubs[1].plot(x, avgPenaltyList) 111 | axsubs[1].set_title("Probability of being late (Risk) ") 112 | axsubs[1].set_xlabel("Percentile") 113 | axsubs[1].set_ylabel("%") 114 | 115 | plt.show() 116 | 117 | pass 118 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/GraphGenerator.py: -------------------------------------------------------------------------------- 1 | """ 2 | This program generates a graph with 35 vertices that will be used for 3 | comparing the dynamic and static lookahead approaches on it. 4 | 5 | Run without any arguements. 
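In outline, two construction modes are provided: createNetworkSteps() builds a
layered graph with nSteps levels whose node counts first grow and then shrink
toward the destination, while createNetworkChance() adds each directed edge with
probability edgeProb and keeps resampling graphs until it finds an
origin/destination pair whose shortest path is longer than lengthThreshold and
that is connected by more than numberPathsThreshold simple paths. The driver
selects the mode via the networkType parameter.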
6 | 7 | Author: Andrei Graur 8 | 9 | """ 10 | 11 | import numpy as np 12 | import networkx as nx 13 | import pandas as pd 14 | import math 15 | from collections import namedtuple, defaultdict 16 | 17 | 18 | class GraphGenerator: 19 | """ 20 | Base class for the static model 21 | """ 22 | 23 | def __init__(self, params): 24 | self.init_args = params 25 | self.prng = np.random.RandomState(params["seed"]) 26 | self.meanCosts = defaultdict(dict) 27 | self.dist = defaultdict(dict) 28 | self.spreads = defaultdict(dict) 29 | self.neighbors = defaultdict(list) 30 | self.vertices = [] 31 | 32 | # The start and end node will change based on the network graph that is going to be constructed - 33 | # we are going to select the pair with the longest shortest path 34 | self.start_node = 0 35 | self.end_node = 0 36 | self.steps = 0 37 | self.vertexCount = 1 38 | self.Horizon = self.vertexCount + 1 39 | self.mPathsList = [] 40 | self.nPaths = 0 41 | 42 | def createNetworkSteps(self): 43 | filename = "Network_Steps.xlsx" 44 | nSteps = self.init_args["nSteps"] 45 | 46 | G = nx.DiGraph() 47 | 48 | nodeCount = 0 49 | nodesPerLevel = defaultdict(list) 50 | midGraph = math.ceil(nSteps / 2) 51 | 52 | for level in range(nSteps): 53 | if level < midGraph: 54 | nNodes = level * 2 + 1 55 | else: 56 | nNodes = (nSteps - level - 1) * 2 + 1 57 | 58 | for i in range(nNodes): 59 | nodesPerLevel[level].append(nodeCount) 60 | nodeCount += 1 61 | 62 | for level in range(nSteps - 1): 63 | for i in nodesPerLevel[level]: 64 | G.add_node(i) 65 | edge_set = list( 66 | self.prng.choice( 67 | nodesPerLevel[level + 1], 68 | min(3, len(nodesPerLevel[level + 1])), 69 | replace=False, 70 | ) 71 | ) 72 | for j in edge_set: 73 | meanWeight = 1 74 | G.add_edge(i, j, weight=meanWeight) 75 | 76 | self.construct_network_objects(G, filename, 0, nodeCount - 1) 77 | 78 | return 1 79 | 80 | def get_deadline(self): 81 | return ( 82 | self.init_args["costMin"] 83 | + (self.init_args["costMax"] - self.init_args["costMin"]) 84 | * self.init_args["deadlinePerc"] 85 | ) * (self.steps) 86 | 87 | def get_avg_cost_paths(self, shouldPrintPaths=False): 88 | # Printing the length and the costs of all paths 89 | totalCostList = [] 90 | if shouldPrintPaths: 91 | print( 92 | "*************Printing the length and the costs of all paths************" 93 | ) 94 | p = 0 95 | for path in self.mPathsList: 96 | nSteps = len(path) 97 | totalCost = 0 98 | p += 1 99 | pathString = "Path {}: ".format(p) 100 | for n in range(nSteps - 1): 101 | fromNode = path[n] 102 | toNode = path[n + 1] 103 | totalCost += self.meanCosts[fromNode][toNode] 104 | # edge = " ({}, {}, {:.2f}, {:.2f}) ".format(fromNode,toNode,self.meanCosts[fromNode][toNode],totalCost) 105 | # pathString += edge 106 | pathString += " - {} steps and {:.2f} total mean cost".format( 107 | nSteps, totalCost 108 | ) 109 | totalCostList.append(totalCost) 110 | if shouldPrintPaths: 111 | print(pathString) 112 | avgTotalCost = np.array(totalCostList).mean() 113 | return avgTotalCost 114 | 115 | def construct_network_objects(self, G, filename, start_node, end_node): 116 | size = G.number_of_nodes() 117 | recordList = [] 118 | for fromNode in range(size): 119 | self.vertices.append(fromNode) 120 | 121 | for toNode in G.neighbors(fromNode): 122 | self.neighbors[fromNode].append(toNode) 123 | self.meanCosts[fromNode][toNode] = self.prng.uniform( 124 | self.init_args["costMin"], self.init_args["costMax"] 125 | ) 126 | self.spreads[fromNode][toNode] = self.prng.uniform( 127 | 0, self.init_args["maxSpreadPerc"] 128 | ) 129 
| self.dist[fromNode][toNode] = 1 130 | 131 | record = ( 132 | fromNode, 133 | toNode, 134 | self.meanCosts[fromNode][toNode], 135 | self.spreads[fromNode][toNode], 136 | size, 137 | ) 138 | recordList.append(record) 139 | 140 | if self.init_args["printGraph"]: 141 | headerDf = ["From", "To", "Cost", "Spread", "Graph_size"] 142 | df = pd.DataFrame.from_records(recordList, columns=headerDf) 143 | df.to_excel(filename, sheet_name="Network", index=False) 144 | 145 | self.start_node = start_node 146 | self.end_node = end_node 147 | self.steps = nx.shortest_path_length(G, start_node, end_node) 148 | self.vertexCount = size 149 | self.Horizon = self.vertexCount + 1 150 | 151 | # We need to add the dummy link of cost 0 to the destination node 152 | r = self.end_node 153 | self.spreads[r][r] = 0 154 | self.neighbors[r].append(r) 155 | self.meanCosts[r][r] = 0 156 | self.dist[r][r] = 0 157 | 158 | self.mPathsList = list( 159 | nx.all_simple_paths(G, source=self.start_node, target=self.end_node) 160 | ) 161 | self.nPaths = len(self.mPathsList) 162 | 163 | def createNetworkChance(self): 164 | filename = "Network_Chance.xlsx" 165 | chance = self.init_args["edgeProb"] 166 | size = self.init_args["nNodes"] 167 | 168 | G = nx.DiGraph() 169 | nbIterations = 0 170 | done = 0 171 | 172 | while done == 0: 173 | for i in range(size): 174 | G.add_node(i) 175 | 176 | for i in range(size): 177 | for j in range(size): 178 | if self.prng.uniform() < chance: 179 | if i != j: 180 | meanWeight = 1 181 | G.add_edge(i, j, weight=meanWeight) 182 | 183 | maxLength = 0 184 | mSource = None 185 | mDest = None 186 | mPaths = 0 187 | mPathsList = [] 188 | 189 | breakLoop = False 190 | 191 | for i in range(size): 192 | for j in range(size): 193 | if nx.has_path(G, i, j): 194 | length = nx.shortest_path_length(G, i, j) 195 | if length >= maxLength: 196 | paths = list(nx.all_simple_paths(G, source=i, target=j)) 197 | nPaths = len(paths) 198 | 199 | if nPaths > mPaths: 200 | maxLength = length 201 | mSource = i 202 | mDest = j 203 | mPaths = nPaths 204 | mPathsList = paths 205 | 206 | if length > self.init_args["lengthThreshold"]: 207 | breakLoop = True 208 | break 209 | else: 210 | pass 211 | if breakLoop: 212 | break 213 | 214 | print( 215 | "Iteration {}, Source {}, Dest {}, Length {}, number of paths {}".format( 216 | nbIterations, mSource, mDest, maxLength, mPaths 217 | ) 218 | ) 219 | 220 | if ( 221 | maxLength > self.init_args["lengthThreshold"] 222 | and mPaths > self.init_args["numberPathsThreshold"] 223 | ): 224 | # the graph is good and we will use it and stop the loop 225 | done = 1 226 | self.construct_network_objects(G, filename, mSource, mDest) 227 | else: 228 | G.clear() 229 | nbIterations += 1 230 | 231 | return nbIterations + 1 232 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stochastic Shortest Paths - Dynamic 3 | Static Model 4 | 5 | The code implementing the basic model for the Static 6 | Version. This implements the class, do not try to run 7 | this code. Run the DynamicModel_main instead. 
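In outline: the model keeps smoothed point estimates of the link costs
(update_estimated_costs) and, at every step of a trial, LookaheadPolicy re-solves
a deterministic backward-recursion lookahead in which each link cost is replaced
by the theta-percentile of its uniform cost interval (use_percentile_val in
Policy.py). runTrials then averages total cost, the probability of exceeding the
deadline, and the number of steps over many sample paths for a fixed theta.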
8 | 9 | Author: Andrei Graur 10 | 11 | """ 12 | from collections import namedtuple, defaultdict 13 | 14 | import math 15 | import numpy as np 16 | import pandas as pd 17 | import xlrd 18 | 19 | from Policy import LookaheadPolicy 20 | 21 | 22 | class StaticModel: 23 | """ 24 | Base class for the static model 25 | """ 26 | 27 | def __init__(self, state_names, x_names, s_0, params, G): 28 | """ 29 | Initializes the model 30 | 31 | :param state_names: list(str) - state variable dimension names 32 | :param x_names: list(str) - decision variable dimension names 33 | :param s_0: dict - contains the inital state information 34 | :param s_0[meanCosts]: dict- meanCosts[k][l] is the mean of the cost on the link k-l 35 | :param s_0[spreads]: dict - spreads[k][l] represents the spread of the distribution of 36 | cost on link k-l 37 | :param Horizon: int - the horizon over which we are looking ahead 38 | :param vertexCount - the number of nodes in our network 39 | :param seed: int - seed for random number generator 40 | """ 41 | 42 | self.init_args = params 43 | 44 | self.init_state = s_0 45 | self.state_names = state_names 46 | self.State = namedtuple("State", state_names) 47 | self.state = self.build_state(self.init_state) 48 | 49 | self.x_names = x_names 50 | self.Decision = namedtuple("Decision", x_names) 51 | 52 | self.G = G 53 | 54 | self.theta = 0.5 55 | self.n = 0 56 | self.time = 1 57 | self.obs = 1 58 | self.estimated_costs = defaultdict(dict) 59 | self.prng = np.random.RandomState(params["seed"]) 60 | 61 | def start_new_theta(self, theta): 62 | self.theta = theta 63 | self.estimated_costs = defaultdict(dict) 64 | self.n = 0 65 | self.obs = 1 66 | self.prng = np.random.RandomState(self.init_args["seed"]) 67 | print("*****************Reseting model for theta {:.2f}".format(self.theta)) 68 | 69 | def update_estimated_costs(self): 70 | for k in range(self.G.vertexCount): 71 | for l in self.G.neighbors[k]: 72 | m_hat = self.sample_from_uniform(k, l) 73 | alpha = self.get_step_size() 74 | if alpha < 1: 75 | self.estimated_costs[k][l] = (1 - alpha) * self.estimated_costs[k][ 76 | l 77 | ] + alpha * m_hat 78 | else: 79 | self.estimated_costs[k][l] = m_hat 80 | 81 | self.estimated_costs[self.G.end_node][self.G.end_node] = 0 82 | 83 | def sample_from_uniform(self, fromNode, toNode): 84 | spread = self.G.spreads[fromNode][toNode] 85 | deviation = ( 86 | self.prng.uniform(-spread, spread) * self.G.meanCosts[fromNode][toNode] 87 | ) 88 | m_hat = self.G.meanCosts[fromNode][toNode] + deviation 89 | return m_hat 90 | 91 | def get_step_size(self): 92 | # alpha = 1/self.n 93 | # alpha = 1./self.time 94 | alpha = 1.0 / self.obs 95 | return alpha 96 | 97 | def build_state(self, info): 98 | return self.State(*[info[k] for k in self.state_names]) 99 | 100 | def build_decision(self, info): 101 | return self.Decision(*[info[k] for k in self.x_names]) 102 | 103 | # exog_info_fn: function - returns the real experienced cost of traversing a link 104 | # from 'fromNode' to 'toNode' 105 | def exog_info_fn(self, fromNode, toNode): 106 | cost_hat = self.sample_from_uniform(fromNode, toNode) 107 | return cost_hat 108 | 109 | # transition_fn: function - updates the state within the model and returns new state 110 | def transition_fn(self, decision): 111 | self.state = self.build_state({"node": decision}) 112 | self.time += 1 113 | self.obs += 1 114 | 115 | # :param objective_fn: function - returns the cost we would experience by taking 'decision' 116 | # as our next node from the current state 'state' 117 | def 
objective_fn(self, decision): 118 | cost = self.exog_info_fn(self.state.node, decision) 119 | return cost 120 | 121 | """ 122 | the function for running trials; it simulates solving the problem a bunch of 123 | times (nbTrials times), then takes the squared mean of the costs incurred, 124 | and then returns that mean value 125 | """ 126 | 127 | def runTrials(self, nbTrials, deadline): 128 | # variables to store values along iterations 129 | totalPenalty = 0.0 130 | totalCost = 0.0 131 | totalTime = 0.0 132 | 133 | for i in range(nbTrials): 134 | self.state = self.build_state(self.init_state) 135 | self.time = 1 136 | self.n += 1 137 | cost = 0.0 138 | # print("Theta {:.2f} Iteration {}".format(self.theta,self.n)) 139 | 140 | # Following a path - the policy function is a lookahead 141 | while self.state.node != self.G.end_node: 142 | self.update_estimated_costs() 143 | P = LookaheadPolicy(self) 144 | decision = P.get_decision("PERCENTILE") 145 | # self.build_decision({'nextNode':decision}) 146 | stepCost = self.objective_fn(decision) 147 | cost += stepCost 148 | # print("\t Theta {:.2f}, Iteration {}, Time {}, CurrNode {}, Decision {}, Step Cost {:.2f} Cum Cost {:.2f}".format(self.theta,self.n,self.time,self.state.node,decision,stepCost,cost)) 149 | self.transition_fn(decision) 150 | 151 | # end of path calculations 152 | totalCost += cost 153 | if cost > deadline: 154 | # latenessSquared = (cost - deadline) ** 2 155 | latenessSquared = 1 156 | totalPenalty += latenessSquared 157 | else: 158 | latenessSquared = 0 159 | totalTime += self.time - 1 160 | print( 161 | "End of Theta {:.2f}, Iteration {}. Cost: {:.2f}, Lateness: {:.2f} and number of steps {}".format( 162 | self.theta, self.n, cost, math.sqrt(latenessSquared), self.time - 1 163 | ) 164 | ) 165 | 166 | # end of trials 167 | avgCost = totalCost / nbTrials 168 | avgPenalty = totalPenalty / nbTrials 169 | avgTime = totalTime / nbTrials 170 | 171 | return avgCost, avgPenalty, avgTime 172 | -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Network_Steps.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/StochasticShortestPath_Dynamic/Network_Steps.xlsx -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/StochasticShortestPath_Dynamic/Parameters.xlsx -------------------------------------------------------------------------------- /StochasticShortestPath_Dynamic/Policy.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | The code for the lookahead policy we use in our 4 | Static Model 5 | 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | # the lookahead policy 12 | class LookaheadPolicy: 13 | def __init__(self, model): 14 | self.model = model 15 | 16 | # function returning the decision x_t from the current state 17 | # and current time. 
The argument decisions is given to 18 | # use a local variable rather than for getting outside information 19 | def get_decision(self, METRIC): 20 | # the matrix with decisions to be made for each node and each time 21 | decisions = [ 22 | ([0] * self.model.G.vertexCount) for row in range(self.model.G.Horizon + 1) 23 | ] 24 | 25 | # initialize the value costs at different nodes at different times to infinity 26 | V = np.ones((self.model.G.Horizon + 1, self.model.G.vertexCount)) * np.inf 27 | # make the costs at the destination 0 28 | for t_prime in range(self.model.G.Horizon + 1): 29 | V[t_prime][self.model.G.end_node] = 0 30 | 31 | # the algortihm that uses the "stepping backwards in time" method 32 | lookAheadTime = self.model.G.Horizon - 1 33 | while lookAheadTime >= 0: 34 | for k in range(self.model.G.vertexCount): 35 | # find the solutions to Bellman's eq. that are shown 36 | # in 5.22 and 5.23 37 | argMin = -1 38 | minVal = np.inf 39 | for l in self.model.G.neighbors[k]: 40 | if METRIC == "PERCENTILE": 41 | spread = self.model.G.spreads[k][l] 42 | mean = self.model.estimated_costs[k][l] 43 | if minVal >= V[lookAheadTime + 1][l] + self.use_percentile_val( 44 | self.model.theta, spread, mean 45 | ): 46 | argMin = l 47 | minVal = V[lookAheadTime + 1][l] + self.use_percentile_val( 48 | self.model.theta, spread, mean 49 | ) 50 | else: 51 | if minVal >= V[lookAheadTime + 1][l] + dist[k][l]: 52 | argMin = l 53 | minVal = V[lookAheadTime + 1][l] + dist[k][l] 54 | 55 | # updating the solutions to the equations 56 | V[lookAheadTime][k] = minVal 57 | decisions[lookAheadTime][k] = argMin 58 | lookAheadTime -= 1 59 | 60 | return decisions[0][self.model.state.node] 61 | 62 | """ 63 | the function that takes as arguments the percentile we are going to 64 | use, theta (espressed as a value in [0,1]), the spread for a link and 65 | the mean cost of that link and returns the value corresponding to 66 | the theta precentile of the interval [(1 - spread) * mean, (1 + spread) * mean] 67 | """ 68 | 69 | def use_percentile_val(self, theta, spread, mean): 70 | point_val = 1 - spread + (2 * spread) * theta 71 | used_cost = mean * point_val 72 | return used_cost 73 | -------------------------------------------------------------------------------- /StochasticShortestPath_Static/SSPStaticModel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses.SDPModel import SDPModel 5 | import networkx as nx 6 | import numpy as np 7 | from collections import namedtuple 8 | 9 | 10 | class SSPStatic(SDPModel): 11 | def __init__( 12 | self, 13 | seed: int = 42, 14 | G: nx.Graph = None, 15 | origin: int = None, 16 | destination: int = None, 17 | edge_weight: str = "travel_time", 18 | lower_bound: float = 0.8, 19 | upper_bound: float = 2.0, 20 | T: int = 300, 21 | ) -> None: 22 | 23 | # Weighted digraph 24 | self.G = G 25 | self.origin = origin 26 | self.path = [self.origin] 27 | self.actual_costs = [] 28 | self.destination = destination 29 | self.edge_weight = edge_weight 30 | 31 | # Edge weight will follow a triangular distribution 32 | self.lower_bound = lower_bound 33 | self.upper_bound = upper_bound 34 | 35 | state_names = ["CurrentNode", "CurrentNodeLinkCosts"] 36 | decision_names = ["NextNode"] 37 | t0 = 0 38 | 39 | # Create random sample out of origin node 40 | self.prng = np.random.RandomState(seed) 41 | S0 = self.sample_initial_state() 42 | 43 | super().__init__(state_names, decision_names, S0, t0, T, seed) 44 | 45 | 
self.reset_VFA() 46 | 47 | # TODO: catch if meaningless parameters for graph are passed 48 | 49 | def sample_initial_state(self): 50 | # Create random link costs for origin node 51 | Decision = namedtuple("Decision", "NextNode") 52 | start_decision = Decision(self.origin) 53 | S0 = self.exog_info_fn(decision=start_decision) 54 | S0["CurrentNode"] = self.origin 55 | 56 | return S0 57 | 58 | def calc_path_length(self, list_of_nodes): 59 | path_length = 0.0 60 | current_node = list_of_nodes[0] 61 | for i in range(1, len(list_of_nodes)): 62 | path_length += self.G.edges[(current_node, list_of_nodes[i], 0)][self.edge_weight] 63 | current_node = list_of_nodes[i] 64 | return path_length 65 | 66 | def exog_info_fn(self, decision): 67 | cost_dict = {} 68 | i = decision.NextNode 69 | for edge in self.G.out_edges(i): 70 | 71 | # This would be a more realistic stochastic model: 72 | # Travel time is random, but proportional to nominal travel time of the edge. 73 | # edge_data = self.G.edges[edge + (0,)] 74 | # left = edge_data[self.edge_weight] * self.lower_bound 75 | # right = edge_data[self.edge_weight] * self.upper_bound 76 | # mode = edge_data[self.edge_weight] 77 | # if np.abs(left - right) < 1e-4: 78 | # cost_dict[edge] = mode 79 | 80 | # Just choose a random number between 0 and 20s according to a triangular distribution 81 | left = 0 82 | right = 10 83 | mode = 5 84 | 85 | cost_dict[edge] = self.prng.triangular(left=left, mode=mode, right=right) 86 | 87 | return {"CurrentNodeLinkCosts": cost_dict} 88 | 89 | def reset_VFA(self): 90 | # Initialize VFA with deterministic shortest paths from all nodes to target node 91 | shortest_path = nx.shortest_path(self.G, target=self.destination, weight=self.edge_weight) 92 | self.V_t = {} 93 | for node in self.G.nodes: 94 | if node in shortest_path: 95 | self.V_t[node] = self.calc_path_length(shortest_path[node]) 96 | else: 97 | self.V_t[node] = np.inf 98 | 99 | def reset(self, reset_prng: bool = False): 100 | # Note: VFA is not reset 101 | # Sample a new initial state on reset (random costs from starting node) 102 | S0 = self.sample_initial_state() 103 | self.initial_state = self.build_state(S0) 104 | super().reset(reset_prng) 105 | self.path = [self.origin] 106 | self.actual_costs = [] 107 | 108 | def is_finished(self): 109 | """ 110 | Check if the model run (episode) is finished. 111 | This is the case when we reached the destination or the maximum number of nodes have been visited. 112 | 113 | Returns: 114 | bool: True if the run is finished, False otherwise. 
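        Note: when the step budget T is exhausted before the destination is
        reached, the objective of the run is set to np.nan just before the
        method reports the episode as finished (see the check below).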
115 | """ 116 | if self.t == self.T: 117 | self.objective = np.nan 118 | 119 | return self.state.CurrentNode == self.destination or self.t >= self.T 120 | 121 | def transition_fn(self, decision, exog_info: dict): 122 | return {"CurrentNode": decision.NextNode} 123 | 124 | def objective_fn(self, decision, exog_info: dict): 125 | return self.state.CurrentNodeLinkCosts[(self.state.CurrentNode, decision.NextNode)] 126 | 127 | def update_VFA(self, vhat, alpha): 128 | self.V_t[self.state.CurrentNode] = (1 - alpha) * self.V_t[self.state.CurrentNode] + alpha * vhat 129 | return self.V_t[self.state.CurrentNode] 130 | -------------------------------------------------------------------------------- /StochasticShortestPath_Static/SSPStaticPolicy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from BaseClasses import SDPModel 5 | from BaseClasses.SDPModel import SDPModel 6 | from BaseClasses.SDPPolicy import SDPPolicy 7 | 8 | 9 | class SSPStaticPolicy(SDPPolicy): 10 | def __init__(self, model: SDPModel, policy_name: str = "", theta_step: float = 1.0): 11 | self.theta_step = theta_step 12 | super().__init__(model, policy_name) 13 | 14 | def get_decision(self, state, t, T): 15 | i = state.CurrentNode 16 | costs = { 17 | j: state.CurrentNodeLinkCosts[(i, j)] + self.model.V_t[j] for j in self.model.G.successors(i) 18 | } 19 | 20 | next_node = min(costs, key=costs.get) 21 | 22 | return {"NextNode": next_node} 23 | 24 | def train_value_function_paths(self, n_iterations: int = 1): 25 | 26 | V_t_origin = [] 27 | for i in range(n_iterations): 28 | self.model.reset() 29 | 30 | V_t_origin.append(self.model.V_t[self.model.origin]) 31 | 32 | # Create one path with the current value function approximation 33 | self.run_policy(1) 34 | 35 | # Read sampled costs from path 36 | k = len(self.model.path) - 1 37 | vhats = {self.model.path[k]: 0.0} 38 | while k > 0: 39 | k -= 1 40 | vhats[self.model.path[k]] = self.model.actual_costs[k] + vhats[self.model.path[k + 1]] 41 | 42 | # Update value function approximations for nodes on the path 43 | alpha = self.theta_step / (self.theta_step + i) 44 | for node in vhats.keys(): 45 | self.model.V_t[node] = (1 - alpha) * self.model.V_t[node] + alpha * vhats[node] 46 | 47 | def train_value_function(self, n_iterations: int = 1): 48 | 49 | V_t_origin = [] 50 | for i in range(n_iterations): 51 | alpha = self.theta_step / (self.theta_step + i) 52 | self.model.reset() 53 | 54 | V_t_origin.append(self.model.V_t[self.model.origin]) 55 | 56 | while self.model.is_finished() is False: 57 | state_t = self.model.state 58 | decision_t = self.model.build_decision(self.get_decision(state_t, self.model.t, self.model.T)) 59 | 60 | actual_costs = state_t.CurrentNodeLinkCosts[(state_t.CurrentNode, decision_t.NextNode)] 61 | vhat = actual_costs + self.model.V_t[decision_t.NextNode] 62 | 63 | self.model.update_VFA(vhat, alpha) 64 | self.model.step(decision_t) 65 | 66 | return V_t_origin 67 | -------------------------------------------------------------------------------- /TwoNewsvendor/Parameters.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djanka2/stochastic-optimization/8b654e10480705901a6e48e869e2e0a779e97933/TwoNewsvendor/Parameters.xlsx -------------------------------------------------------------------------------- /TwoNewsvendor/TwoNewsvendor.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
3 | This program implements the basic model for the two newsvendor problem. 4 | This code does not belong to the driverscript 5 | 6 | 7 | """ 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | import pandas as pd 12 | import math 13 | import xlrd 14 | 15 | 16 | class Exogenous_Information(): 17 | 18 | def __init__(self, params): 19 | self.init_args = params 20 | self.prng = np.random.RandomState(params['seed']) 21 | self.n=0 22 | self.demand = None 23 | self.estimate_field = None 24 | self.estimate_central = None 25 | 26 | 27 | 28 | def generate_New_Round(self): 29 | self.n = self.n + 1 30 | self.demand = int(round(self.prng.uniform(self.init_args['dem_lower_bound'], self.init_args['dem_upper_bound']))) 31 | 32 | self.estimate_field = max(0,int(round(self.demand + self.prng.normal(loc = self.init_args['est_bias_field'], scale = self.init_args['est_std_field'])))) 33 | self.estimate_central = max(0,int(round(self.demand + self.prng.normal(loc = self.init_args['est_bias_central'], scale = self.init_args['est_std_central'])))) 34 | 35 | 36 | def get_Estimate_Field(self): 37 | return self.estimate_field 38 | 39 | def get_Estimate_Central(self): 40 | return self.estimate_central 41 | 42 | def get_Demand(self): 43 | return self.demand 44 | 45 | def get_Round_Number(self): 46 | return self.n 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | class Model_Field(): 55 | """ 56 | Base class for model 57 | """ 58 | 59 | def __init__(self, state_names, x_names, s_0, params): 60 | """ 61 | Initializes the model 62 | 63 | :param state_names: list(str) - state variable dimension names 64 | :param x_names: list(str) - decision variable dimension names 65 | :param s_0: dict - contains the information needed to populate the state names 66 | with the initial state values 67 | :params params: other initial information such as unit costs 68 | for overage or underage and the smoothing constants 69 | """ 70 | 71 | self.init_args = params 72 | self.prng = np.random.RandomState(params['seed']) 73 | self.init_state = s_0 74 | self.state_names = state_names 75 | self.x_names = x_names 76 | self.State = namedtuple('State', state_names) 77 | self.Decision = namedtuple('Decision', x_names) 78 | self.pen_incurred = 0 79 | 80 | self.state = self.build_state(self.init_state) 81 | self.decision = None 82 | self.n=0 83 | self.exog_info = {} 84 | 85 | 86 | 87 | def resetModel(self,theta): 88 | 89 | self.state = self.build_state(self.init_state) 90 | self.decision = None 91 | self.n=0 92 | self.exog_info = {} 93 | 94 | 95 | 96 | def build_state(self, info): 97 | return self.State(*[info[k] for k in self.state_names]) 98 | 99 | def build_decision(self, info): 100 | self.decision = self.Decision(*[info[k] for k in self.x_names]) 101 | return self.decision 102 | 103 | def exog_info_fn(self, decision_central, demand): 104 | exog_info = [] 105 | exog_info.append(decision_central) 106 | exog_info.append(demand) 107 | return exog_info 108 | 109 | def get_alpha_bias(self): 110 | return self.init_args['alpha_bias'] 111 | 112 | def updateState(self,estimate): 113 | state_dict = self.state._asdict() 114 | state_dict['estimate']=estimate 115 | self.state = self.build_state(state_dict) 116 | 117 | 118 | 119 | 120 | 121 | def transition_fn(self, exog_info): 122 | 123 | self.n +=1 124 | 125 | state_dict = self.state._asdict() 126 | 127 | source_bias = self.state.estimate - exog_info['demand'] 128 | central_bias = exog_info['allocated_quantity'] - self.decision.quantity_requested 129 | 130 | for state_desc in 
['central_bias','source_bias']: 131 | state_dict[state_desc] = (1 - self.get_alpha_bias()) * state_dict[state_desc] + self.get_alpha_bias() * eval(state_desc) 132 | 133 | self.state = self.build_state(state_dict) 134 | 135 | 136 | def objective_fn(self, exog_info): 137 | allocated = exog_info['allocated_quantity'] 138 | demand = exog_info['demand'] 139 | self.pen_incurred = (self.init_args['o_field'] * max(allocated - demand, 0) + 140 | self.init_args['u_field'] * max(demand - allocated, 0)) 141 | return -self.pen_incurred 142 | 143 | def showState(self,state_desc): 144 | return getattr(self.state,state_desc) 145 | 146 | 147 | class Model_Central(): 148 | """ 149 | Base class for model 150 | """ 151 | 152 | def __init__(self, state_names, x_names, s_0, params): 153 | """ 154 | Initializes the model 155 | 156 | :param state_names: list(str) - state variable dimension names 157 | :param x_names: list(str) - decision variable dimension names 158 | :param s_0: dict - contains the information needed to populate the state names 159 | with the initial state values and other initial information such as unit costs 160 | for overage or underage and the smoothing constants 161 | :param seed: int - seed for random number generator 162 | """ 163 | 164 | self.init_args = params 165 | self.prng = np.random.RandomState(self.init_args['seed']) 166 | self.init_state = s_0 167 | self.state_names = state_names 168 | self.x_names = x_names 169 | self.State = namedtuple('State', state_names) 170 | self.Decision = namedtuple('Decision', x_names) 171 | self.pen_incurred =0 172 | 173 | self.state = self.build_state(self.init_state) 174 | self.decision = None 175 | self.n=0 176 | self.beta_field = 0 177 | self.beta_source = 0 178 | self.delta_field = 0 179 | self.delta_source = 0 180 | self.lambda_field = 0 181 | self.lambda_source = 0 182 | 183 | def resetModel(self,theta): 184 | 185 | self.state = self.build_state(self.init_state) 186 | self.decision = None 187 | self.n=0 188 | self.beta_field = 0 189 | self.beta_source = 0 190 | self.delta_field = 0 191 | self.delta_source = 0 192 | self.lambda_field = 0 193 | self.lambda_source = 0 194 | 195 | 196 | 197 | def build_state(self, info): 198 | return self.State(*[info[k] for k in self.state_names]) 199 | 200 | def build_decision(self, info): 201 | self.decision = self.Decision(*[info[k] for k in self.x_names]) 202 | return self.decision 203 | 204 | def exog_info_fn(self, req_quantity, demand): 205 | return demand 206 | 207 | def updateState(self,field_request,estimate): 208 | state_dict = self.state._asdict() 209 | state_dict['field_request']=field_request 210 | state_dict['estimate']=estimate 211 | self.state = self.build_state(state_dict) 212 | 213 | def get_alpha_bias(self): 214 | return self.init_args['alpha_bias'] 215 | 216 | def get_alpha_learning(self): 217 | return self.init_args['alpha_learning'] 218 | 219 | 220 | 221 | def transition_fn(self, exog_info): 222 | 223 | self.n +=1 224 | 225 | state_dict = self.state._asdict() 226 | 227 | 228 | field_bias = self.state.field_request - exog_info['demand'] 229 | source_bias = self.state.estimate - exog_info['demand'] 230 | 231 | 232 | self.beta_field = (1 - self.get_alpha_learning()) * self.beta_field + self.get_alpha_learning() * (field_bias - state_dict['field_bias']) 233 | self.beta_source = (1 - self.get_alpha_learning()) * self.beta_source + self.get_alpha_learning() * (source_bias - state_dict['source_bias']) 234 | 235 | self.delta_field = (1 - self.get_alpha_learning()) * self.delta_field + 
self.get_alpha_learning() * ((field_bias - state_dict['field_bias'])**2)
236 |         self.delta_source = (1 - self.get_alpha_learning()) * self.delta_source + self.get_alpha_learning() * ((source_bias - state_dict['source_bias'])**2)
237 | 
238 |         self.var_field = (self.delta_field-(self.beta_field**2))/(1-self.lambda_field)
239 |         self.var_source = (self.delta_source-(self.beta_source**2))/(1-self.lambda_source)
240 | 
241 |         dem_field = self.var_field + (self.beta_field)**2
242 |         dem_source = self.var_source + (self.beta_source)**2
243 | 
244 |         if dem_field < 0.001:
245 |             field_w = 1
246 |             source_w = 0
247 |         elif dem_source < 0.001:
248 |             field_w = 0
249 |             source_w = 1
250 |         else:
251 |             field_w = 1/dem_field
252 |             source_w = 1/dem_source
253 | 
254 |         sum_w = field_w + source_w
255 | 
256 |         state_dict['field_weight'] = field_w/sum_w
257 |         state_dict['source_weight'] = source_w/sum_w
258 | 
259 |         state_dict['field_bias_hat'] = field_bias
260 | 
261 | 
262 |         if self.n > 1:
263 |             self.lambda_field = ((1 - self.get_alpha_bias())**2)*self.lambda_field + self.get_alpha_bias()**2
264 |             self.lambda_source = ((1 - self.get_alpha_bias())**2)*self.lambda_source + self.get_alpha_bias()**2
265 |         else:
266 |             self.lambda_field = self.get_alpha_bias()
267 |             self.lambda_source = self.get_alpha_bias()
268 | 
269 |         for state_desc in ['field_bias','source_bias']:
270 |             state_dict[state_desc] = (1 - self.get_alpha_bias()) * state_dict[state_desc] + self.get_alpha_bias() * eval(state_desc)
271 | 
272 |         self.state = self.build_state(state_dict)
273 | 
274 | 
275 | 
276 |     def objective_fn(self, exog_info):
277 |         allocated = exog_info['allocated_quantity']
278 |         demand = exog_info['demand']
279 |         self.pen_incurred = (self.init_args['o_central'] * max(allocated - demand, 0) +
280 |                              self.init_args['u_central'] * max(demand - allocated, 0))
281 |         return -self.pen_incurred
282 | 
283 |     def showState(self,state_desc):
284 |         return getattr(self.state,state_desc)
285 | 
286 | 
287 | 
288 | 
289 | 
--------------------------------------------------------------------------------
/TwoNewsvendor/TwoNewsvendorLearning.py:
--------------------------------------------------------------------------------
1 | """
2 | Two Newsvendor as a Learning Problem
3 | 
4 | Author: Andrei Graur
5 | 
6 | This program implements a model for the two newsvendor
7 | problem where the field agent and the central command
8 | both view the problem of choosing the right bias to add
9 | or subtract as a learning problem. Run the code with the
10 | python command, no arguments given.
11 | 
12 | """
13 | 
14 | import numpy as np
15 | import pandas as pd
16 | import math
17 | import xlrd
18 | 
19 | from TwoNewsvendor import Model_Field
20 | from TwoNewsvendor import Model_Central
21 | 
22 | # the class implementing the objects that represent the
23 | # available choices of the two agents, which are the biases to add
24 | class Choice:
25 |     def __init__(self, quantity, util_estimate, W_precision_estimate, theta, nu_bar = 0.5):
26 |         '''
27 |         The function that initializes the choice object
28 | 
29 |         param: quantity - int: the quantity in units equal to the bias
30 |         param: util_estimate - float: the estimate of what the utility will
31 |         be when we use this bias; we initialize it to 0 in main
32 |         param: W_precision_estimate - float: the estimate of what the
33 |         precision of next experiment of using this bias will be
34 |         param: theta - float: the tunable parameter.
It can be for the UCB policy or for the IE polidy 35 | ''' 36 | 37 | self.n = 0 38 | 39 | self.quantity = quantity 40 | self.util_estimate = util_estimate 41 | self.accumulated_precision = W_precision_estimate 42 | 43 | self.theta = theta 44 | 45 | 46 | #Variables to compute the variance of W 47 | self.W_precision = W_precision_estimate 48 | self.W_variance = 1 / float(self.W_precision) 49 | 50 | self.nu_bar = nu_bar 51 | self.W_bar = util_estimate 52 | self.W_beta = 0 53 | self.W_delta = 0 54 | self.W_lambda = 0 55 | self.nu = 1 56 | 57 | 58 | 59 | 60 | 61 | 62 | # the function that uploads the results of the experiment of trying 63 | # this bias and updates the corresponding beliefs about this choice 64 | def upload_results(self, W): 65 | self.n += 1 66 | 67 | self.nu = (self.nu)/(1+self.nu-self.nu_bar) 68 | self.W_beta = (1-self.nu)*self.W_beta + self.nu * (W - self.W_bar) 69 | self.W_delta = (1-self.nu)*self.W_delta + self.nu * ((W - self.W_bar)**2) 70 | 71 | 72 | # update the variance 73 | if self.n > 1: 74 | #self.W_variance = (((self.n - 2.0) / float(self.n - 1)) * self.W_variance +(1.0 / self.n) * ((W - self.util_estimate) ** 2)) 75 | 76 | self.W_variance = (self.W_delta - (self.W_beta**2))/(1+self.W_lambda) 77 | if self.W_variance < 0.0001: 78 | self.W_precision = 10 79 | else: 80 | self.W_precision = 1 / float(self.W_variance) 81 | 82 | alpha = self.W_precision / (self.accumulated_precision + self.W_precision) 83 | 84 | if self.n >1: 85 | self.W_lambda = ((1-alpha)**2)*self.W_lambda + (alpha)**2 86 | else: 87 | self.W_lambda = (alpha)**2 88 | 89 | self.W_bar = (1-alpha)*self.W_bar + alpha*W 90 | 91 | 92 | # update estimate and experiment precision 93 | self.util_estimate = ((self.util_estimate * self.accumulated_precision + 94 | W * self.W_precision) / 95 | (self.accumulated_precision + self.W_precision)) 96 | self.accumulated_precision += self.W_precision 97 | 98 | 99 | # the function that returns the bias attribute of this object 100 | def get_choice_quantity(self): 101 | return self.quantity 102 | 103 | # the cost function approximation for this choice of bias 104 | def get_UCB_value(self, time): 105 | if self.n == 0: 106 | UCB_val = np.inf 107 | 108 | else: 109 | UCB_val = (self.util_estimate + self.theta * math.sqrt(math.log(time) / self.n)) 110 | return UCB_val 111 | 112 | def get_IE_value(self): 113 | 114 | IE_val = (self.util_estimate + self.theta * math.sqrt(1/self.accumulated_precision)) 115 | return IE_val 116 | 117 | def get_nb_experiments(self): 118 | return self.n 119 | 120 | def getAllParametersHeaderList(self): 121 | outL="bias_choice n mu_bar_estimate Beta sigma IE_value UCB_value W_nu W_beta W_delta W_variance alpha W_bar W_lambda W_precision " 122 | return outL.split() 123 | 124 | def getAllParametersList(self,time): 125 | outL="{:.2f} {} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} {:.2f} ".format(self.quantity,self.n,self.util_estimate,self.accumulated_precision,math.sqrt(1/self.accumulated_precision),self.get_IE_value(),self.get_UCB_value(time),self.nu,self.W_beta,self.W_delta,self.W_variance,self.getOSA(),self.W_bar,self.W_lambda,self.W_precision) 126 | return outL.split() 127 | 128 | def getMainParametersHeaderList(self): 129 | outL="bias_choice n mu_bar_estimate Beta sigma W_bar W_precision W_variance " 130 | return outL.split() 131 | 132 | def getMainParametersList(self): 133 | return 
[self.quantity,self.n,self.util_estimate,self.accumulated_precision,math.sqrt(1/self.accumulated_precision),self.W_bar,self.W_precision,self.W_variance] 134 | 135 | def printChoiceParameters(self,n): 136 | valuesList = self.getAllParametersList(n) 137 | headerList = self.getAllParametersHeaderList() 138 | outStr="" 139 | for i in range(len(valuesList)): 140 | outStr += "{}: {}, ".format(headerList[i],valuesList[i]) 141 | return outStr 142 | 143 | # the model for the field agent treating the problem as a 144 | # learning problem 145 | class Learning_model_field(Model_Field): 146 | def __init__(self, theta, *args, **kwargs): 147 | super(Learning_model_field, self).__init__(*args, **kwargs) 148 | range_list = self.init_args['bias_interval_field'].split(",") 149 | range_list = [int(e) for e in range_list] 150 | self.choice_range =range(range_list[0],range_list[1]+1) 151 | self.resetModel(theta) 152 | 153 | def resetModel(self,theta): 154 | self.choices = {} 155 | for value in self.choice_range: 156 | 157 | self.choices[value] = Choice(value, 0, 0.01, theta) 158 | 159 | super(Learning_model_field, self).resetModel(None) 160 | 161 | 162 | 163 | # the new transition function for the learning approach 164 | def transition_fn(self, exog_info): 165 | 166 | # update the results of having tried out the used choice 167 | choice_used = self.choices[self.decision.bias_applied] 168 | 169 | #print("Field Choice state pre update") 170 | #outStr = choice_used.printChoiceParameters(self.n+1) 171 | #print(outStr) 172 | 173 | choice_used.upload_results(-self.pen_incurred) 174 | # update beliefs about the external source 175 | super(Learning_model_field, self).transition_fn(exog_info) 176 | 177 | #print("Field Choice state post update") 178 | #outStr = choice_used.printChoiceParameters(self.n+1) 179 | #print(outStr) 180 | 181 | def getMainParametersList(self): 182 | listPar = [self.choices[x].getMainParametersList() for x in self.choice_range] 183 | listParFlat = [elem for l in listPar for elem in l] 184 | return listParFlat 185 | 186 | def getMainParametersHeaderList(self): 187 | listPar = [self.choices[x].getMainParametersHeaderList() for x in self.choice_range] 188 | listParFlat = [str(x)+"_field_"+elem for x,l in zip(self.choice_range,listPar) for elem in l] 189 | return listParFlat 190 | 191 | def getMainParametersDf(self): 192 | dictPar = {x:self.choices[x].getMainParametersList() for x in self.choice_range} 193 | pdPar = pd.DataFrame(dictPar) 194 | pdPar = pdPar.transpose() 195 | pdPar.columns = self.choices[self.choice_range[0]].getMainParametersHeaderList() 196 | 197 | print(pdPar) 198 | return pdPar 199 | 200 | 201 | 202 | # the model for the central command treating the problem as a 203 | # learning problem 204 | class Learning_model_central(Model_Central): 205 | def __init__(self, theta, *args, **kwargs): 206 | super(Learning_model_central, self).__init__(*args, **kwargs) 207 | range_list = self.init_args['bias_interval_central'].split(",") 208 | range_list = [int(e) for e in range_list] 209 | self.choice_range=range(range_list[0],range_list[1]+1) 210 | self.resetModel(theta) 211 | 212 | def resetModel(self,theta): 213 | self.choices = {} 214 | for value in self.choice_range: 215 | 216 | self.choices[value] = Choice(value, 0, 0.01, theta) 217 | 218 | super(Learning_model_central, self).resetModel(None) 219 | 220 | 221 | def transition_fn(self, exog_info): 222 | # update the results of having tried out the used choice 223 | choice_used = self.choices[self.decision.bias_applied] 224 | 225 | 
#print("Central Choice state pre update") 226 | #outStr = choice_used.printChoiceParameters(self.n+1) 227 | #print(outStr) 228 | 229 | choice_used.upload_results(-self.pen_incurred) 230 | # update beliefs about the external source 231 | super(Learning_model_central, self).transition_fn(exog_info) 232 | 233 | #print("Central Choice state pos update - W = {:.2f}".format(-self.pen_incurred)) 234 | #outStr = choice_used.printChoiceParameters(self.n+1) 235 | #print(outStr) 236 | 237 | def getMainParametersList(self): 238 | listPar = [self.choices[x].getMainParametersList() for x in self.choice_range] 239 | listParFlat = [elem for l in listPar for elem in l] 240 | return listParFlat 241 | 242 | def getMainParametersHeaderList(self): 243 | listPar = [self.choices[x].getMainParametersHeaderList() for x in self.choice_range] 244 | listParFlat = [str(x)+"_central_"+elem for x,l in zip(self.choice_range,listPar) for elem in l] 245 | return listParFlat 246 | 247 | def getMainParametersDf(self): 248 | dictPar = {x:self.choices[x].getMainParametersList() for x in self.choice_range} 249 | pdPar = pd.DataFrame(dictPar) 250 | pdPar = pdPar.transpose() 251 | pdPar.columns = self.choices[self.choice_range[0]].getMainParametersHeaderList() 252 | 253 | print(pdPar) 254 | return pdPar 255 | 256 | 257 | 258 | 259 | -------------------------------------------------------------------------------- /TwoNewsvendor/TwoNewsvendorPolicy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 3 | The policy for the two agent newsvendor game. 4 | 5 | ''' 6 | 7 | from TwoNewsvendor import Model_Field 8 | from TwoNewsvendor import Model_Central 9 | 10 | import numpy as np 11 | import math 12 | 13 | import matplotlib.pyplot as plt 14 | 15 | def printTuple(a): 16 | printStr = "" 17 | for f in a._fields: 18 | printStr += " {}: {:.2f}".format(f,getattr(a, f)) 19 | return printStr 20 | 21 | def printTupleValues(a): 22 | printStr = "" 23 | for f in a._fields: 24 | printStr += "{:.2f} ".format(getattr(a, f)) 25 | return printStr 26 | 27 | 28 | def formatFloatList(L,p): 29 | sFormat = "{{:.{}f}} ".format(p) * len(L) 30 | outL = sFormat.format(*L) 31 | return outL.split() 32 | 33 | 34 | 35 | def plot_heat_map(ax,contribution_dict, params,theta_field_values, theta_central_values,titleString,player_sorted_by_value): 36 | """ 37 | this function plots a heat map 38 | 39 | 40 | """ 41 | 42 | # 43 | textcolors=["black", "white"] 44 | 45 | contribution_values = [contribution_dict[(theta_field,theta_central)] for theta_central in theta_central_values for theta_field in theta_field_values] 46 | contributions = np.array(contribution_values) 47 | increment_count = len(theta_field_values) 48 | contributions = np.reshape(contributions, (-1, increment_count)) 49 | 50 | 51 | 52 | 53 | im = ax.imshow(contributions, cmap='hot',origin='lower',aspect='auto',alpha=.9) 54 | threshold = im.norm(contributions.max())/2 55 | # create colorbar 56 | cbar = ax.figure.colorbar(im, ax=ax) 57 | # cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom") 58 | # we want to show all ticks... 59 | ax.set_xticks(np.arange(len(theta_field_values))) 60 | ax.set_yticks(np.arange(len(theta_central_values))) 61 | # ... and label them with the respective list entries 62 | ax.set_xticklabels(theta_field_values) 63 | ax.set_yticklabels(theta_central_values) 64 | # rotate the tick labels and set their alignment. 
65 | #plt.setp(ax.get_xticklabels(), rotation=45, ha="right",rotation_mode="anchor") 66 | ax.set_title(titleString) 67 | 68 | for rank_player,dict_entry_player in enumerate(player_sorted_by_value): 69 | 70 | y_ind = [i for i,y in enumerate(theta_central_values) if dict_entry_player[0][1] == y] 71 | x_ind = [i for i,x in enumerate(theta_field_values) if dict_entry_player[0][0] == x] 72 | 73 | text = ax.text(x_ind[0], y_ind[0], "{}\n {:.0f}".format(rank_player,dict_entry_player[1][-1]), ha="center", va="center", color=textcolors[im.norm(dict_entry_player[1][-1]) < threshold],fontsize=7) 74 | 75 | #text = ax.text(x_ind[0], y_ind[0], "{}_{}".format(dict_entry_player[0][0], dict_entry_player[0][1]), ha="center", va="center", color=textcolors[im.norm(dict_entry_player[1][-1]) < threshold],fontsize=7) 76 | 77 | if params['policy_central']=='regular' or params['policy_central']=='punishing': 78 | ax.set_ylabel(r'$bias^{central}$',fontsize=14) 79 | elif 'learning' in params['policy_central']: 80 | ax.set_ylabel(r'$\theta^{central}$',fontsize=14) 81 | 82 | 83 | if params['policy_field']=='regular': 84 | ax.set_xlabel(r'$bias^{field}$',fontsize=14) 85 | elif 'learning' in params['policy_field']: 86 | ax.set_xlabel(r'$\theta^{field}$',fontsize=14) 87 | 88 | 89 | 90 | 91 | #fig.tight_layout() 92 | 93 | return True 94 | 95 | 96 | def create_theta_grid(params): 97 | 98 | #Field 99 | if params['policy_field']=='regular': 100 | range_list = params['bias_interval_field'].split(",") 101 | range_list = [int(e) for e in range_list] 102 | theta_field_list=list(range(range_list[0],range_list[1]+1)) 103 | 104 | elif 'learning' in params['policy_field']: 105 | if isinstance(params['theta_set_field'], str): 106 | theta_field_list = params['theta_set_field'].split(",") 107 | theta_field_list = [float(e) for e in theta_field_list] 108 | else: 109 | theta_field_list = [float(params['theta_set_field'])] 110 | 111 | #Central 112 | if params['policy_central']=='regular' or params['policy_central']=='punishing': 113 | range_list = params['bias_interval_central'].split(",") 114 | range_list = [int(e) for e in range_list] 115 | theta_central_list=list(range(range_list[0],range_list[1]+1)) 116 | 117 | elif 'learning' in params['policy_central']: 118 | if isinstance(params['theta_set_central'], str): 119 | theta_central_list = params['theta_set_central'].split(",") 120 | theta_central_list = [float(e) for e in theta_central_list] 121 | else: 122 | theta_central_list = [float(params['theta_set_central'])] 123 | 124 | theta_grid = [] 125 | for theta_field in theta_field_list: 126 | for theta_central in theta_central_list: 127 | theta_grid.append((theta_field,theta_central)) 128 | 129 | return theta_grid,theta_field_list,theta_central_list 130 | 131 | 132 | def run_policies(ite,record_budget,params,exog_info_gen,theta_field,theta_central,M_field,P_field,M_central,P_central): 133 | 134 | cost_ite_field = [] 135 | cost_ite_central = [] 136 | 137 | accum_util_field = 0 138 | accum_util_central = 0 139 | 140 | accum_request_field = 0 141 | accum_allocated_central=0 142 | 143 | record_sample_ite = [params['policy_field'],params['policy_central'],"{}_{}".format(theta_field,theta_central),ite] 144 | 145 | for n in range(params['N']): 146 | #Generate exogenous info - estimates and demand - but we are not observing the demand 147 | exog_info_gen.generate_New_Round() 148 | #print("Round {} - Estimate for the field {}, estimate for central {} and true demand 
{}".format(exog_info_gen.get_Round_Number(),exog_info_gen.get_Estimate_Field(),exog_info_gen.get_Estimate_Central(),exog_info_gen.get_Demand())) 149 | record_sample_t = [n,exog_info_gen.get_Round_Number(),exog_info_gen.get_Estimate_Field(),exog_info_gen.get_Estimate_Central(),exog_info_gen.get_Demand()] 150 | 151 | #Field updates its state variable with an estimate 152 | M_field.updateState(exog_info_gen.get_Estimate_Field()) 153 | #print("Field State {}".format(printTuple(M_field.state))) 154 | record_sample_t += list(M_field.state) 155 | 156 | #Field makes a decision 157 | field_request,bias_field = P_field.getDecision(M_field) 158 | M_field.build_decision({'quantity_requested': field_request,'bias_applied':bias_field}) 159 | accum_request_field += field_request 160 | #print("Field Decision {}".format(printTuple(M_field.decision))) 161 | record_sample_t += list(M_field.decision) 162 | 163 | #Central updates its state with field request and (possibly) an external estimate 164 | M_central.updateState(field_request,exog_info_gen.get_Estimate_Central()) 165 | #print("Central State {}".format(printTuple(M_central.state))) 166 | record_sample_t += list(M_central.state) 167 | 168 | #Central makes a decision 169 | decision_central,bias_central = P_central.getDecision(M_central) 170 | M_central.build_decision({'quantity_allocated': decision_central,'bias_applied':bias_central}) 171 | accum_allocated_central += decision_central 172 | #print("Central Decision {}".format(printTuple(M_central.decision))) 173 | record_sample_t += list(M_central.decision) 174 | 175 | #True demand is revelead 176 | demand = exog_info_gen.get_Demand() 177 | exog_info_pos_dec = {'allocated_quantity': decision_central, 'demand': demand} 178 | 179 | #Costs/penalties for field and central are computed 180 | util_field = M_field.objective_fn(exog_info_pos_dec) 181 | util_central = M_central.objective_fn(exog_info_pos_dec) 182 | #print("Field utility {:.2f} - Central utility {:.2f}".format(util_field,util_central)) 183 | 184 | accum_util_field += util_field 185 | accum_util_central += util_central 186 | 187 | #record_sample_t += formatFloatList([util_field,accum_util_field,util_central,accum_util_central],2) 188 | util_company = util_field + util_central 189 | accum_util_company = accum_util_field + accum_util_central 190 | 191 | record_sample_t += [util_field,accum_util_field,util_central,accum_util_central,util_company,accum_util_company] 192 | 193 | 194 | cost_ite_field.append(accum_util_field) 195 | cost_ite_central.append(accum_util_central) 196 | 197 | 198 | 199 | #Field and Central transition to next round updating all the stats 200 | M_field.transition_fn(exog_info_pos_dec) 201 | M_central.transition_fn(exog_info_pos_dec) 202 | 203 | if "learning" in params['policy_field']: 204 | record_sample_t += M_field.getMainParametersList() 205 | if "learning" in params['policy_central']: 206 | record_sample_t += M_central.getMainParametersList() 207 | 208 | record_budget.append(record_sample_ite+record_sample_t) 209 | 210 | return cost_ite_field,cost_ite_central,record_budget,accum_request_field/params['N'],accum_allocated_central/params['N'] 211 | 212 | 213 | 214 | 215 | class Policy_Field(): 216 | 217 | 218 | def __init__(self, params,theta): 219 | self.init_args = params 220 | self.theta = theta 221 | 222 | 223 | def getDecision(self,model): 224 | decision=getattr(self,self.init_args['policy_field']) 225 | return decision(model) 226 | 227 | def getLearningBias(self,model): 228 | 229 | if ("UCB" in 
model.init_args['policy_field']):
230 |             stats = {x:model.choices[x].get_UCB_value(model.n + 1) for x in model.choice_range}
231 |         else:
232 |             stats = {x:model.choices[x].get_IE_value() for x in model.choice_range}
233 | 
234 |         bias = max(stats,key=stats.get)
235 |         return bias
236 | 
237 | 
238 | 
239 |     def regular(self, model):
240 |         #ATTENTION! In this policy, self.theta is the bias that field is adding - one of the values in the parameter interval "bias_interval_field"
241 |         decision = round(model.state.estimate - model.state.source_bias - model.state.central_bias + self.theta)
242 |         #bias = decision - (model.state.estimate - model.state.source_bias)
243 |         bias = self.theta
244 |         return decision, bias
245 | 
246 | 
247 |     def learning_UCB(self,model):
248 |         bias = self.getLearningBias(model)
249 |         decision = round(model.state.estimate - model.state.source_bias + bias)
250 |         return decision,bias
251 | 
252 | 
253 | 
254 | 
255 |     def learning_IE(self, model):
256 |         # This method implements the Interval Estimation policy
257 | 
258 |         bias = self.getLearningBias(model)
259 |         decision = round(model.state.estimate - model.state.source_bias + bias)
260 | 
261 |         return decision,bias
262 | 
263 | 
264 | 
265 | class Policy_Central():
266 | 
267 |     def __init__(self, params,theta):
268 |         self.init_args = params
269 |         self.theta = theta
270 | 
271 | 
272 |     def getDecision(self,model):
273 |         decision=getattr(self,self.init_args['policy_central'])
274 |         return decision(model)
275 | 
276 |     def getLearningBias(self,model):
277 | 
278 |         if ("UCB" in model.init_args['policy_central']):
279 |             stats = {x:model.choices[x].get_UCB_value(model.n + 1) for x in model.choice_range}
280 |         else:
281 |             stats = {x:model.choices[x].get_IE_value() for x in model.choice_range}
282 | 
283 |         bias = max(stats,key=stats.get)
284 |         return bias
285 | 
286 | 
287 |     def regular(self, model):
288 |         #ATTENTION!
In this policy, self.theta is the bias that central is adding - one of the values in the parameter interval "bias_interval_central" 289 | decision = round(model.state.field_request - model.state.field_bias + self.theta) 290 | decision = max(0,decision) 291 | #bias = decision - model.state.field_request 292 | bias = self.theta 293 | return decision, bias 294 | 295 | def punishing(self, model): 296 | if model.state.field_bias_hat >0: 297 | decision = round(model.state.field_request - 2 * model.state.field_bias_hat) 298 | bias = - 2 * model.state.field_bias_hat 299 | else: 300 | #decision = round(model.state.field_request - model.state.field_bias + self.theta) 301 | decision = round(model.state.field_request + self.theta) 302 | bias = self.theta 303 | 304 | decision = max(0,decision) 305 | #bias = decision - model.state.field_request 306 | return decision, bias 307 | 308 | def learning_UCB(self,model): 309 | bias = self.getLearningBias(model) 310 | decision = round(model.state.field_request + bias) 311 | return max(0,decision),bias 312 | 313 | 314 | def learning_IE(self, model): 315 | # This method implements the Interval Estimation policy 316 | 317 | bias = self.getLearningBias(model) 318 | decision = round(model.state.field_request + bias) 319 | decision = max(0,decision) 320 | return decision,bias 321 | 322 | def learning_IE_two_estimates(self, model): 323 | bias = self.getLearningBias(model) 324 | decision = round(model.state.field_weight * (model.state.field_request) + model.state.source_weight * (model.state.estimate - model.state.source_bias) + bias) 325 | 326 | return max(0,decision),bias 327 | 328 | 329 | 330 | 331 | 332 | --------------------------------------------------------------------------------
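
A note on running the two-newsvendor code: the repository tree lists a TwoNewsvendorDriverScript.py, but its contents are not included in this dump. The sketch below is a minimal, hypothetical driver (not the repository's actual script) showing how Exogenous_Information, Model_Field, Model_Central and the "regular" policies defined above fit together for a single run with run_policies. All values in the params dictionary (demand bounds, estimate noise, overage/underage penalties, smoothing constants, seed) and the two theta biases are illustrative assumptions; the real experiments read these from Parameters.xlsx. Note that importing TwoNewsvendor.py also requires the xlrd package.

from TwoNewsvendor import Exogenous_Information, Model_Field, Model_Central
from TwoNewsvendorPolicy import Policy_Field, Policy_Central, run_policies

# Illustrative parameters (assumed values; the real ones come from Parameters.xlsx).
params = {
    'seed': 1234, 'N': 50,
    'dem_lower_bound': 20, 'dem_upper_bound': 40,
    'est_bias_field': 0.0, 'est_std_field': 3.0,
    'est_bias_central': 0.0, 'est_std_central': 5.0,
    'alpha_bias': 0.2, 'alpha_learning': 0.2,
    'o_field': 1.0, 'u_field': 2.0,          # field overage/underage penalties
    'o_central': 1.0, 'u_central': 2.0,      # central overage/underage penalties
    'policy_field': 'regular', 'policy_central': 'regular',
}

# State and decision variable names expected by the models and policies above;
# all biases and weights start at zero.
field_states = ['estimate', 'source_bias', 'central_bias']
central_states = ['field_request', 'estimate', 'field_bias', 'source_bias',
                  'field_weight', 'source_weight', 'field_bias_hat']

M_field = Model_Field(field_states, ['quantity_requested', 'bias_applied'],
                      {k: 0 for k in field_states}, params)
M_central = Model_Central(central_states, ['quantity_allocated', 'bias_applied'],
                          {k: 0 for k in central_states}, params)

theta_field, theta_central = 2, 0            # fixed biases used by the 'regular' policies
P_field = Policy_Field(params, theta_field)
P_central = Policy_Central(params, theta_central)

# One simulation of N rounds; run_policies returns cumulative utilities per round
# plus the per-round averages of requested and allocated quantities.
exog = Exogenous_Information(params)
costs_field, costs_central, record, avg_request, avg_allocated = run_policies(
    0, [], params, exog, theta_field, theta_central,
    M_field, P_field, M_central, P_central)

print("Cumulative field utility:  ", costs_field[-1])
print("Cumulative central utility:", costs_central[-1])
print("Average request / allocation per round: {:.1f} / {:.1f}".format(avg_request, avg_allocated))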