├── __init__.py ├── logs └── temp.log ├── utils ├── __init__.py └── weights_logger.py ├── algorithms ├── __init__.py ├── deterministic.py ├── semi_gradient_sarsa_algorithm.py ├── reinforce_algorithm.py ├── true_online_sarsa_lambda.py ├── actor_critic_eligibility_trace_algorithm_nn.py ├── actor_critic_eligibility_trace_algorithm_linear.py ├── optimal_algorithm.py ├── actor_critic_eligibility_trace_algorithm_tc.py ├── actor_critic_one_step.py └── semi_gradient_n_step_sarsa_algorithm.py ├── policy_approximations ├── __init__.py └── linear_policy_approximation.py ├── replacement_policies ├── __init__.py ├── policy_base.py ├── lru.py ├── fifo.py └── lfu.py ├── state_approximations ├── __init__.py ├── state_approximation.py ├── linear_v_approximation.py ├── nn_v_approximation.py └── one_d_tc.py ├── state_action_approximations ├── __init__.py ├── state_action_approximation.py ├── linear_q_approximation.py ├── nn_q_approximation.py ├── tile_coding_state_action.py └── one_d_tc.py ├── experiments ├── graphs │ ├── lru.png │ ├── reinforce.png │ ├── semi_gradient_sarsa.png │ ├── true_online_sarsa_lambda.png │ └── semi_gradient_n_step_sarsa.png └── scratch ├── RLCar_Reinforcement_Learning_for_Cache_admission_and_Replacement.pdf ├── requirements.txt ├── LICENSE ├── .gitignore ├── README.md ├── replacement_agent.py ├── trace_generator.py ├── trace_loader.py ├── main.py ├── run.sh └── cache.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logs/temp.log: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /policy_approximations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /replacement_policies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /state_approximations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /state_action_approximations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/graphs/lru.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/experiments/graphs/lru.png -------------------------------------------------------------------------------- /experiments/graphs/reinforce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/experiments/graphs/reinforce.png -------------------------------------------------------------------------------- /experiments/graphs/semi_gradient_sarsa.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/experiments/graphs/semi_gradient_sarsa.png -------------------------------------------------------------------------------- /experiments/graphs/true_online_sarsa_lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/experiments/graphs/true_online_sarsa_lambda.png -------------------------------------------------------------------------------- /experiments/graphs/semi_gradient_n_step_sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/experiments/graphs/semi_gradient_n_step_sarsa.png -------------------------------------------------------------------------------- /RLCar_Reinforcement_Learning_for_Cache_admission_and_Replacement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Godric877/RLCaR/HEAD/RLCar_Reinforcement_Learning_for_Cache_admission_and_Replacement.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cloudpickle==2.0.0 2 | cycler==0.11.0 3 | fonttools==4.32.0 4 | gym==0.23.1 5 | gym-notices==0.0.6 6 | importlib-metadata==4.11.3 7 | joblib==1.1.0 8 | kiwisolver==1.4.2 9 | matplotlib==3.5.1 10 | numpy==1.22.3 11 | packaging==21.3 12 | pandas==1.4.2 13 | Pillow==9.1.0 14 | pyparsing==3.0.8 15 | python-dateutil==2.8.2 16 | pytz==2022.1 17 | scikit-learn==1.0.2 18 | scipy==1.8.0 19 | six==1.16.0 20 | sklearn==0.0 21 | threadpoolctl==3.1.0 22 | torch==1.11.0 23 | typing-extensions==4.1.1 24 | zipp==3.8.0 25 | -------------------------------------------------------------------------------- /replacement_policies/policy_base.py: -------------------------------------------------------------------------------- 1 | class PolicyBase: 2 | 3 | def __init__(self, capacity: int): 4 | pass 5 | 6 | def update(self, key: int, val: int) -> bool: 7 | pass 8 | 9 | def get_remove_candidate(self): 10 | pass 11 | 12 | def update_history(self): 13 | pass 14 | 15 | def remove(self): 16 | pass 17 | 18 | def put(self, key: int, value : int) -> None: 19 | pass 20 | 21 | def remove_key(self, key): 22 | pass 23 | 24 | def reset(self): 25 | pass -------------------------------------------------------------------------------- /utils/weights_logger.py: -------------------------------------------------------------------------------- 1 | import csv 2 | class WeightLogger(): 3 | def __init__(self): 4 | self.weights = [] 5 | 6 | def update_weights(self, weight, tick): 7 | #print("weights = ", weight) 8 | list_weight = list(weight) 9 | list_weight.insert(0, tick) 10 | list_str_weight = [str(w) for w in list_weight] 11 | self.weights.append(list_str_weight) 12 | #print("weights in list = ", list_weight) 13 | 14 | def end(self, index): 15 | file = "weights_{}.csv".format(index) 16 | #print("weights = ", self.weights) 17 | with open(file, "w", newline="") as f: 18 | writer = csv.writer(f) 19 | writer.writerows(self.weights) 20 | self.weights =[] 21 | -------------------------------------------------------------------------------- /state_approximations/state_approximation.py: -------------------------------------------------------------------------------- 1 | class StateApproximation(object): 2 | def 
__call__(self,s) -> float: 3 | """ 4 | return the value of given state; \hat{v}(s) 5 | 6 | input: 7 | state 8 | output: 9 | value of the given state 10 | """ 11 | raise NotImplementedError() 12 | 13 | def update(self,alpha,G,state): 14 | """ 15 | Implement the update rule; 16 | w <- w + \alpha[G- \hat{v}(s_tau;w)] \nabla\hat{v}(s_tau;w) 17 | 18 | input: 19 | alpha: learning rate 20 | G: TD-target 21 | s_tau: target state for updating (yet, update will affect the other states) 22 | ouptut: 23 | None 24 | """ 25 | raise NotImplementedError() -------------------------------------------------------------------------------- /state_action_approximations/state_action_approximation.py: -------------------------------------------------------------------------------- 1 | class StateActionApproximation(object): 2 | def __call__(self,s, a) -> float: 3 | """ 4 | return the value of given state; \hat{v}(s) 5 | 6 | input: 7 | state 8 | output: 9 | value of the given state 10 | """ 11 | raise NotImplementedError() 12 | 13 | def update(self,alpha,G,state, action): 14 | """ 15 | Implement the update rule; 16 | w <- w + \alpha[G- \hat{v}(s_tau;w)] \nabla\hat{v}(s_tau;w) 17 | 18 | input: 19 | alpha: learning rate 20 | G: TD-target 21 | s_tau: target state for updating (yet, update will affect the other states) 22 | ouptut: 23 | None 24 | """ 25 | raise NotImplementedError() -------------------------------------------------------------------------------- /algorithms/deterministic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def always_evict(env, episodes): 4 | bhr_metric = {} 5 | rewards = {} 6 | for i in episodes: 7 | env.reset(i) 8 | done = False 9 | episode_rewards = [] 10 | while not done: 11 | act = 1 12 | obs, reward, done, info = env.step(act) 13 | episode_rewards.append(reward) 14 | if done: 15 | bhr_metric[i] = info[2] 16 | rewards[i] = episode_rewards 17 | return rewards, bhr_metric 18 | 19 | def random_eviction(env, episodes, p=[0.5, 0.5]): 20 | bhr_metric = {} 21 | rewards = {} 22 | for i in episodes: 23 | env.reset(i) 24 | done = False 25 | episode_rewards = [] 26 | while not done: 27 | act = np.random.choice(np.arange(2), p=p) 28 | obs, reward, done, info = env.step(act) 29 | episode_rewards.append(reward) 30 | if done: 31 | bhr_metric[i] = info[2] 32 | rewards[i] = episode_rewards 33 | return rewards, bhr_metric 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Syamantak Kumar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /replacement_policies/lru.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict, Counter 2 | from replacement_policies.policy_base import PolicyBase 3 | 4 | class LRUCache(PolicyBase): 5 | 6 | def __init__(self, capacity: int): 7 | self.capacity = capacity 8 | self.cache=OrderedDict() 9 | self.history = [] 10 | self.history_dict = Counter() 11 | 12 | def update(self, key: int, val : int): 13 | self.cache.move_to_end(key) 14 | is_present = (key in self.history_dict) 15 | return is_present 16 | 17 | def get_remove_candidate(self): 18 | return next(iter(self.cache)) 19 | 20 | def update_history(self): 21 | candidate = self.get_remove_candidate() 22 | if (len(self.history) >= self.capacity): 23 | remove_item = self.history.pop(0) 24 | if(self.history_dict[remove_item] == 1): 25 | del self.history_dict[remove_item] 26 | else: 27 | self.history_dict[remove_item] -= 1 28 | self.history.append(candidate) 29 | self.history_dict[candidate] += 1 30 | 31 | def remove(self): 32 | self.update_history() 33 | return self.cache.popitem(last=False) 34 | 35 | def put(self, key: int, value : int) -> None: 36 | if len(self.cache) >= self.capacity: 37 | self.remove() 38 | self.cache[key]=value 39 | self.cache.move_to_end(key) 40 | 41 | def remove_key(self, key): 42 | self.update_history() 43 | if key in self.cache: 44 | del self.cache[key] 45 | 46 | def reset(self): 47 | self.cache = OrderedDict() 48 | self.history = [] 49 | self.history_dict = Counter() -------------------------------------------------------------------------------- /experiments/scratch: -------------------------------------------------------------------------------- 1 | 2 | Average BHR on test data : 0.32431377571990927 LFU 20 episodes semi-gradient-sarsa 3 | Average BHR on test data : 0.3048781347374865 LRU 20 episodes semi-gradient-sarsa 4 | Average BHR on test data : 0.2069968126235923 FIFO 20 episodes semi-gradient-sarsa 5 | 6 | Average BHR on test data : 0.31541169628008753 LFU 20 episodes semi-gradient-sarsa 7 | Average BHR on test data : 0.30545926748786245 LRU 20 episodes semi-gradient-sarsa 8 | Average BHR on test data : 0.21447233150446082 FIFO 20 episodes semi-gradient-sarsa 9 | Average BHR on test data : 0.21905908945736902 LRU with action = 1 10 | Average BHR on test data : 0.34458940077996336 LFU with action = 1 11 | Average BHR on test data : 0.17755621299854102 FIFO with action = 1 12 | 13 | Average BHR on test data : 0.31167647066116966 LRU+LFU 20 episodes semi-gradient-sarsa 14 | 15 | Average BHR on test data : 0.339925737554025 LFU with random action 16 | Average BHR on test data : 0.2376831244487361 LRU with random action 17 | 18 | Average BHR on test data : 0.3367723665646943 19 | 20 | max_frequency = 1200 21 | 22 | TODO : 23 | 24 | 25 | 1) 1-d Tiling 26 | 2) Actor-critic 27 | 3) tune n-step sarsa and reinforce 28 | 4) try zipf distribution 29 | 30 | 31 | 32 | 33 | 2) Q-learning 34 | 35 | {'always_evict': [0.22056036436 36 | 37227, 0.2198801156035919, 0.22439159912212325, 0.22364186974022077, 0.2211926687800138], 'semi_gradient_sarsa_1': [0.32004784380764345, 0.30892408845407265, 
0.32025700429202836, 0.25863111078096196, 0.2979258752894557], 'semi_gradient_sarsa_5': [0.2950395667965652, 0.24151138777404516, 0.2820994152510916, 0.29906585138712416, 0.26672195128955045], 'reinforce': [0.2719402382715548, 0.27601263900307127, 0.23157174971041872, 0.258354871289979, 0.2662656307278579]} 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /replacement_policies/fifo.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from replacement_policies.policy_base import PolicyBase 3 | 4 | class FifoCache(PolicyBase): 5 | 6 | def __init__(self, capacity: int): 7 | self.capacity = capacity 8 | self.cache_stack = [] 9 | self.value_dict = {} 10 | self.history = [] 11 | self.history_dict = Counter() 12 | 13 | def update(self, key: int, val: int): 14 | return (key in self.history_dict) 15 | 16 | def get_remove_candidate(self): 17 | return self.cache_stack[-1] 18 | 19 | def update_history(self): 20 | candidate = self.get_remove_candidate() 21 | if (len(self.history) >= self.capacity): 22 | remove_item = self.history.pop(0) 23 | if(self.history_dict[remove_item] == 1): 24 | del self.history_dict[remove_item] 25 | else: 26 | self.history_dict[remove_item] -= 1 27 | self.history.append(candidate) 28 | self.history_dict[candidate] += 1 29 | 30 | def remove(self): 31 | self.update_history() 32 | key = self.cache_stack.pop() 33 | val = self.value_dict[key] 34 | del self.value_dict[key] 35 | return key ,val 36 | 37 | def put(self, key: int, value : int) -> None: 38 | if len(self.cache_stack) >= self.capacity: 39 | self.remove() 40 | self.cache_stack.append(key) 41 | self.value_dict[key] = value 42 | 43 | def remove_key(self, key): 44 | self.update_history() 45 | if key in self.cache_stack: 46 | self.cache_stack.remove(key) 47 | del self.value_dict[key] 48 | 49 | def reset(self): 50 | self.cache_stack = [] 51 | self.value_dict = {} 52 | self.history = [] 53 | self.history_dict = Counter() -------------------------------------------------------------------------------- /algorithms/semi_gradient_sarsa_algorithm.py: -------------------------------------------------------------------------------- 1 | # One step Semi Gradient Sarsa 2 | 3 | import numpy as np 4 | 5 | from state_action_approximations.state_action_approximation import StateActionApproximation 6 | 7 | def epsilon_greedy(Q:StateActionApproximation, epsilon, state, actions): 8 | random = np.random.binomial(1, epsilon) 9 | max_ac = 0 10 | if random == 0: 11 | max_q = np.NINF 12 | for action in actions: 13 | current_q = Q(state, action) 14 | if current_q > max_q: 15 | max_ac = action 16 | max_q = current_q 17 | else: 18 | action_size = len(actions) 19 | index = np.random.randint(action_size) 20 | max_ac = actions[index] 21 | return max_ac 22 | 23 | def semi_gradient_sarsa(env, gamma, alpha, Q:StateActionApproximation, 24 | epsilon, episodes, actions): 25 | #episodes = np.arange(num_episode) 26 | #train, eval = train_test_split(episodes, test_size=0.2) 27 | bhr_metric = {} 28 | rewards = {} 29 | for i in episodes: 30 | s_current = env.reset(i) 31 | action = epsilon_greedy(Q, epsilon, s_current, actions) 32 | done = False 33 | episode_rewards = [] 34 | while not done: 35 | s_next, reward, done, info = env.step(action) 36 | # if action == 0: 37 | # print(action) 38 | episode_rewards.append(reward) 39 | if done: 40 | Q.update(alpha, reward, s_current, action) 41 | bhr_metric[i] = info[2] 42 | else: 43 | 
next_action = epsilon_greedy(Q, epsilon, s_next, actions) 44 | G = reward + gamma*Q(s_next, next_action) 45 | Q.update(alpha, G, s_current, action) 46 | s_current = s_next 47 | action = next_action 48 | rewards[i] = episode_rewards 49 | return rewards, bhr_metric 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /algorithms/reinforce_algorithm.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from state_approximations.linear_v_approximation import Baseline 4 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 5 | 6 | def reinforce( 7 | env, #open-ai environment 8 | gamma:float, 9 | episodes, 10 | pi:LinearPolicyApproximation, 11 | V:Baseline): 12 | """ 13 | implement REINFORCE algorithm with and without baseline. 14 | 15 | input: 16 | env: target environment; openai gym 17 | gamma: discount factor 18 | num_episode: #episodes to iterate 19 | pi: policy 20 | V: baseline 21 | output: 22 | a list that includes the G_0 for every episodes. 23 | """ 24 | bhr_metric = {} 25 | return_rewards = {} 26 | for i in episodes: 27 | state = env.reset(i) 28 | done = False 29 | rewards = [0] 30 | states = [state] 31 | actions = [] 32 | episode_rewards = [] 33 | while not done: 34 | action = pi(state) 35 | state, r, done, info = env.step(action) 36 | rewards.append(r) 37 | episode_rewards.append(r) 38 | actions.append(action) 39 | if not done: 40 | states.append(state) 41 | else: 42 | bhr_metric[i] = info[2] 43 | return_rewards[i] = episode_rewards 44 | G = 0 45 | for t in range(len(states)): 46 | G += math.pow(gamma, t)*rewards[t + 1] 47 | delta = G - V(states[0]) 48 | V.update(states[0], G) 49 | pi.update(states[0], actions[0], 1, delta) 50 | i = 1 51 | gamma_t = 1 52 | while i < len(states): 53 | G = (G - rewards[i])/gamma 54 | gamma_t = gamma_t*gamma 55 | delta = G - V(states[i]) 56 | V.update(states[i], G) 57 | pi.update(states[i], actions[i], gamma_t, delta) 58 | i = i + 1 59 | 60 | return return_rewards, bhr_metric -------------------------------------------------------------------------------- /algorithms/true_online_sarsa_lambda.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import math 3 | import numpy as np 4 | 5 | from state_action_approximations.one_d_tc import StateActionOneDTileCoding 6 | 7 | def TrueOnlineSarsaLambda( 8 | env, 9 | epsilon:float, # exploration factor 10 | gamma:float, # discount factor 11 | lam:float, # decay rate 12 | alpha:float, # step size 13 | X:StateActionOneDTileCoding, 14 | episodes, 15 | ) -> np.array: 16 | """ 17 | Implement True online Sarsa(\lambda) 18 | """ 19 | def epsilon_greedy_policy(s,done,w,epsilon=.0): 20 | nA = env.action_space.n 21 | Q = [np.dot(w, X(s,a,done)) for a in range(nA)] 22 | 23 | if np.random.rand() < epsilon: 24 | return np.random.randint(nA) 25 | else: 26 | return np.argmax(Q) 27 | 28 | w = np.zeros((X.feature_vector_len())) 29 | bhr_metric = {} 30 | rewards = {} 31 | #TODO: implement this function 32 | for i in episodes: 33 | state = env.reset(i) 34 | done = False 35 | eps = copy.deepcopy(epsilon) 36 | action = epsilon_greedy_policy(state, done, w, eps) 37 | q_old = 0 38 | x = X(state, action, done) 39 | z = np.zeros(shape=X.feature_vector_len()) 40 | t = 1 41 | episode_rewards = [] 42 | while not done: 43 | t+=1 44 | state_new, r, done, info = env.step(action) 45 | episode_rewards.append(r) 46 | action_new = 
epsilon_greedy_policy(state_new, done, w, epsilon) 47 | x_new = X(state_new, action_new, done) 48 | q = np.dot(w, x) 49 | q_new = np.dot(w, x_new) 50 | delta = r + gamma*q_new - q 51 | multiplier = alpha*lam*gamma*np.dot(z, x) 52 | z = gamma*lam*z + (1-multiplier)*x 53 | w += alpha*(delta + q - q_old)*z - alpha*(q - q_old)*x 54 | q_old = q_new 55 | x = x_new 56 | action = action_new 57 | eps/=t 58 | if done: 59 | bhr_metric[i] = info[2] 60 | rewards[i] = episode_rewards 61 | return rewards, bhr_metric -------------------------------------------------------------------------------- /algorithms/actor_critic_eligibility_trace_algorithm_nn.py: -------------------------------------------------------------------------------- 1 | # Actor Critic with Eligibility Traces using Neural Network Approximation for State and Linear Approximation Policy 2 | 3 | import numpy as np 4 | 5 | from state_approximations.nn_v_approximation import NNStateApproximation 6 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 7 | 8 | def actor_critic_eligibility_trace_nn(env, gamma, alpha_theta, alpha_w, lambda_theta, lambda_w, 9 | V:NNStateApproximation, pi:LinearPolicyApproximation, 10 | episodes): 11 | bhr_metric = {} 12 | rewards = {} 13 | for i in episodes: 14 | s_current = env.reset(i) 15 | 16 | #print(s_current) 17 | #print("Feature length {}".format(list(V.model.model.weight.shape)) 18 | 19 | V_weights_shape = np.array(list(V.model.weight.shape)) 20 | pi_weights_shape = np.array(list(pi.model.model[0].weight.shape)) 21 | 22 | z_w = np.zeros(V_weights_shape) 23 | z_theta = np.zeros(pi_weights_shape) 24 | 25 | I = 1 26 | done = False 27 | episode_rewards = [] 28 | while not done: 29 | #s_current = [s_x /1000 for s_x in s_current] 30 | action = pi(s_current) 31 | s_next, reward, done, info = env.step(action) 32 | episode_rewards.append(reward) 33 | delta = reward + gamma*V(s_next)- V(s_current) 34 | z_w = gamma*lambda_w*z_w + V.return_gradient(s_current) 35 | z_theta = gamma*lambda_theta*z_theta + I*pi.return_gradient(s_current, action) 36 | # print("z_w = ", z_w) 37 | # print("z_theta = ", z_theta) 38 | # print("state: ", s_current) 39 | # print("pi.return_gradient ", pi.return_gradient(s_current, action)) 40 | V.manual_update(alpha_w*delta*z_w) 41 | pi.manual_update(alpha_theta*delta*z_theta) 42 | I = gamma*I 43 | s_current = s_next 44 | if done: 45 | bhr_metric[i] = info[2] 46 | rewards[i] = episode_rewards 47 | return rewards, bhr_metric 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /algorithms/actor_critic_eligibility_trace_algorithm_linear.py: -------------------------------------------------------------------------------- 1 | # Actor Critic with Eligibility Traces using Linear Approximation for State and Policy 2 | 3 | import numpy as np 4 | 5 | from state_approximations.linear_v_approximation import LinearStateApproximation 6 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 7 | 8 | def actor_critic_eligibility_trace_linear(env, gamma, alpha_theta, alpha_w, lambda_theta, lambda_w, 9 | V:LinearStateApproximation, pi:LinearPolicyApproximation, 10 | episodes): 11 | bhr_metric = {} 12 | rewards = {} 13 | for i in episodes: 14 | s_current = env.reset(i) 15 | 16 | #print(s_current) 17 | #print("Feature length {}".format(list(V.model.model.weight.shape)) 18 | 19 | V_weights_shape = np.array(list(V.model.model.weight.shape)) 20 | pi_weights_shape = 
np.array(list(pi.model.model[0].weight.shape)) 21 | 22 | z_w = np.zeros(V_weights_shape) 23 | z_theta = np.zeros(pi_weights_shape) 24 | 25 | I = 1 26 | done = False 27 | episode_rewards = [] 28 | while not done: 29 | #s_current = [s_x /1000 for s_x in s_current] 30 | action = pi(s_current) 31 | s_next, reward, done, info = env.step(action) 32 | episode_rewards.append(reward) 33 | delta = reward + gamma*V(s_next)- V(s_current) 34 | z_w = gamma*lambda_w*z_w + V.return_gradient(s_current) 35 | z_theta = gamma*lambda_theta*z_theta + I*pi.return_gradient(s_current, action) 36 | # print("z_w = ", z_w) 37 | # print("z_theta = ", z_theta) 38 | # print("state: ", s_current) 39 | # print("pi.return_gradient ", pi.return_gradient(s_current, action)) 40 | V.manual_update(alpha_w*delta*z_w) 41 | pi.manual_update(alpha_theta*delta*z_theta) 42 | I = gamma*I 43 | s_current = s_next 44 | if done: 45 | bhr_metric[i] = info[2] 46 | rewards[i] = episode_rewards 47 | return rewards, bhr_metric 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /algorithms/optimal_algorithm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from trace_loader import load_traces 4 | 5 | def get_next_access(id, current_time, next_access_times): 6 | if id not in next_access_times: 7 | return 20000 8 | else: 9 | for access_time in next_access_times[id]: 10 | if(access_time > current_time): 11 | return access_time 12 | return 20000 13 | 14 | def pre_process(ids): 15 | next_access_times = defaultdict(list) 16 | for time_step, id in enumerate(ids): 17 | next_access_times[id].append(time_step) 18 | return next_access_times 19 | 20 | def optimal_admission(episodes, cache_size, trace='test'): 21 | rewards = {} 22 | bhr_metric = {} 23 | for i in episodes: 24 | current_cache = {} 25 | bhr = 0 26 | ids = list(load_traces(trace, cache_size, i)[1]) 27 | next_access_times = pre_process(ids) 28 | episode_rewards = [] 29 | hits_since_previous_miss = 0 30 | for time_step, id in enumerate(ids): 31 | if id in current_cache: 32 | bhr += 1 33 | hits_since_previous_miss += 1 34 | else: 35 | episode_rewards.append(hits_since_previous_miss) 36 | hits_since_previous_miss = 0 37 | if len(current_cache) < cache_size: 38 | current_cache[id] = 1 39 | else: 40 | max_next_access = time_step 41 | max_next_access_id = -1 42 | for element in current_cache.keys(): 43 | next_access = get_next_access(element, time_step, next_access_times) 44 | if(next_access > max_next_access): 45 | max_next_access = next_access 46 | max_next_access_id = element 47 | if max_next_access > get_next_access(id, time_step, next_access_times): 48 | del current_cache[max_next_access_id] 49 | current_cache[id] = 1 50 | bhr_metric[i] = bhr/len(ids) 51 | rewards[i] = episode_rewards 52 | return rewards, bhr_metric 53 | -------------------------------------------------------------------------------- /algorithms/actor_critic_eligibility_trace_algorithm_tc.py: -------------------------------------------------------------------------------- 1 | # Actor Critic with Eligibility Traces using Linear Approximation for State and Policy 2 | 3 | import numpy as np 4 | 5 | from state_approximations.one_d_tc import StateOneDTileCoding 6 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 7 | 8 | def actor_critic_eligibility_trace_tc(env, gamma, alpha_theta, alpha_w, lambda_theta, lambda_w, 9 | V:StateOneDTileCoding, 
pi:LinearPolicyApproximation, 10 | episodes): 11 | bhr_metric = {} 12 | rewards = {} 13 | for i in episodes: 14 | s_current = env.reset(i) 15 | 16 | #print(s_current) 17 | print("Feature length {}".format(V.feature_vector_len())) 18 | 19 | V_weights_shape = np.array(V.feature_vector_len()) 20 | pi_weights_shape = np.array(list(pi.model.model[0].weight.shape)) 21 | 22 | z_w = np.zeros(V_weights_shape) 23 | z_theta = np.zeros(pi_weights_shape) 24 | 25 | V_weights = np.zeros(V_weights_shape) 26 | 27 | I = 1 28 | done = False 29 | episode_rewards = [] 30 | while not done: 31 | #s_current = [s_x /1000 for s_x in s_current] 32 | action = pi(s_current) 33 | s_next, reward, done, info = env.step(action) 34 | episode_rewards.append(reward) 35 | delta = reward + gamma*np.dot(V(s_next, done), V_weights)- np.dot(V(s_current,done), V_weights) 36 | z_w = gamma*lambda_w*z_w + V(s_current,done) 37 | z_theta = gamma*lambda_theta*z_theta + I*pi.return_gradient(s_current, action) 38 | # print("z_w = ", z_w) 39 | # print("z_theta = ", z_theta) 40 | # print("state: ", s_current) 41 | # print("pi.return_gradient ", pi.return_gradient(s_current, action)) 42 | V_weights += alpha_w*delta*z_w 43 | pi.manual_update(alpha_theta*delta*z_theta) 44 | I = gamma*I 45 | s_current = s_next 46 | if done: 47 | bhr_metric[i] = info[2] 48 | rewards[i] = episode_rewards 49 | return rewards, bhr_metric 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /state_action_approximations/linear_q_approximation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from state_action_approximations.state_action_approximation import StateActionApproximation 5 | from torch import nn 6 | 7 | class NeuralNetwork(nn.Module): 8 | def __init__(self, dims): 9 | super(NeuralNetwork, self).__init__() 10 | self.model = nn.Linear(dims, 1) 11 | 12 | def forward(self, x): 13 | return self.model(x) 14 | 15 | class LinearStateActionApproximation(StateActionApproximation): 16 | def create_model(self): 17 | self.model = NeuralNetwork(self.state_dims * self.num_actions) 18 | self.loss_fn = nn.MSELoss() 19 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr = self.alpha) 20 | 21 | def __init__(self, 22 | state_dims, num_actions, alpha): 23 | """ 24 | state_dims: the number of dimensions of state space 25 | """ 26 | # TODO: implement this method 27 | self.state_dims = state_dims 28 | self.num_actions = num_actions 29 | self.alpha = alpha 30 | self.create_model() 31 | 32 | def get_input(self, s, a): 33 | # if type(a) == int: 34 | # act = [a] 35 | input = np.zeros(shape = self.state_dims*self.num_actions) 36 | for i in range(self.state_dims): 37 | input[a*self.state_dims + i] = s[i] 38 | return torch.tensor(input) 39 | 40 | def __call__(self, s, a): 41 | # TODO: implement this method 42 | self.model.eval() 43 | input = self.get_input(s, a) 44 | pred = self.model(input.float()) 45 | return pred.detach().numpy()[0] 46 | 47 | def update(self, alpha, G, s, a): 48 | # TODO: implement this method 49 | self.model.train() 50 | input = self.get_input(s, a) 51 | pred = self.model(input.float()) 52 | G = torch.tensor([G], dtype=torch.float32) 53 | loss = 0.5 * self.loss_fn(pred, G) 54 | self.optimizer.zero_grad() 55 | loss.backward() 56 | self.optimizer.step() 57 | -------------------------------------------------------------------------------- /state_approximations/linear_v_approximation.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | 5 | class NeuralNetworkVA(nn.Module): 6 | def __init__(self, dims): 7 | super(NeuralNetworkVA, self).__init__() 8 | self.model = nn.Linear(dims, 1) 9 | 10 | def forward(self, x): 11 | return self.model(x) 12 | 13 | class Baseline(object): 14 | """ 15 | The dumbest baseline; a constant for every state 16 | """ 17 | def __init__(self,b): 18 | self.b = b 19 | 20 | def __call__(self,s) -> float: 21 | return self.b 22 | 23 | def update(self,s,G): 24 | pass 25 | 26 | class LinearStateApproximation(Baseline): 27 | 28 | def create_model(self): 29 | self.model = NeuralNetworkVA(self.state_dims) 30 | self.loss_fn = nn.MSELoss() 31 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr = self.alpha, betas=(0.9, 0.999)) 32 | 33 | def __init__(self, 34 | state_dims, 35 | alpha): 36 | """ 37 | state_dims: the number of dimensions of state space 38 | alpha: learning rate 39 | """ 40 | self.state_dims = state_dims 41 | self.alpha = alpha 42 | self.create_model() 43 | 44 | def __call__(self,s) -> float: 45 | self.model.eval() 46 | s = torch.tensor(s) 47 | pred = self.model(s.float()) 48 | return pred.detach().numpy()[0] 49 | 50 | def update(self,s,G): 51 | self.model.train() 52 | s = torch.tensor(s) 53 | pred = self.model(s.float()) 54 | G = torch.tensor([G], dtype=torch.float32) 55 | loss = 0.5 * self.loss_fn(pred, G) 56 | self.optimizer.zero_grad() 57 | loss.backward() 58 | self.optimizer.step() 59 | 60 | def return_gradient(self, s): 61 | self.model.train() 62 | s = torch.tensor(s) 63 | pred = self.model(s.float()) 64 | self.model.zero_grad() 65 | pred.backward() 66 | grad = self.model.model.weight.grad.numpy() 67 | #print("grad: ", grad) 68 | return grad 69 | 70 | def manual_update(self, update_vector): 71 | with torch.no_grad(): 72 | update_vector = torch.tensor(update_vector) 73 | self.model.model.weight += update_vector 74 | #print("Weights : ", self.model.model[0].weight) -------------------------------------------------------------------------------- /state_action_approximations/nn_q_approximation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from state_action_approximations.state_action_approximation import StateActionApproximation 5 | from collections import OrderedDict 6 | from torch import nn 7 | 8 | 9 | class NeuralNetwork(nn.Module): 10 | def __init__(self, dims): 11 | super(NeuralNetwork, self).__init__() 12 | self.model = nn.Sequential(OrderedDict([('fc1', nn.Linear(dims, dims)), 13 | ('fc2', nn.Linear(dims, 1))])) 14 | 15 | def forward(self, x): 16 | return self.model(x) 17 | 18 | class NeuralNetworkStateActionApproximation(StateActionApproximation): 19 | def create_model(self): 20 | self.model = NeuralNetwork(self.state_dims * self.num_actions) 21 | self.loss_fn = nn.MSELoss() 22 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr = self.alpha) 23 | 24 | def __init__(self, 25 | state_dims, num_actions, alpha): 26 | """ 27 | state_dims: the number of dimensions of state space 28 | """ 29 | # TODO: implement this method 30 | self.state_dims = state_dims 31 | self.num_actions = num_actions 32 | self.alpha = alpha 33 | self.create_model() 34 | 35 | def get_input(self, s, a): 36 | # if type(a) == int: 37 | # act = [a] 38 | input = np.zeros(shape = self.state_dims*self.num_actions) 39 | for i in range(self.state_dims): 40 | 
input[a*self.state_dims + i] = s[i] 41 | return torch.tensor(input) 42 | 43 | def __call__(self, s, a): 44 | # TODO: implement this method 45 | self.model.eval() 46 | input = self.get_input(s, a) 47 | pred = self.model(input.float()) 48 | return pred.detach().numpy()[0] 49 | 50 | def update(self, alpha, G, s, a): 51 | # TODO: implement this method 52 | self.model.train() 53 | input = self.get_input(s, a) 54 | pred = self.model(input.float()) 55 | G = torch.tensor([G], dtype=torch.float32) 56 | loss = 0.5 * self.loss_fn(pred, G) 57 | self.optimizer.zero_grad() 58 | loss.backward() 59 | self.optimizer.step() 60 | -------------------------------------------------------------------------------- /algorithms/actor_critic_one_step.py: -------------------------------------------------------------------------------- 1 | # Actor Critic with Eligibility Traces using Linear Approximation for State and Policy 2 | 3 | import numpy as np 4 | 5 | from state_approximations.linear_v_approximation import LinearStateApproximation 6 | from state_approximations.nn_v_approximation import NNStateApproximation 7 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 8 | 9 | def actor_critic_one_step_nn(env, gamma, alpha_theta, alpha_w, 10 | V:NNStateApproximation, pi:LinearPolicyApproximation, 11 | episodes): 12 | bhr_metric = {} 13 | rewards = {} 14 | for i in episodes: 15 | s_current = env.reset(i) 16 | I = 1 17 | done = False 18 | episode_rewards = [] 19 | while not done: 20 | action = pi(s_current) 21 | s_next, reward, done, info = env.step(action) 22 | episode_rewards.append(reward) 23 | delta = reward + gamma*V(s_next)- V(s_current) 24 | V.manual_update(V.return_gradient(s_current)*delta*alpha_w) 25 | pi.manual_update(pi.return_gradient(s_current, action)*alpha_theta*I*delta) 26 | I = gamma*I 27 | s_current = s_next 28 | if done: 29 | bhr_metric[i] = info[2] 30 | rewards[i] = episode_rewards 31 | return rewards, bhr_metric 32 | 33 | def actor_critic_one_step(env, gamma, alpha_theta, alpha_w, 34 | V:LinearStateApproximation, pi:LinearPolicyApproximation, 35 | episodes): 36 | bhr_metric = {} 37 | rewards = {} 38 | for i in episodes: 39 | s_current = env.reset(i) 40 | I = 1 41 | done = False 42 | episode_rewards = [] 43 | while not done: 44 | action = pi(s_current) 45 | s_next, reward, done, info = env.step(action) 46 | episode_rewards.append(reward) 47 | delta = reward + gamma*V(s_next)- V(s_current) 48 | V.manual_update(V.return_gradient(s_current)*delta*alpha_w) 49 | pi.manual_update(pi.return_gradient(s_current, action)*alpha_theta*I*delta) 50 | I = gamma*I 51 | s_current = s_next 52 | if done: 53 | bhr_metric[i] = info[2] 54 | rewards[i] = episode_rewards 55 | return rewards, bhr_metric 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /state_approximations/nn_v_approximation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from torch import nn 5 | from collections import OrderedDict 6 | 7 | class NeuralNetworkVA(nn.Module): 8 | def __init__(self, dims): 9 | super(NeuralNetworkVA, self).__init__() 10 | self.model = nn.Sequential(OrderedDict([('fc1', nn.Linear(dims, dims)), 11 | ('fc2', nn.Linear(dims, 1))])) 12 | 13 | def forward(self, x): 14 | return self.model(x) 15 | 16 | class Baseline(object): 17 | """ 18 | The dumbest baseline; a constant for every state 19 | """ 20 | def __init__(self,b): 21 | self.b = b 22 | 23 | def 
__call__(self,s) -> float: 24 | return self.b 25 | 26 | def update(self,s,G): 27 | pass 28 | 29 | class NNStateApproximation(Baseline): 30 | 31 | def create_model(self): 32 | self.model = NeuralNetworkVA(self.state_dims) 33 | self.loss_fn = nn.MSELoss() 34 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr = self.alpha, betas=(0.9, 0.999)) 35 | 36 | def __init__(self, 37 | state_dims, 38 | alpha): 39 | """ 40 | state_dims: the number of dimensions of state space 41 | alpha: learning rate 42 | """ 43 | self.state_dims = state_dims 44 | self.alpha = alpha 45 | self.create_model() 46 | 47 | def __call__(self,s) -> float: 48 | self.model.eval() 49 | s = torch.tensor(s) 50 | pred = self.model(s.float()) 51 | return pred.detach().numpy()[0] 52 | 53 | def update(self,s,G): 54 | self.model.train() 55 | s = torch.tensor(s) 56 | pred = self.model(s.float()) 57 | G = torch.tensor([G], dtype=torch.float32) 58 | loss = 0.5 * self.loss_fn(pred, G) 59 | self.optimizer.zero_grad() 60 | loss.backward() 61 | self.optimizer.step() 62 | 63 | def return_gradient(self, s): 64 | self.model.train() 65 | s = torch.tensor(s) 66 | pred = self.model(s.float()) 67 | self.model.zero_grad() 68 | pred.backward() 69 | grad = self.model.model.weight.grad.numpy() 70 | #print("grad: ", grad) 71 | return grad 72 | 73 | def manual_update(self, update_vector): 74 | with torch.no_grad(): 75 | update_vector = torch.tensor(update_vector) 76 | self.model.model.weight += update_vector 77 | #print("Weights : ", self.model.model[0].weight) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | trace/ 30 | .idea/ 31 | rlcar/ 32 | logs/ 33 | .DS_Store 34 | run.sh 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | *.csv 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | -------------------------------------------------------------------------------- /policy_approximations/linear_policy_approximation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from collections import OrderedDict 5 | 6 | class NeuralNetworkPA(nn.Module): 7 | def __init__(self, dims, outputs): 8 | super(NeuralNetworkPA, self).__init__() 9 | self.model = nn.Sequential(OrderedDict([('fc1', nn.Linear(dims, outputs)), 10 | ('act1', nn.Softmax(dim=0))])) 11 | 12 | def forward(self, x): 13 | return self.model(x) 14 | 15 | class LinearPolicyApproximation(): 16 | 17 | def create_model(self): 18 | self.model = NeuralNetworkPA(self.state_dims, self.num_actions) 19 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.alpha, betas=(0.9, 0.999)) 20 | 21 | 22 | def __init__(self, 23 | state_dims, 24 | num_actions, 25 | alpha): 26 | """ 27 | state_dims: the number of dimensions of state space 28 | action_dims: the number of possible actions 29 | alpha: learning rate 30 | """ 31 | self.state_dims = state_dims 32 | self.num_actions = num_actions 33 | self.alpha = alpha 34 | self.create_model() 35 | 36 | def __call__(self, s) -> int: 37 | self.model.eval() 38 | s = torch.tensor(s) 39 | pred = self.model(s.float()) 40 | action_probs = pred.detach().numpy() 41 | action = np.random.choice(np.arange(len(action_probs)), p=action_probs) 42 | return action 43 | 44 | def update(self, s, a, gamma_t, delta): 45 | """ 46 | s: state S_t 47 | a: action A_t 48 | gamma_t: gamma^t 49 | delta: G-v(S_t,w) 50 | """ 51 | self.model.train() 52 | s = torch.tensor(s) 53 | pred = self.model(s.float()) 54 | log_prob = torch.log(pred)[a].unsqueeze(0) 55 | loss = - delta * gamma_t * log_prob 56 | self.optimizer.zero_grad() 57 | loss.backward() 58 | self.optimizer.step() 59 | 60 | def return_gradient(self, s, a): 61 | self.model.train() 62 | s = torch.tensor(s) 63 | pred = self.model(s.float()) 64 | self.model.zero_grad() 65 | log_prob = torch.log(pred)[a].unsqueeze(0) 66 | log_prob.backward() 67 | grad = self.model.model[0].weight.grad.numpy() 68 | #print("grad: ", grad) 69 | return grad 70 | 71 | def manual_update(self, update_vector): 72 | with torch.no_grad(): 73 | update_vector = torch.tensor(update_vector) 74 | self.model.model[0].weight += update_vector 75 | #print("Weights : ", self.model.model[0].weight) 76 | 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RLCaR 2 | Reinforcement Learning for Cache 
admission and Replacement 3 | 4 | We propose to apply reinforcement learning on caching systems. The first problem 5 | we consider is to decide whether we want to admit an object in the cache, when 6 | an object request leads to a cache miss. While cache replacement policies have 7 | received significant traction over the years, most systems use simple LRU for 8 | eviction, without explicit admission algorithms. The optimal algorithm for solving 9 | cache admission will require access to future requests, thus making it impractical. 10 | We train an RL agent to give a binary decision of admit/don’t admit for each cache 11 | miss. We show that using our RL agent gives a higher byte hit rate compared 12 | to always admitting on a cache miss or using a random policy to admit an item 13 | in the cache when LRU (Least Recently Used) is used as the cache replacement 14 | policy. The next problem that we consider is the more common problem of cache 15 | replacement, i.e, deciding which object to evict from the cache on a miss. We model 16 | this as an adversarial bandit problem, treating LRU, LFU (Least Frequently Used) 17 | and FIFO (First In First Out) as experts, and solve it using the Hedge algorithm, 18 | assuming full feedback. We show that the algorithm eventually converges to the 19 | best expert. Our experiments are based on a simulated environment, where the 20 | cache traces are generated using a Zip-f distribution, which has been widely used 21 | in simulations. 22 | 23 | ## Environment Setup 24 | Create a new python environment and install dependencies using `pip3 install -r requirements.txt` 25 | 26 | ## Instructions to Run 27 | To run with default arguments : `python3 main.py` 28 | 29 | Arguments : 30 | + `-ne NUM_EPISODES, --num_episodes NUM_EPISODES 31 | Number of episodes` 32 | 33 | + `-nr NUM_REPETITIONS, --num_repetitions NUM_REPETITIONS 34 | Number of repetitions` 35 | 36 | + `-fa FUNCTION_APPROXIMATION, --function_approximation FUNCTION_APPROXIMATION 37 | function approximation to use [linear, tc, nn]` 38 | 39 | + `-n_steps N_STEPS, --n_steps N_STEPS 40 | number of steps in sarsa` 41 | 42 | + `-lam LAM, --lam LAM lambda in sarsa` 43 | 44 | + `-rl RL_ALGO, --rl_algo RL_ALGO 45 | rl algorithm to use [always_evict, random_eviction, actor_critic, n_step_sarsa, optimal, sarsa_lambda]` 46 | 47 | + `-policy POLICY, --policy POLICY 48 | cache replacement policy space separated [LRU, LFU, FIFO]` 49 | 50 | + `-ts TEST_SIZE, --test_size TEST_SIZE 51 | test size` 52 | 53 | + `-cs CACHE_SIZE, --cache_size CACHE_SIZE 54 | cache size` 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /replacement_policies/lfu.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections import defaultdict, Counter 3 | from replacement_policies.policy_base import PolicyBase 4 | 5 | class LFUCache(PolicyBase): 6 | 7 | def __init__(self, capacity: int): 8 | self.capacity = capacity 9 | self.object_to_count = {} 10 | self.count_to_object = defaultdict(defaultdict) 11 | self.min_count = None 12 | self.history = [] 13 | self.history_dict = Counter() 14 | 15 | def update(self, key: int, val: int): 16 | if key in self.object_to_count: 17 | count = self.object_to_count[key] 18 | self.object_to_count[key] += 1 19 | size = self.count_to_object[count][key] 20 | del self.count_to_object[count][key] 21 | self.count_to_object[count + 1][key] = size 22 | if not 
self.count_to_object[self.min_count]: 23 | self.min_count += 1 24 | is_present = (key in self.history_dict) 25 | return is_present 26 | 27 | def get_remove_candidate(self): 28 | return next(iter(self.count_to_object[self.min_count])) 29 | 30 | def update_history(self): 31 | candidate = self.get_remove_candidate() 32 | if (len(self.history) >= self.capacity): 33 | remove_item = self.history.pop(0) 34 | if(self.history_dict[remove_item] == 1): 35 | del self.history_dict[remove_item] 36 | else: 37 | self.history_dict[remove_item] -= 1 38 | self.history.append(candidate) 39 | self.history_dict[candidate] += 1 40 | 41 | def remove(self): 42 | self.update_history() 43 | count_dictionary = self.count_to_object[self.min_count] 44 | key = random.choice(list(count_dictionary.keys())) 45 | val = count_dictionary[key] 46 | del self.count_to_object[self.min_count][key] 47 | del self.object_to_count[key] 48 | return key ,val 49 | 50 | def put(self, key: int, value : int) -> None: 51 | if len(self.object_to_count) >= self.capacity: 52 | self.remove() 53 | self.min_count = 1 54 | self.object_to_count[key] = 1 55 | self.count_to_object[1][key] = value 56 | 57 | def remove_key(self, key): 58 | self.update_history() 59 | if key in self.object_to_count: 60 | count = self.object_to_count[key] 61 | del self.object_to_count[key] 62 | del self.count_to_object[count][key] 63 | if not self.count_to_object[self.min_count]: 64 | self.min_count += 1 65 | 66 | def reset(self): 67 | self.object_to_count = {} 68 | self.count_to_object = defaultdict(defaultdict) 69 | self.min_count = None 70 | self.history = [] 71 | self.history_dict = Counter() -------------------------------------------------------------------------------- /replacement_agent.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | from replacement_policies.lru import LRUCache 6 | from replacement_policies.lfu import LFUCache 7 | from replacement_policies.fifo import FifoCache 8 | from utils.weights_logger import WeightLogger 9 | 10 | class ReplacementAgent: 11 | def __init__(self, capacity, policies, episode_index): 12 | self.capacity = capacity 13 | self.policies = policies 14 | self.experts = [] 15 | self.num_experts = len(policies) 16 | self.current_expert = 0 17 | self.hit_reward = 1 18 | self.miss_reward = -0.5 19 | self.epsilon = 0.1 20 | if "LRU" in policies: 21 | self.experts.append(LRUCache(capacity)) 22 | if "LFU" in policies: 23 | self.experts.append(LFUCache(capacity)) 24 | if "FIFO" in policies: 25 | self.experts.append(FifoCache(capacity)) 26 | self.running_reward = 0 27 | 28 | self.weights = np.ones(shape=self.num_experts) 29 | self.reward = np.zeros(shape=self.num_experts) 30 | self.weight_logger = WeightLogger() 31 | self.tick = 1 32 | self.episode_index = episode_index 33 | 34 | def update(self, key: int, obj_size) -> None: 35 | for index, expert in enumerate(self.experts): 36 | is_present_in_history = expert.update(key, obj_size) 37 | if is_present_in_history: 38 | self.reward[index] = self.miss_reward 39 | else: 40 | self.reward[index] = self.hit_reward 41 | if(self.num_experts > 1): 42 | self.weight_update() 43 | 44 | def remove(self): 45 | current_expert = int(np.random.choice(np.arange(self.num_experts), 1, p=self.weights/np.sum(self.weights))) 46 | key ,val = self.experts[current_expert].remove() 47 | for i in range(self.num_experts): 48 | if i != current_expert: 49 | self.experts[i].remove_key(key) 50 | self.running_reward = 0 51 | return key, val 52 
| 53 | def put(self, key: int, value : int) -> None: 54 | for expert in self.experts: 55 | expert.put(key, value) 56 | self.update(key, value) 57 | 58 | def weight_update(self): 59 | for index, expert in enumerate(self.experts): 60 | self.weights[index] *= math.pow(1 + self.epsilon, self.reward[index]) 61 | #print("weights = ", self.weights) 62 | self.weights = self.weights / np.sum(self.weights) 63 | self.weight_logger.update_weights(self.weights, self.tick) 64 | self.tick += 1 65 | 66 | def reset(self, index): 67 | for expert in self.experts: 68 | expert.reset() 69 | self.weights = self.weights/np.sum(self.weights) 70 | self.current_expert = 0 71 | self.weight_logger.end(self.episode_index) 72 | self.episode_index = index -------------------------------------------------------------------------------- /state_approximations/one_d_tc.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | class StateOneDTileCoding(): 4 | 5 | def get_state(self, state): 6 | states = [] 7 | for tile_index in range(self.num_tilings): 8 | indices = [] 9 | previous_index = 0 10 | for i in range(self.tiling_dimensions): 11 | start = self.state_low[i] - tile_index * self.tile_width[i]/self.num_tilings 12 | diff = state[i] - start 13 | index = math.floor(diff / self.tile_width[i]) 14 | index = min(index, self.tiles_per_dim[i] - 1) 15 | index += previous_index 16 | indices.append(index) 17 | previous_index += self.tiles_per_dim[i] 18 | states.append(indices) 19 | return states 20 | 21 | def __init__(self, 22 | state_low:np.array, 23 | state_high:np.array, 24 | num_tilings:int, 25 | tile_width:np.array): 26 | """ 27 | state_low: possible minimum value for each dimension in state 28 | state_high: possible maimum value for each dimension in state 29 | num_actions: the number of possible actions 30 | num_tilings: # tilings 31 | tile_width: tile width for each dimension 32 | """ 33 | self.num_tilings = num_tilings 34 | self.tile_width = tile_width 35 | self.state_low = state_low 36 | self.state_high = state_high 37 | self.tiling_dimensions = len(self.state_low) 38 | tiles_per_dim = np.zeros(shape=self.tiling_dimensions, dtype=np.int64) 39 | for i in range(self.tiling_dimensions): 40 | tiles_per_dim[i] = math.ceil((self.state_high[i] - self.state_low[i]) /self.tile_width[i]) + 1 41 | self.tiles_per_dim = tiles_per_dim 42 | self.num_tiles_per_tiling = np.sum(self.tiles_per_dim) 43 | self.feature_dims = self.num_tilings*self.num_tiles_per_tiling 44 | self.feature_array = np.zeros(shape=(2)) 45 | self.feature_array[0] = self.num_tilings 46 | self.feature_array[1] = self.num_tiles_per_tiling 47 | self.feature_array = self.feature_array.astype(int) 48 | 49 | def feature_vector_len(self) -> int: 50 | """ 51 | return dimension of feature_vector: d = num_actions * num_tilings * num_tiles 52 | """ 53 | return self.feature_dims 54 | 55 | def __call__(self, s, done) -> np.array: 56 | """ 57 | implement function x: S+ x A -> [0,1]^d 58 | if done is True, then return 0^d 59 | """ 60 | # feature = np.zeros(shape=self.feature_vector_len()) 61 | feature = np.zeros(tuple(self.feature_array)) 62 | if done: 63 | return feature.flatten() 64 | active_states = self.get_state(s) 65 | for i in range(len(active_states)): 66 | feature[i][active_states[i]] = 1 67 | return feature.flatten() -------------------------------------------------------------------------------- /trace_generator.py: -------------------------------------------------------------------------------- 1 | 
import matplotlib.pyplot as plt 2 | import numpy as np 3 | import math 4 | 5 | import pandas as pd 6 | 7 | from trace_loader import load_traces 8 | from collections import Counter 9 | 10 | def analyse_trace(index): 11 | trace = load_traces('test', 20, index) 12 | 13 | counts = Counter() 14 | total_count = 0 15 | for index, row in trace.iterrows(): 16 | counts[row[1]]+=1 17 | total_count+=1 18 | for key in counts: 19 | counts[key] /= total_count 20 | 21 | print("Total Count : ", total_count) 22 | print("Total Unique Objects : ", len(counts)) 23 | 24 | real_probabilities = np.sort(np.array(list(counts.values())))[::-1] 25 | 26 | predicted_probabilities = np.zeros(len(counts)) 27 | for i in range(1,len(counts)+1): 28 | predicted_probabilities[i-1] = math.pow(i, -(0.939)) 29 | predicted_probabilities /= np.sum(predicted_probabilities) 30 | 31 | # print("real probabilities : ", real_probabilities) 32 | # print("predicted probabilities : ", predicted_probabilities) 33 | 34 | # plt.figure() 35 | # plt.plot(real_probabilities, label='real_probabilities') 36 | # plt.plot(predicted_probabilities, label='predicted_probabilities') 37 | # plt.legend() 38 | # plt.show() 39 | 40 | def generate_zipf(alpha, total_requests, unique_requests): 41 | predicted_probabilities = np.zeros(unique_requests) 42 | for i in range(1, unique_requests + 1): 43 | predicted_probabilities[i - 1] = math.pow(i, -alpha) 44 | predicted_probabilities /= np.sum(predicted_probabilities) 45 | 46 | trace = np.random.choice(np.arange(unique_requests), p=predicted_probabilities, size=total_requests) 47 | trace_dict = {"timestamp" : np.arange(total_requests), 48 | "id" : trace, 49 | "obj_size" : np.ones(total_requests)} 50 | return pd.DataFrame(trace_dict) 51 | 52 | def generate_lru_optimal(total_requests, unique_requests): 53 | trace_subarray = np.zeros(2*unique_requests) 54 | trace_subarray[:unique_requests] = np.arange(unique_requests) 55 | trace_subarray[unique_requests:] = np.flip(np.arange(unique_requests)) 56 | trace = np.repeat(trace_subarray, int(total_requests/(2*unique_requests)) + 1) 57 | trace = trace[:total_requests] 58 | trace_dict = {"timestamp": np.arange(total_requests), 59 | "id": trace, 60 | "obj_size": np.ones(total_requests)} 61 | return pd.DataFrame(trace_dict) 62 | 63 | alpha = 0.5 64 | num_request = 100000 65 | unique_requests = 9000 66 | 67 | for i in range(1): 68 | trace_df = generate_zipf(alpha, num_request, unique_requests) 69 | file_name = "trace/zipf_{}/trace_0.tr".format(alpha, i) 70 | trace_df.to_csv(file_name, index=False, header=False, sep=" ") 71 | 72 | # for i in range(1): 73 | # trace_df = generate_lru_optimal(num_request, unique_requests) 74 | # file_name = "trace/lru_optimal/trace_{}.tr".format(i) 75 | # trace_df.to_csv(file_name, index=False, header=False, sep=" ") 76 | -------------------------------------------------------------------------------- /state_action_approximations/tile_coding_state_action.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | class StateActionFeatureVectorWithTile(): 4 | 5 | def get_state(self, state): 6 | states = [] 7 | for tile_index in range(self.num_tilings): 8 | indices = [tile_index] 9 | for i in range(self.tiling_dimensions): 10 | start = self.state_low[i] - tile_index * self.tile_width[i]/self.num_tilings 11 | diff = state[i] - start 12 | index = math.floor(diff / self.tile_width[i]) 13 | index = min(index, self.tiles_per_dim[i] - 1) 14 | indices.append(index) 15 | 
states.append(indices) 16 | return states 17 | 18 | def __init__(self, 19 | state_low:np.array, 20 | state_high:np.array, 21 | num_actions:int, 22 | num_tilings:int, 23 | tile_width:np.array): 24 | """ 25 | state_low: possible minimum value for each dimension in state 26 | state_high: possible maimum value for each dimension in state 27 | num_actions: the number of possible actions 28 | num_tilings: # tilings 29 | tile_width: tile width for each dimension 30 | """ 31 | self.num_tilings = num_tilings 32 | self.tile_width = tile_width 33 | self.state_low = state_low 34 | self.state_high = state_high 35 | self.num_actions = num_actions 36 | self.tiling_dimensions = len(self.state_low) 37 | tiles_per_dim = np.zeros(shape=self.tiling_dimensions, dtype=np.int64) 38 | for i in range(self.tiling_dimensions): 39 | tiles_per_dim[i] = math.ceil((self.state_high[i] - self.state_low[i]) /self.tile_width[i]) + 1 40 | self.tiles_per_dim = tiles_per_dim 41 | self.num_tiles_per_tiling = np.prod(self.tiles_per_dim) 42 | self.feature_dims = self.num_actions * self.num_tilings*self.num_tiles_per_tiling 43 | self.feature_array = np.zeros(shape=(2 + self.tiling_dimensions)) 44 | self.feature_array[0] = self.num_actions 45 | self.feature_array[1] = self.num_tilings 46 | self.feature_array[2:] = self.tiles_per_dim 47 | self.feature_array = self.feature_array.astype(int) 48 | 49 | def feature_vector_len(self) -> int: 50 | """ 51 | return dimension of feature_vector: d = num_actions * num_tilings * num_tiles 52 | """ 53 | return self.feature_dims 54 | 55 | def __call__(self, s, done, a) -> np.array: 56 | """ 57 | implement function x: S+ x A -> [0,1]^d 58 | if done is True, then return 0^d 59 | """ 60 | # feature = np.zeros(shape=self.feature_vector_len()) 61 | feature = np.zeros(tuple(self.feature_array)) 62 | if done: 63 | return feature.flatten() 64 | active_states = self.get_state(s) 65 | for i in range(len(active_states)): 66 | feature[a][tuple(active_states[i])] = 1 67 | return feature.flatten() -------------------------------------------------------------------------------- /state_action_approximations/one_d_tc.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | class StateActionOneDTileCoding(): 5 | 6 | def get_state(self, state): 7 | states = [] 8 | for tile_index in range(self.num_tilings): 9 | indices = [] 10 | previous_index = 0 11 | for i in range(self.tiling_dimensions): 12 | start = self.state_low[i] - tile_index * self.tile_width[i]/self.num_tilings 13 | diff = state[i] - start 14 | index = math.floor(diff / self.tile_width[i]) 15 | index = min(index, self.tiles_per_dim[i] - 1) 16 | index += previous_index 17 | indices.append(index) 18 | previous_index += self.tiles_per_dim[i] 19 | states.append(indices) 20 | return states 21 | 22 | def __init__(self, 23 | state_low:np.array, 24 | state_high:np.array, 25 | num_tilings:int, 26 | tile_width:np.array, 27 | num_actions:int): 28 | """ 29 | state_low: possible minimum value for each dimension in state 30 | state_high: possible maimum value for each dimension in state 31 | num_actions: the number of possible actions 32 | num_tilings: # tilings 33 | tile_width: tile width for each dimension 34 | """ 35 | self.num_tilings = num_tilings 36 | self.tile_width = tile_width 37 | self.state_low = state_low 38 | self.state_high = state_high 39 | self.tiling_dimensions = len(self.state_low) 40 | self.num_actions = num_actions 41 | tiles_per_dim = np.zeros(shape=self.tiling_dimensions, 
dtype=np.int64) 42 | for i in range(self.tiling_dimensions): 43 | tiles_per_dim[i] = math.ceil((self.state_high[i] - self.state_low[i]) /self.tile_width[i]) + 1 44 | self.tiles_per_dim = tiles_per_dim 45 | self.num_tiles_per_tiling = np.sum(self.tiles_per_dim) 46 | self.feature_dims = self.num_tilings*self.num_tiles_per_tiling*self.num_actions 47 | self.feature_array = np.zeros(shape=(3)) 48 | self.feature_array[0] = self.num_actions 49 | self.feature_array[1] = self.num_tilings 50 | self.feature_array[2] = self.num_tiles_per_tiling 51 | self.feature_array = self.feature_array.astype(int) 52 | 53 | def feature_vector_len(self) -> int: 54 | """ 55 | return dimension of feature_vector: d = num_actions * num_tilings * num_tiles 56 | """ 57 | return self.feature_dims 58 | 59 | def __call__(self, s, a, done) -> np.array: 60 | """ 61 | implement function x: S+ x A -> [0,1]^d 62 | if done is True, then return 0^d 63 | """ 64 | # feature = np.zeros(shape=self.feature_vector_len()) 65 | feature = np.zeros(tuple(self.feature_array)) 66 | if done: 67 | return feature.flatten() 68 | active_states = self.get_state(s) 69 | for i in range(len(active_states)): 70 | feature[a][i][active_states[i]] = 1 71 | return feature.flatten() -------------------------------------------------------------------------------- /trace_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import statistics 6 | 7 | from collections import Counter 8 | 9 | 10 | def load_traces(trace : str, cache_size, rnd): 11 | if trace == 'test': 12 | trace_folder = os.curdir + '/trace/' 13 | print(trace_folder) 14 | 15 | print('Load #%i trace for cache size of %i' % (rnd, cache_size)) 16 | 17 | # load time, request id, request size 18 | df = pd.read_csv(trace_folder + 'test_trace/test_' + str(rnd) + '.tr', sep=' ', header=None) 19 | # remaining cache size, object last access time 20 | df[3], df[4] = cache_size, 0 21 | df[2] = 1 22 | else: 23 | trace_folder = os.curdir + '/trace/' 24 | print(trace_folder) 25 | 26 | print('Load #%i trace for cache size of %i' % (rnd, cache_size)) 27 | 28 | # load time, request id, request size 29 | df = pd.read_csv(trace_folder + trace + '/trace_' + str(rnd) + '.tr', sep=' ', header=None) 30 | # remaining cache size, object last access time 31 | df[3], df[4] = cache_size, 0 32 | df[2] = 1 33 | 34 | # elif trace == 'real': 35 | # df = [] 36 | # else: 37 | # # load user's trace 38 | # df = pd.read_csv(trace, sep=' ', header=None) 39 | # df[3], df[4] = cache_size, 0 40 | 41 | return df 42 | 43 | def get_stats(df): 44 | cache_unseen_default = 500 45 | 46 | obj_freq = Counter() 47 | obj_interarrival_time = {} 48 | all_interarrival_times = [] 49 | last_access_time = [] 50 | for index, row in df.iterrows(): 51 | obj_freq[row[1]] += 1 52 | if(row[1] not in obj_interarrival_time): 53 | obj_interarrival_time[row[1]] = index 54 | last_access_time.append(cache_unseen_default) 55 | else: 56 | all_interarrival_times.append(index - obj_interarrival_time[row[1]]) 57 | last_access_time.append(index - obj_interarrival_time[row[1]]) 58 | obj_interarrival_time[row[1]] = index 59 | 60 | # stats for object frequency 61 | obj_freq_mean = statistics.mean(obj_freq.values()) 62 | obj_freq_stdev = statistics.stdev(obj_freq.values()) 63 | 64 | # stats for object size 65 | obj_size_mean = statistics.mean(df[2]) 66 | obj_size_stdev = statistics.stdev(df[2]) 67 | 68 | # stats for interarrival times 69 | obj_interarrival_time_mean = 
statistics.mean(all_interarrival_times) 70 | obj_interarrival_time_stdev = statistics.stdev(all_interarrival_times) 71 | 72 | # stats for last access time 73 | last_access_time_mean = statistics.mean(last_access_time) 74 | last_access_time_stdev = statistics.stdev(last_access_time) 75 | 76 | #stats for rank 77 | ranks = np.arange(len(obj_freq)) 78 | rank_mean = statistics.mean(ranks) 79 | rank_stdev = statistics.stdev(ranks) 80 | 81 | means = [obj_size_mean, 0, last_access_time_mean, obj_freq_mean, obj_interarrival_time_mean, rank_mean] 82 | stddevs = [obj_size_stdev, 1, last_access_time_stdev, obj_freq_stdev, obj_interarrival_time_stdev, rank_stdev] 83 | 84 | for index, stddev in enumerate(stddevs): 85 | if(stddev == 0): 86 | stddevs[index] = 1 87 | 88 | return means, stddevs 89 | 90 | 91 | print("running") 92 | df = load_traces('test', 20, 5) 93 | obj_freq = Counter() 94 | for index, row in df.iterrows(): 95 | obj_freq[row[1]] += 1 96 | freq = pd.DataFrame.from_records(list(dict(obj_freq).items()), columns=['id','count']) 97 | file_name = "freq.csv" 98 | freq.to_csv(file_name, index=False, header=False, sep=",") 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /algorithms/semi_gradient_n_step_sarsa_algorithm.py: -------------------------------------------------------------------------------- 1 | # One step Semi Gradient Sarsa 2 | 3 | import numpy as np 4 | import math 5 | import copy 6 | 7 | from state_action_approximations.state_action_approximation import StateActionApproximation 8 | from state_action_approximations.one_d_tc import StateActionOneDTileCoding 9 | 10 | def epsilon_greedy(Q:StateActionApproximation, epsilon, state, actions): 11 | random = np.random.binomial(1, epsilon) 12 | max_ac = 0 13 | if random == 0: 14 | max_q = np.NINF 15 | for action in actions: 16 | current_q = Q(state, action) 17 | if current_q > max_q: 18 | max_ac = action 19 | max_q = current_q 20 | else: 21 | action_size = len(actions) 22 | index = np.random.randint(action_size) 23 | max_ac = actions[index] 24 | return max_ac 25 | 26 | def semi_gradient_n_step_sarsa(env, gamma, alpha, Q:StateActionApproximation, 27 | epsilon, episodes, actions, n): 28 | bhr_metric = {} 29 | rewards = {} 30 | for i in episodes: 31 | s_current = env.reset(i) 32 | action = epsilon_greedy(Q, epsilon, s_current, actions) 33 | done = False 34 | episode_rewards = [] 35 | episode = [] 36 | t = 0 37 | T = math.inf 38 | while not done: 39 | if(t < T): 40 | s_next, reward, done, info = env.step(action) 41 | if done: 42 | episode.append((s_current, action, s_next, reward, -1)) 43 | T = t + 1 44 | bhr_metric[i] = info[2] 45 | else: 46 | next_action = epsilon_greedy(Q, epsilon, s_next, actions) 47 | episode.append((s_current, action, s_next, reward, next_action)) 48 | s_current = s_next 49 | action = next_action 50 | tau = t - n + 1 51 | if(tau >= 0): 52 | G = 0 53 | discount = 1 54 | for j in range(tau, min(tau+n, T)): 55 | G += discount*episode[j][3] 56 | discount *= gamma 57 | if(tau+n < T): 58 | G = G + np.power(gamma,n)*Q(episode[tau+n-1][2], episode[tau+n-1][4]) 59 | Q.update(alpha, G, episode[tau][0], episode[tau][1]) 60 | t += 1 61 | if(tau == T-1): 62 | break 63 | rewards[i] = episode_rewards 64 | return rewards, bhr_metric 65 | 66 | def epsilon_greedy_tc(Q, epsilon, state, actions, X, done): 67 | random = np.random.binomial(1, epsilon) 68 | max_ac = 0 69 | if random == 0: 70 | max_q = np.NINF 71 | for action in actions: 72 | current_q = Q(state, action, X, done) 73 
| if current_q > max_q: 74 | max_ac = action 75 | max_q = current_q 76 | else: 77 | action_size = len(actions) 78 | index = np.random.randint(action_size) 79 | max_ac = actions[index] 80 | return max_ac 81 | 82 | def semi_gradient_n_step_sarsa_tc(env, gamma, alpha, 83 | X:StateActionOneDTileCoding, 84 | epsilon, episodes, actions, n): 85 | 86 | bhr_metric = {} 87 | rewards = {} 88 | weights = np.zeros(X.feature_vector_len()) 89 | 90 | def Q(state, action, X, done): 91 | features = X(state, action, done) 92 | return np.dot(features, weights) 93 | 94 | for i in episodes: 95 | s_current = env.reset(i) 96 | done = False 97 | action = epsilon_greedy_tc(Q, epsilon, s_current, actions, X, done) 98 | episode_rewards = [] 99 | episode = [] 100 | t = 0 101 | T = math.inf 102 | while not done: 103 | if(t < T): 104 | s_next, reward, done, info = env.step(action) 105 | if done: 106 | episode.append((s_current, action, s_next, reward, -1)) 107 | T = t + 1 108 | bhr_metric[i] = info[2] 109 | else: 110 | next_action = epsilon_greedy_tc(Q, epsilon, s_next, actions, X, done) 111 | episode.append((s_current, action, s_next, reward, next_action)) 112 | s_current = s_next 113 | action = next_action 114 | tau = t - n + 1 115 | if(tau >= 0): 116 | G = 0 117 | discount = 1 118 | for j in range(tau, min(tau+n, T)): 119 | G += discount*episode[j][3] 120 | discount *= gamma 121 | if(tau+n < T): 122 | G = G + np.power(gamma,n)*Q(episode[tau+n-1][2], episode[tau+n-1][4], X, done) 123 | delta = alpha*(G - Q(episode[tau][0], episode[tau][1], X, done)) 124 | weights += X(episode[tau][0], episode[tau][1], done)*delta 125 | t += 1 126 | if(tau == T-1): 127 | break 128 | rewards[i] = episode_rewards 129 | return rewards, bhr_metric -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.model_selection import train_test_split 7 | from cache import CacheEnv 8 | 9 | from algorithms.deterministic import always_evict, random_eviction 10 | from algorithms.semi_gradient_n_step_sarsa_algorithm import semi_gradient_n_step_sarsa, semi_gradient_n_step_sarsa_tc 11 | from algorithms.actor_critic_eligibility_trace_algorithm_linear import actor_critic_eligibility_trace_linear 12 | from algorithms.actor_critic_eligibility_trace_algorithm_tc import actor_critic_eligibility_trace_tc 13 | from algorithms.optimal_algorithm import optimal_admission 14 | from algorithms.reinforce_algorithm import reinforce 15 | from algorithms.true_online_sarsa_lambda import TrueOnlineSarsaLambda 16 | from state_approximations.linear_v_approximation import LinearStateApproximation 17 | from state_approximations.one_d_tc import StateOneDTileCoding 18 | from policy_approximations.linear_policy_approximation import LinearPolicyApproximation 19 | from state_action_approximations.one_d_tc import StateActionOneDTileCoding 20 | from state_action_approximations.linear_q_approximation import LinearStateActionApproximation 21 | from state_action_approximations.nn_q_approximation import NeuralNetworkStateActionApproximation 22 | 23 | def plot_reward(rewards, filename): 24 | plt.plot(np.arange(0, len(rewards)), np.cumsum(rewards)) 25 | plt.xlabel("time") 26 | plt.ylabel("cumulative reward") 27 | plt.savefig(filename) 28 | plt.close() 29 | #plt.show() 30 | 31 | def get_metrics(test, episode, rewards, bhr, filename): 32 | avg_test_bhr = 0.0 33 
| for index in test: 34 | avg_test_bhr += bhr[index] 35 | avg_test_bhr /= len(test) 36 | print("Average BHR on test data : ", avg_test_bhr) 37 | return avg_test_bhr 38 | # plot_reward(rewards[test[episode]], filename) 39 | 40 | def run_n_step_sarsa_linear(env, train, test, n=1): 41 | print("Running ", n, "-step Sarsa") 42 | # Semi-gradient n-step Sarsa with linear function approximation 43 | epsilon = 0.1 44 | actions = [0, 1] 45 | gamma = 1 46 | if n == 1: 47 | alpha = 1e-2 48 | else: 49 | alpha = 1e-3 50 | L = LinearStateActionApproximation(5, 2, alpha) 51 | semi_gradient_n_step_sarsa(env, gamma, alpha, L, epsilon, train, actions, n) 52 | rewards, bhr = semi_gradient_n_step_sarsa(env, gamma, alpha, L, epsilon, test, actions, n) 53 | print("======================") 54 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/semi_gradient_n_step_sarsa_with_linear_approx.png") 55 | 56 | def run_n_step_sarsa_nn(env, train, test, n=1): 57 | print("Running ", n, "-step Sarsa with nn approximation") 58 | # Semi-gradient n-step Sarsa with neural network function approximation 59 | epsilon = 0.1 60 | actions = [0, 1] 61 | gamma = 1 62 | alpha = 1e-2 63 | L = NeuralNetworkStateActionApproximation(5, 2, alpha) 64 | semi_gradient_n_step_sarsa(env, gamma, alpha, L, epsilon, train, actions, n) 65 | rewards, bhr = semi_gradient_n_step_sarsa(env, gamma, alpha, L, epsilon, test, actions, n) 66 | print("======================") 67 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/semi_gradient_n_step_sarsa_with_nn.png") 68 | 69 | def run_actor_critic_tc(env, train, test): 70 | print("Running actor critic with eligibility traces tc") 71 | # Actor critic with 1-D tile coding 72 | gamma = 1 73 | alpha_theta = 1e-3 74 | alpha_w = 1e-3 75 | lambda_theta = 0.8 76 | lamdba_w = 0.8 77 | state_low = np.array([1, 0, 0, 1, 0]) 78 | state_high = np.array([1, 20, 500, 1200, 500]) 79 | tile_width = np.array([1, 1, 10, 50, 10]) 80 | V = StateOneDTileCoding( 81 | state_low, 82 | state_high, 83 | num_tilings=1, 84 | tile_width=tile_width 85 | ) 86 | pi = LinearPolicyApproximation(5, 2, alpha_theta) 87 | actor_critic_eligibility_trace_tc(env, gamma, 88 | alpha_theta, alpha_w, 89 | lambda_theta, lamdba_w, 90 | V, pi, 91 | train) 92 | rewards, bhr = actor_critic_eligibility_trace_tc(env, gamma, 93 | alpha_theta, alpha_w, 94 | lambda_theta, lamdba_w, 95 | V, pi, 96 | test) 97 | print("======================") 98 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/actor_critic_eligibility_trace.png") 99 | 100 | def run_actor_critic_linear(env, train, test): 101 | print("Running actor critic with eligibility traces linear") 102 | # Actor critic with neural network and eligibility traces 103 | gamma = 1 104 | alpha_theta = 1e-3 105 | alpha_w = 1e-3 106 | lambda_theta = 0.8 107 | lamdba_w = 0.8 108 | V = LinearStateApproximation(5, alpha_w) 109 | pi = LinearPolicyApproximation(5, 2, alpha_theta) 110 | actor_critic_eligibility_trace_linear(env, gamma, 111 | alpha_theta, alpha_w, 112 | lambda_theta, lamdba_w, 113 | V, pi, 114 | train) 115 | rewards, bhr = actor_critic_eligibility_trace_linear(env, gamma, 116 | alpha_theta, alpha_w, 117 | lambda_theta, lamdba_w, 118 | V, pi, 119 | test) 120 | print("======================") 121 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/actor_critic_eligibility_trace_linear.png") 122 | 123 | def run_reinforce(env, train, test): 124 | print("Running Reinforce") 125 | # Reinforce with linear function approximation 126 | epsilon = 0.1 127 | actions 
= [0, 1] 128 | gamma = 1 129 | alpha_theta = 1e-3 130 | alpha_w = 1e-3 131 | n = 1 132 | L = LinearStateApproximation(5, alpha_w) 133 | pi = LinearPolicyApproximation(5, 2, alpha_theta) 134 | reinforce(env, gamma, train,pi, L) 135 | rewards, bhr = reinforce(env, gamma, test,pi, L) 136 | print("======================") 137 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/reinforce.png") 138 | 139 | def run_sarsa_lambda(env, train, test, lam): 140 | print("Running True Online Sarsa Lambda") 141 | # True Online Sarsa Lambda with One Dimensional Tile Coding 142 | epsilon = 0.1 143 | gamma = 1 144 | alpha = 1e-2 145 | actions = [0, 1] 146 | state_low = np.array([1, 0, 0, 1, 0]) 147 | state_high = np.array([1, 20, 500, 1200, 500]) 148 | tile_width = np.array([1, 1, 10, 50, 10]) 149 | Q = StateActionOneDTileCoding( 150 | state_low, 151 | state_high, 152 | num_tilings=2, 153 | tile_width=tile_width, 154 | num_actions=len(actions) 155 | ) 156 | TrueOnlineSarsaLambda(env, epsilon, gamma, lam, alpha, Q, train) 157 | rewards, bhr = TrueOnlineSarsaLambda(env, epsilon, gamma, lam, alpha, Q, test) 158 | print("======================") 159 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/true_online_sarsa_lambda.png") 160 | 161 | def run_always_evict(env, train, test): 162 | print("Running Always Evict") 163 | rewards, bhr = always_evict(env, test) 164 | print("======================") 165 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/lru.png") 166 | 167 | def run_random_eviction(env, train, test): 168 | p = [0.5,0.5] 169 | print("Running Random Policy ") 170 | rewards, bhr = random_eviction(env, test, p) 171 | print("======================") 172 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/random_eviction_lru.png") 173 | 174 | def run_optimal(test, cache_size): 175 | print("Running Optimal Policy") 176 | rewards, bhr = optimal_admission(episodes=test, cache_size=cache_size) 177 | print("======================") 178 | return get_metrics(test, 0, rewards, bhr, "experiments/graphs/optimal_algorithm.png") 179 | 180 | if __name__ == "__main__": 181 | parser = argparse.ArgumentParser(description='RL CaR') 182 | parser.add_argument( 183 | '-ne', 184 | '--num_episodes', 185 | help='Number of episodes', 186 | type=int, 187 | default=70 188 | ) 189 | parser.add_argument( 190 | '-nr', 191 | '--num_repetitions', 192 | help='Number of repetitions', 193 | type=int, 194 | default=10 195 | ) 196 | parser.add_argument( 197 | '-fa', 198 | '--function_approximation', 199 | help='function approximation to use', 200 | default='tc' 201 | ) 202 | 203 | parser.add_argument( 204 | '-n_steps', 205 | '--n_steps', 206 | help='number of steps in sarsa', 207 | type=int, 208 | default=2 209 | ) 210 | 211 | parser.add_argument( 212 | '-lam', 213 | '--lam', 214 | help='lambda in sarsa', 215 | type=float, 216 | default=0.5 217 | ) 218 | 219 | parser.add_argument( 220 | '-rl', 221 | '--rl_algo', 222 | help='rl algo to use', 223 | default='actor_critic' 224 | ) 225 | 226 | parser.add_argument( 227 | '-policy', 228 | '--policy', 229 | help='cache replacement policy space separated', 230 | default="LRU" 231 | ) 232 | 233 | parser.add_argument( 234 | '-ts', 235 | '--test_size', 236 | help='test size', 237 | type=int, 238 | default="20" 239 | ) 240 | 241 | parser.add_argument( 242 | '-cs', 243 | '--cache_size', 244 | help='cache size', 245 | type=int, 246 | default="20" 247 | ) 248 | seeds = [10, 20, 30, 40 ,50, 60, 70, 80, 90, 100] 249 | 250 | args = parser.parse_args() 251 | 
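# The runner is looked up as 'run_' + rl_algo (+ '_' + function_approximation when one is set)
# in function_dict below; e.g. (from run.sh): python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 2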
print("Arguments ", args) 252 | num_repetitions = args.num_repetitions 253 | cache_size = args.cache_size 254 | 255 | num_episodes = args.num_episodes 256 | episodes = np.arange(num_episodes) 257 | function_name = 'run_' + args.rl_algo 258 | if args.function_approximation is not None: 259 | function_name += '_' + args.function_approximation 260 | 261 | print("Using Function:", function_name) 262 | 263 | function_dict = {"run_reinforce" : run_reinforce, 264 | "run_actor_critic_tc" : run_actor_critic_tc, 265 | "run_actor_critic_linear" : run_actor_critic_linear, 266 | "run_random_eviction" : run_random_eviction, 267 | "run_always_evict": run_always_evict, 268 | "run_n_step_sarsa_nn" : run_n_step_sarsa_nn, 269 | "run_n_step_sarsa_linear" : run_n_step_sarsa_linear, 270 | "run_optimal" : run_optimal, 271 | "run_sarsa_lambda" : run_sarsa_lambda} 272 | 273 | logging.basicConfig(level=logging.INFO, 274 | datefmt='%Y-%m-%d %H:%M:%S', handlers=[ 275 | logging.FileHandler('logs/{}'.format(function_name)), 276 | logging.StreamHandler() 277 | ]) 278 | 279 | policies = args.policy.split(" ") 280 | env = CacheEnv(policies, cache_size=cache_size) 281 | bhr_metrics = [] 282 | for r in range(num_repetitions): 283 | train, test = train_test_split(episodes, test_size=args.test_size, random_state=seeds[r]) 284 | if function_name.startswith("run_n"): 285 | n =args.n_steps 286 | bhr_metrics.append(function_dict[function_name](env, train, test, n)) 287 | elif function_name.startswith("run_optimal"): 288 | bhr_metrics.append(function_dict[function_name](test, cache_size)) 289 | elif function_name.startswith("run_sarsa_lambda"): 290 | bhr_metrics.append(function_dict[function_name](env, train, test, args.lam)) 291 | else: 292 | bhr_metrics.append(function_dict[function_name](env, train, test)) 293 | print("bhr_metrics", bhr_metrics) 294 | mean_bhr = np.mean(np.array(bhr_metrics)) 295 | print("mean_bhr = ", mean_bhr) 296 | log_string = "args = {}, bhr_metrics = {}, mean_bhr = {}".format(args, ', '.join(str(e) for e in bhr_metrics), str(mean_bhr)) 297 | logging.info(log_string) 298 | 299 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -cs 10"' & 2 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -cs 20"' & 3 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -cs 50"' & 4 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -cs 75"' & 5 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -cs 100"' & 6 | 7 | 8 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa tc -cs 10"' & 9 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa tc -cs 20"' & 10 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 
main.py -rl actor_critic -fa tc -cs 50"' & 11 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa tc -cs 75"' & 12 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa tc -cs 100"' & 13 | 14 | 15 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa linear -cs 20"' & 16 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl actor_critic -fa linear -cs 50"' & 17 | 18 | 19 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 10 -n_steps 1"' & 20 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 1"' & 21 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 50 -n_steps 1"' & 22 | 23 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 1"' & 24 | 25 | 26 | 27 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 10 -n_steps 2"' & 28 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 2"' & 29 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 50 -n_steps 2"' & 30 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 2"' & 31 | 32 | 33 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 2"' & 34 | 35 | 36 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 10 -n_steps 4"' & 37 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 4"' & 38 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 50 -n_steps 4"' & 39 | 40 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 2"' & 41 | 42 | 43 | 44 | 45 | osascript -e 'tell application "Terminal" to do script "cd
/Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 10 -n_steps 8"' & 46 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 8"' & 47 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 50 -n_steps 8"' & 48 | 49 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 2"' & 50 | 51 | 52 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 10 -n_steps 16"' & 53 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 20 -n_steps 16"' & 54 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 50 -n_steps 16"' & 55 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa linear -cs 100 -n_steps 2"' & 56 | 57 | 58 | 59 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 10 -n_steps 1"' & 60 | 61 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 20 -n_steps 1"' & 62 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 50 -n_steps 1"' & 63 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 75 -n_steps 1"' & 64 | 65 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 10 -n_steps 2"' & 66 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 20 -n_steps 2"' & 67 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 50 -n_steps 2"' & 68 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 75 -n_steps 2"' & 69 | 70 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 10 -n_steps 16"' & 71 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 20 -n_steps 16"' & 72 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 50 -n_steps 16"' & 73 | osascript -e 'tell application "Terminal" to do 
script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl n_step_sarsa -fa nn -cs 75 -n_steps 16"' & 74 | 75 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 10 -lam 0.5 -rl sarsa_lambda"' & 76 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 20 -lam 0.5 -rl sarsa_lambda"' & 77 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 50 -lam 0.5 -rl sarsa_lambda"' & 78 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 75 -lam 0.5 -rl sarsa_lambda"' & 79 | 80 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 10 -lam 0.75 -rl sarsa_lambda"' & 81 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 20 -lam 0.75 -rl sarsa_lambda"' & 82 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 50 -lam 0.75 -rl sarsa_lambda"' & 83 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 75 -lam 0.75 -rl sarsa_lambda"' & 84 | 85 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 10 -lam 0.95 -rl sarsa_lambda"' & 86 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 20 -lam 0.95 -rl sarsa_lambda"' & 87 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 50 -lam 0.95 -rl sarsa_lambda"' & 88 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -cs 75 -lam 0.95 -rl sarsa_lambda"' & 89 | 90 | 91 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy LFU -cs 10"' & 92 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy LFU -cs 20"' & 93 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy LFU -cs 50"' & 94 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy LFU -cs 75"' & 95 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy LFU -cs 100"' & 96 | 97 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -policy LFU -cs 10"' & 98 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -policy LFU -cs 20"' & 99 | osascript -e 'tell application "Terminal" to do script "cd 
/Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -policy LFU -cs 50"' & 100 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -policy LFU -cs 75"' & 101 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl reinforce -policy LFU -cs 100"' & 102 | 103 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy FIFO -cs 10"' & 104 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy FIFO -cs 20"' & 105 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy FIFO -cs 50"' & 106 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy FIFO -cs 75"' & 107 | osascript -e 'tell application "Terminal" to do script "cd /Users/isha/Desktop/Courses/RL/RLCaR && /usr/local/bin/python3.9 main.py -rl always_evict -policy FIFO -cs 100"' & 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /cache.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import pandas as pd 4 | from collections import Counter, defaultdict, OrderedDict 5 | import heapq 6 | from gym import spaces 7 | from replacement_agent import ReplacementAgent 8 | 9 | from trace_loader import load_traces, get_stats 10 | 11 | # from park import core, spaces, logger 12 | # from park.param import config 13 | # from park.utils import seeding 14 | # from park.envs.cache.trace_loader import load_traces 15 | 16 | accept = 1 17 | reject = 0 18 | 19 | cache_unseen_default = 500 20 | cache_size_default = 20 21 | cache_trace_default = "test" 22 | 23 | 24 | class TraceSrc(object): 25 | ''' 26 | Tracesrc is the Trace Loader 27 | 28 | @param trace: The file name of the trace file 29 | @param cache_size: The fixed size of the whole cache 30 | @param load_trace: The list of trace data. 
Items can be accessed via load_trace.iloc[self.req] 31 | @param n_request: length of the trace 32 | @param min_values, max_values: Used to bound the value space 33 | @param req: Index of the current request in the trace 34 | ''' 35 | 36 | def __init__(self, trace, cache_size): 37 | self.trace = trace 38 | self.cache_size = cache_size 39 | self.load_trace = load_traces(self.trace, self.cache_size, 0) 40 | self.means, self.stddevs = get_stats(self.load_trace) 41 | self.n_request = len(self.load_trace) 42 | self.cache_size = cache_size 43 | self.min_values = np.asarray([1, 0, 0]) 44 | self.max_values = np.asarray([self.cache_size, self.cache_size, max(self.load_trace[0])]) 45 | self.req = 0 46 | 47 | def reset(self, random): 48 | if self.trace == 'test' or self.trace.startswith('zipf'): 49 | self.load_trace = load_traces(self.trace, self.cache_size, random) 50 | self.means, self.stddevs = get_stats(self.load_trace) 51 | self.n_request = len(self.load_trace) 52 | self.min_values = np.asarray([1, 0, 0]) 53 | self.max_values = np.asarray([self.cache_size, self.cache_size, max(self.load_trace[0])]) 54 | self.req = 0 55 | 56 | def step(self): 57 | # Obs is: (obj_time, obj_id, obj_size) 58 | # print("req id in trace step:", self.req) 59 | obs = self.load_trace.iloc[self.req].values 60 | self.req += 1 61 | done = self.req >= self.n_request 62 | return obs, done 63 | 64 | def next(self): 65 | obs = self.load_trace.iloc[self.req].values 66 | done = (self.req + 1) >= self.n_request 67 | return obs, done 68 | 69 | def get_trace_stats(self): 70 | return self.means, self.stddevs 71 | 72 | class CacheSim(object): 73 | def __init__(self, cache_size, policy, action_space, state_space, replacement_policies, trace_means, trace_stddevs, episode_index=0): 74 | # invariant 75 | ''' 76 | This is the simulator for the cache. 77 | @param cache_size 78 | @param policy: Not implemented yet. Maybe we should instead put this part in the action 79 | @param action_space: The restriction on the action space.
For the cache admission agent, it is [0, 1]: 0 is for reject and 1 is for admit 80 | @param req: The index of the current request 81 | @param non_cache: Dict of requested objects that are not cached, mapping obj_id to [obj_size, last request time] 82 | @param cache: Dict of requested objects that are cached, mapping obj_id to [obj_size, last request time] 83 | @param count_ohr: ohr is (sigma hit) / req 84 | @param count_bhr: bhr is (sigma object_size * hit) / sigma object_size 85 | @param size_all: size_all is sigma object_size 86 | ''' 87 | 88 | self.cache_size = cache_size 89 | self.policy = policy 90 | self.action_space = action_space 91 | self.observation_space = state_space 92 | self.req = 0 93 | self.non_cache = defaultdict(list) 94 | self.cache = defaultdict(list) # requested items with caching 95 | self.cache_pq = [] 96 | # self.lru_cache = LRUCache(self.cache_size) 97 | self.agent = ReplacementAgent(capacity=self.cache_size, policies=replacement_policies,episode_index=episode_index) 98 | self.cache_remain = self.cache_size 99 | self.count_ohr = 0 100 | self.count_bhr = 0 101 | self.size_all = 0 102 | self.object_frequency = Counter() 103 | self.object_average_interarrival = Counter() 104 | self.trace_means = trace_means 105 | self.trace_stddevs = trace_stddevs 106 | 107 | def reset(self, trace_means, trace_stddevs, episode_index): 108 | self.req = 0 109 | self.non_cache = defaultdict(list) 110 | self.cache = defaultdict(list) 111 | self.cache_pq = [] 112 | self.cache_remain = self.cache_size 113 | self.count_ohr = 0 114 | self.count_bhr = 0 115 | self.size_all = 0 116 | self.agent.reset(index=episode_index) 117 | self.object_frequency = Counter() 118 | self.object_average_interarrival = Counter() 119 | self.trace_means = trace_means 120 | self.trace_stddevs = trace_stddevs 121 | 122 | def step(self, action, obj): 123 | #print("object_freq in step(): {}".format(self.object_frequency)) 124 | req = self.req 125 | # print(self.req) 126 | cache_size_online_remain = self.cache_remain 127 | discard_obj_if_admit = [] 128 | obj_time, obj_id, obj_size = obj[0], obj[1], obj[2] 129 | self.object_frequency[obj_id] += 1 130 | 131 | 132 | # create the current state for cache simulator 133 | cost = 0 134 | 135 | # simulation 136 | # if the object size is larger than cache size 137 | if obj_size >= self.cache_size: 138 | # record the request 139 | cost += obj_size 140 | hit = 0 141 | try: 142 | self.non_cache[obj_id][1] = req 143 | except IndexError: 144 | self.non_cache[obj_id] = [obj_size, req] 145 | 146 | else: 147 | # Search the object in the cache 148 | # If hit 149 | try: 150 | self.cache[obj_id][1] = req 151 | self.count_bhr += obj_size 152 | self.count_ohr += 1 153 | hit = 1 154 | cost += obj_size 155 | self.agent.update(obj_id, obj_size) 156 | 157 | # If not hit 158 | except IndexError: 159 | # accept request 160 | if action == 1: 161 | # find the object in the cache, no cost, OHR and BHR ++ 162 | # can't find the object in the cache, add the object into cache after replacement, cost ++ 163 | while cache_size_online_remain < obj_size: 164 | # rm_id = self.cache_pq[0][1] 165 | # cache_size_online_remain += self.cache_pq[0][0] 166 | # cost += self.cache_pq[0][0] 167 | # discard_obj_if_admit.append(rm_id) 168 | # heapq.heappop(self.cache_pq) 169 | # del self.cache[rm_id] 170 | rm_id, size = self.agent.remove() 171 | #print("rm_id = ",rm_id, " size = ", size) 172 | cache_size_online_remain += size 173 | cost += size 174 | discard_obj_if_admit.append(rm_id) 175 | del self.cache[rm_id] 176 | 177 | 178
| # add into cache 179 | self.cache[obj_id] = [obj_size, req] 180 | # heapq.heappush(self.cache_pq, (obj_size, obj_id)) 181 | self.agent.put(obj_id, obj_size) 182 | cache_size_online_remain -= obj_size 183 | 184 | # cost value is based on size, can be changed 185 | cost += obj_size 186 | hit = 0 187 | 188 | # reject request 189 | else: 190 | hit = 0 191 | # record the request to non_cache 192 | try: 193 | self.non_cache[obj_id][1] = req 194 | except IndexError: 195 | self.non_cache[obj_id] = [obj_size, req] 196 | 197 | self.size_all += obj_size 198 | bhr = float(self.count_bhr / self.size_all) 199 | ohr = float(self.count_ohr / (req + 1)) 200 | # print("debug:", bhr, ohr) 201 | reward = hit * cost 202 | 203 | if self.object_frequency[obj_id] != 1: 204 | new_count = self.object_frequency[obj_id] - 1 205 | cur_avg = self.object_average_interarrival[obj_id] 206 | try: 207 | last_interarrival = self.req - self.cache[obj_id][1] 208 | except IndexError: 209 | last_interarrival = self.req - self.non_cache[obj_id][1] 210 | new_avg = cur_avg + (last_interarrival - cur_avg)/new_count 211 | self.object_average_interarrival[obj_id] = new_avg 212 | 213 | self.req += 1 214 | self.cache_remain = cache_size_online_remain 215 | 216 | info = [self.count_bhr, self.size_all, float(float(self.count_bhr) / float(self.size_all))] 217 | return reward, info 218 | 219 | def next_hit(self, obj): 220 | try: 221 | obj_id = obj[1] 222 | self.cache[obj_id][1] = self.cache[obj_id][1] 223 | return True 224 | 225 | except IndexError: 226 | return False 227 | 228 | def get_normalized_state(self, state): 229 | normalized_state = [] 230 | for index, s in enumerate(state): 231 | normalized_state.append( (s-self.trace_means[index])/self.trace_stddevs[index]) 232 | normalized_state[1] /= self.cache_size 233 | return normalized_state 234 | 235 | def get_state(self, obj=[0, 0, 0, 0]): 236 | ''' 237 | Return the state of the object, [obj_size, cache_size_online_remain, recency (steps since object was last visited) = req - last visited time] 238 | If an object has never been seen before, assigned a constant for the recency feature. 239 | For more information, see Learning Caching policy_approximations with Subsampling: 240 | http://mlforsystems.org/assets/papers/neurips2019/learning_wang_2019.pdf 241 | ''' 242 | obj_time, obj_id, obj_size = obj[0], obj[1], obj[2] 243 | try: 244 | req = self.req - self.cache[obj_id][1] 245 | except IndexError: 246 | try: 247 | req = self.req - self.non_cache[obj_id][1] 248 | except IndexError: 249 | # Unseen objects (not in non_cache or cache) are assigned this recency constant 250 | req = cache_unseen_default 251 | 252 | #print("object_freq in get_state: {}".format(self.object_frequency)) 253 | # sorted_frequency = dict(sorted(self.object_frequency.items(), key=lambda item: item[1])) 254 | # rank = -1 255 | # if obj_id in sorted_frequency: 256 | # rank = list(sorted_frequency.keys()).index(obj_id) 257 | # cache_min_freq = math.inf 258 | # for object in self.cache: 259 | # freq = self.object_frequency[object] 260 | # cache_min_freq = min(cache_min_freq, freq) 261 | #print("obj_id = {}, rank = {}".format(obj_id, rank)) 262 | state = [obj_size, self.cache_remain, req, self.object_frequency[obj_id], 263 | self.object_average_interarrival[obj_id]] 264 | 265 | return self.get_normalized_state(state) 266 | 267 | 268 | class CacheEnv(): 269 | """ 270 | Cache description. 
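The environment couples a TraceSrc trace loader with a CacheSim cache simulator whose evictions are delegated to a ReplacementAgent built from the configured replacement policies; each call to reset(trace_index) replays one trace file as one episode.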
271 | 272 | * STATE * 273 | The state is represented as a vector: 274 | [request object size, 275 | cache remaining size, 276 | time of last request to the same object] 277 | 278 | * ACTIONS * 279 | TODO: should be fixed here, there should be both 280 | Whether the cache accept the incoming request, represented as an 281 | integer in [0, 1]. 282 | 283 | * REWARD * (BHR) 284 | Cost of previous step (object size) * hit 285 | 286 | * REFERENCE * 287 | """ 288 | 289 | def __init__(self, replacement_policies, cache_size=cache_size_default, 290 | trace=cache_trace_default, seed=42): 291 | self.seed(seed) 292 | self.cache_size = cache_size 293 | 294 | # load trace, attach initial online feature values 295 | self.src = TraceSrc(trace=trace, cache_size=self.cache_size) 296 | 297 | # set up the state and action space 298 | self.action_space = spaces.Discrete(2) 299 | self.observation_space = spaces.Box(self.src.min_values, \ 300 | self.src.max_values, \ 301 | dtype=np.float32) 302 | 303 | # cache simulator 304 | trace_means, trace_stddevs = self.src.get_trace_stats() 305 | self.sim = CacheSim(cache_size=self.cache_size, \ 306 | policy='lru', \ 307 | action_space=self.action_space, \ 308 | state_space=self.observation_space, 309 | replacement_policies=replacement_policies, 310 | trace_means=trace_means, 311 | trace_stddevs=trace_stddevs, 312 | episode_index=0) 313 | 314 | # reset environment (generate new jobs) 315 | self.reset(1, 2) 316 | 317 | def reset(self, trace_index, low=0, high=1000): 318 | #new_trace = np.random.randint(low, high) 319 | self.src.reset(trace_index) 320 | trace_means, trace_stddevs = self.src.get_trace_stats() 321 | self.sim.reset(trace_means, trace_stddevs, episode_index=trace_index) 322 | if cache_trace_default == 'test': 323 | print("New Env Start", trace_index) 324 | elif cache_trace_default == 'real': 325 | print("New Env Start Real") 326 | return self.sim.get_state() 327 | 328 | def seed(self, seed): 329 | self.np_random = np.random.seed(seed) 330 | 331 | def step(self, action): 332 | # 0 <= action < num_servers 333 | global accept 334 | assert self.action_space.contains(action) 335 | state, done = self.src.step() 336 | reward, info = self.sim.step(action, state) 337 | obj, done = self.src.next() 338 | while self.sim.next_hit(obj): 339 | state, done = self.src.step() 340 | hit_reward, info = self.sim.step(accept, state) 341 | reward += hit_reward 342 | if done is True: 343 | break 344 | obj, done = self.src.next() 345 | 346 | obs = self.sim.get_state(obj) 347 | #info = {} 348 | return obs, reward, done, info 349 | 350 | def render(self, mode='human', close=False): 351 | pass 352 | --------------------------------------------------------------------------------
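The following is a minimal, illustrative driver (not one of the repository's entry points) showing how the algorithms in algorithms/ interact with CacheEnv: reset to an episode index, step with an admit (1) / reject (0) action, and read the running byte hit ratio from info[2]. It assumes the trace files referenced by trace_loader.py (trace/test_trace/test_<i>.tr) are present; the random admission policy below is only a placeholder.

import numpy as np

from cache import CacheEnv

env = CacheEnv(replacement_policies=["LRU"], cache_size=20)
bhr_per_episode = {}
for episode in range(3):
    state = env.reset(episode)                       # loads trace/test_trace/test_<episode>.tr
    done, info = False, None
    while not done:
        action = 1 if np.random.rand() < 0.5 else 0  # placeholder admit/reject policy
        state, reward, done, info = env.step(action)
    bhr_per_episode[episode] = info[2]               # info = [count_bhr, size_all, byte hit ratio]
print(bhr_per_episode)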