├── README.md
├── utils
│   └── analysis.py
├── tool
│   └── data_loader.py
├── model
│   ├── environment.py
│   └── DQN.py
├── .gitignore
└── test_DQN.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # V2V Communication
2 | Deep reinforcement learning (DRL) for V2V groupcast resource allocation in 5G NR-V2X vehicle platooning. If you use this code, please cite:
3 | ```
4 | @INPROCEEDINGS{9527765,
5 |   author={Cao, Liu and Yin, Hao},
6 |   booktitle={2021 IEEE International Black Sea Conference on Communications and Networking (BlackSeaCom)},
7 |   title={Resource Allocation for Vehicle Platooning in 5G NR-V2X via Deep Reinforcement Learning},
8 |   year={2021},
9 |   volume={},
10 |   number={},
11 |   pages={1-7},
12 |   doi={10.1109/BlackSeaCom52164.2021.9527765}}
13 | ```
14 | 
--------------------------------------------------------------------------------
/utils/analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import pandas as pd
4 | 
5 | p = [0.1, 0.3, 0.5]  # values of p compared in the plot
6 | col_prob_analy = np.zeros([3, 20])  # analytical collision probability per (p, density)
7 | n = np.arange(10, 210, 10)
8 | Ts = 10
9 | 
10 | for k in range(3):
11 |     i = 0
12 |     for density in range(10, 210, 10):
13 |         Nv = density * 0.8
14 |         N_ht = density * 0.1
15 |         P_single = (200 - density * 0.4 - 1) / (200 - density * 0.4)
16 |         pc = 1 - (1 - p[k] / (Ts * 200))**(Nv - 1)
17 |         col_prob_analy[k][i] = 1 - (1 - pc) * P_single**N_ht
18 |         i = i + 1
19 | 
20 | plt.plot(n, col_prob_analy[0], '-*')
21 | plt.plot(n, col_prob_analy[1], '-o')
22 | plt.plot(n, col_prob_analy[2], '-d')
23 | #plt.plot(n, simulation_p1, '-x')
24 | plt.xlabel('Vehicle density')
25 | plt.ylabel('Collision probability')
26 | plt.legend(['p = 0.1', 'p = 0.3', 'p = 0.5'])
27 | plt.show()
--------------------------------------------------------------------------------
/tool/data_loader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | 
4 | def load_data(leader_csv, member_csv):
5 |     rb_leader = pd.read_csv(leader_csv)
6 |     rb_member = pd.read_csv(member_csv)
7 |     len_train = int(len(rb_leader))  # use the full trace
8 |     rb_leader_train = rb_leader.values.reshape(
9 |         rb_leader.shape[0], rb_leader.shape[1])[:len_train, :]
10 |     rb_member_train = rb_member.values.reshape(
11 |         rb_member.shape[0], rb_member.shape[1])[1:len_train, :]  # member trace is offset by one row
12 | 
13 |     return rb_leader_train, rb_member_train
14 | 
15 | 
16 | def load_data_split(leader_csv, member_csv):
17 |     rb_leader = pd.read_csv(leader_csv)
18 |     rb_member = pd.read_csv(member_csv)
19 |     len_train = int(len(rb_leader) / 10 * 7)  # 70/30 train/test split
20 |     rb_leader_train = rb_leader.values.reshape(
21 |         rb_leader.shape[0], rb_leader.shape[1])[:len_train, :]
22 |     rb_member_train = rb_member.values.reshape(
23 |         rb_member.shape[0], rb_member.shape[1])[1:len_train, :]
24 | 
25 |     rb_leader_test = rb_leader.values.reshape(
26 |         rb_leader.shape[0], rb_leader.shape[1])[len_train + 1:, :]
27 |     rb_member_test = rb_member.values.reshape(
28 |         rb_member.shape[0], rb_member.shape[1])[len_train + 2:, :]
29 | 
30 |     return rb_leader_train, rb_member_train, rb_leader_test, rb_member_test
--------------------------------------------------------------------------------
/model/environment.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | 
4 | 
5 | class ENVIRONMENT(object):
6 |     """V2V groupcast resource-block (RB) selection environment for the platoon leader."""
7 |     def __init__(
8 |         self,
9 |         n_actions,
10 |         rb_leader,
11 |         rb_member,
12 |         rb_hidden,
13 |         state_size=32,
14 |         window_size=1,
15 |     ):
16 |         super(ENVIRONMENT, self).__init__()
17 |         self.state_size = state_size
18 |         self.window_size = window_size
19 |         self.rb_leader = rb_leader
20 |         self.rb_member = rb_member
21 |         self.rb_hidden = rb_hidden
22 |         self.n_actions = n_actions
23 |         self.n_nodes = 2
24 | 
25 |     def reset(self):
26 |         state = []
27 |         for i in range(int(self.state_size / 2)):
28 |             action_index = np.where(self.rb_leader[i] == 0)
29 |             action_size_period = len(action_index[0])
30 |             action_sel = np.random.choice(action_size_period)
31 |             # The leader vehicle chooses an idle RB to send on
32 |             action = action_index[0][action_sel]
33 |             # The leader vehicle receives an ACK/NACK from the last member
34 |             obs = int(self.rb_hidden[i][action] > 0)
35 |             state.append(action)
36 |             state.append(obs)
37 |         state = np.array(state)
38 |         return state
39 | 
40 |     def step(self, action, time):
41 |         # A collision occurs if the chosen RB is busy in the hidden-node map or in the next period's leader map
42 |         observation_ = int(self.rb_hidden[time][action] +
43 |                            self.rb_leader[time + 1][action] > 0)
44 |         reward = int(observation_ == 0)  # reward 1 for a collision-free transmission
45 | 
46 |         return observation_, reward
47 | 
48 |     def test_update(self, rb_leader, rb_member):
49 |         self.rb_leader = rb_leader
50 |         self.rb_member = rb_member
51 |         self.rb_hidden = rb_member  # the member trace doubles as the hidden-node map
52 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | target/
76 | 
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 | 
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 | 
84 | # pyenv
85 | .python-version
86 | 
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 | 
94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .vscode 131 | .idea 132 | 133 | # custom 134 | *.pkl 135 | output/ 136 | p3/ 137 | p1/ 138 | run*.ipynb 139 | *.npy 140 | # Pytorch 141 | *.pth -------------------------------------------------------------------------------- /test_DQN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd \n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "from tool.data_loader import load_data\n", 14 | "from model.DQN import DQN\n", 15 | "from model.environment import ENVIRONMENT" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### Init Parameters" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": { 29 | "scrolled": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "leader_csv = \"./data/leader_density200_p1.csv\"\n", 34 | "member_csv = \"./data/member_density200_p1.csv\"\n", 35 | "\n", 36 | "rb_leader, rb_member = load_data(leader_csv, member_csv)\n", 37 | "rb_hidden = rb_member\n", 38 | "loss_val = []\n", 39 | "\n", 40 | "env = ENVIRONMENT(\n", 41 | " n_actions = 200,\n", 42 | " rb_leader = rb_leader,\n", 43 | " rb_member = rb_member,\n", 44 | " rb_hidden = rb_hidden,\n", 45 | " state_size=32, \n", 46 | " window_size=7,\n", 47 | " )\n", 48 | "\n", 49 | "dqn_agent = DQN(env.state_size,\n", 50 | " env.n_actions, \n", 51 | " loss_val = loss_val,\n", 52 | " memory_size=1000,\n", 53 | " replace_target_iter=200,\n", 54 | " batch_size=1,\n", 55 | " learning_rate=0.01,\n", 56 | " gamma=0.9,\n", 57 | " epsilon=0.1,\n", 58 | " epsilon_min=0,\n", 59 | " epsilon_decay=0.5,\n", 60 | " )\n", 61 | "\n", 62 | "counter = 0\n", 63 | "total_reward = 0\n", 64 | "collision_num = 0\n", 65 | "prob_collision = []\n", 66 | "state = env.reset()\n", 67 | "state_size = len(state)\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Online Training and Testing" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "------------------------------------------\n", 87 | "---------- Start processing ... ----------\n", 88 | "------------------------------------------\n", 89 | "Step:100%, action:167, reward:1\r" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "print('------------------------------------------')\n", 95 | "print('---------- Start processing ... 
----------')\n", 96 | "print('------------------------------------------')\n", 97 | "\n", 98 | "\n", 99 | "for time in range(int(state_size/2), (len(rb_member))):\n", 100 | " action_index = np.where(rb_leader[time] == 0)\n", 101 | " action_index = np.reshape(action_index, len(action_index[0]))\n", 102 | " action = dqn_agent.choose_action(state, action_index)\n", 103 | "\n", 104 | " observation_, reward = env.step(action, time)\n", 105 | " if observation_ > 0:\n", 106 | " collision_num +=1 \n", 107 | " prob_collision.append(collision_num/(time+1))\n", 108 | " total_reward += reward\n", 109 | "\n", 110 | " next_state = np.concatenate([state[2:], [action, observation_]])\n", 111 | "\n", 112 | " dqn_agent.store_transition(state, action, reward, next_state)\n", 113 | " if counter < 200:\n", 114 | " dqn_agent.learn() # internally iterates default (prediction) model\n", 115 | " counter += 1\n", 116 | " elif counter < 250:\n", 117 | " counter += 1\n", 118 | " else:\n", 119 | " counter = 0\n", 120 | " state = next_state\n", 121 | " print(\"Step:{0}%, action:{1}, reward:{2}\".format(round((time + 1) * 100 / len(rb_member)), action, reward), end=\"\\r\")" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "\n", 134 | "total reward is 1854\n", 135 | "\n", 136 | "collision probability = 0.06458123107971746\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "p_col_rl = collision_num/((len(rb_member)) - int(state_size/2))\n", 142 | "print(\"\\ntotal reward is {0}\".format(total_reward))\n", 143 | "print(\"\\ncollision probability = \", p_col_rl)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "Python 3", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.7.5" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | -------------------------------------------------------------------------------- /model/DQN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import os 7 | 8 | DEBUG = False 9 | 10 | 11 | def train(model, 12 | state, 13 | q_target, 14 | learningRate, 15 | batch_size, 16 | loss_val, 17 | epochs=1, 18 | verbose=0): 19 | 20 | if DEBUG: 21 | print("state.shape:{}, q_target.shape:{}".format( 22 | state.shape, q_target.shape)) 23 | print("batchsize:{}".format(batch_size)) 24 | 25 | loss_fc = nn.MSELoss() 26 | if torch.cuda.is_available(): 27 | loss_fc = nn.MSELoss().cuda() 28 | 29 | optimizer = torch.optim.Adam(model.parameters(), lr=learningRate) 30 | 31 | # optimizer = torch.optim.SGD( 32 | # model.parameters(), lr=learningRate, momentum=0.9) 33 | loss = 0 34 | for epoch in range(epochs): 35 | optimizer.zero_grad() 36 | result = model(state) 37 | loss = loss_fc(result, q_target) 38 | loss.backward() 39 | optimizer.step() 40 | 41 | if verbose: 42 | message = "[in train] epoch{}, 
loss:{}".format(epoch, loss) 43 | print(message) 44 | loss_val.append(loss) 45 | 46 | 47 | class ResNet(nn.Module): 48 | def __init__(self, state_size, n_actions): 49 | super(ResNet, self).__init__() 50 | self.h1 = nn.Linear(state_size, 64) 51 | self.h2 = nn.Linear(64, 64) 52 | self.h3 = nn.Linear(64, 64) 53 | self.h4 = nn.Linear(64, 64) 54 | self.h5 = nn.Linear(64, 64) 55 | self.h6 = nn.Linear(64, 64) 56 | self.out = nn.Linear(64, n_actions) 57 | 58 | def forward(self, x): 59 | h1 = F.relu(self.h1(x)) 60 | h2 = F.relu(self.h2(h1)) 61 | 62 | h3 = F.relu(self.h3(h2)) 63 | h4 = F.relu(self.h4(h3)) + h2 64 | 65 | h5 = F.relu(self.h5(h4)) 66 | h6 = F.relu(self.h6(h5)) + h4 67 | 68 | return self.out(h6) 69 | 70 | 71 | class AllLinear(nn.Module): 72 | def __init__(self, state_size, n_actions): 73 | super(AllLinear, self).__init__() 74 | self.h1 = nn.Linear(state_size, 64) 75 | self.h2 = nn.Linear(64, 64) 76 | #self.h3 = nn.Linear(64, 64) 77 | self.out = nn.Linear(64, n_actions) 78 | 79 | def forward(self, x): 80 | h1 = F.relu(self.h1(x)) 81 | h2 = F.relu(self.h2(h1)) 82 | #h3 = F.relu(self.h3(h2)) 83 | 84 | return self.out(h2) 85 | 86 | 87 | class DQN(nn.Module): 88 | def __init__(self, 89 | state_size, 90 | n_actions, 91 | loss_val, 92 | memory_size=500, 93 | replace_target_iter=200, 94 | batch_size=32, 95 | learning_rate=0.01, 96 | gamma=0.9, 97 | epsilon=1, 98 | epsilon_min=0.01, 99 | epsilon_decay=0.995): 100 | 101 | super(DQN, self).__init__() 102 | self.state_size = state_size 103 | self.n_actions = n_actions 104 | self.loss_val = loss_val 105 | self.memory_size = memory_size 106 | self.replace_target_iter = replace_target_iter 107 | self.batch_size = batch_size 108 | self.learning_rate = learning_rate 109 | self.gamma = gamma 110 | self.epsilon = epsilon 111 | self.epsilon_min = epsilon_min 112 | self.epsilon_decay = epsilon_decay 113 | # self.memory = torch.zeros(self.memory_size, self.state_size*2+2) 114 | self.memory = np.zeros((self.memory_size, self.state_size * 2 + 2)) 115 | self.learn_step_counter = 0 116 | self.memory_couter = 0 117 | 118 | # self.model = ResNet(self.state_size, self.n_actions).cuda() 119 | # self.target_model = ResNet(self.state_size, self.n_actions).cuda() 120 | if torch.cuda.is_available(): 121 | self.model = AllLinear(self.state_size, self.n_actions).cuda() 122 | else: 123 | self.model = AllLinear(self.state_size, self.n_actions) 124 | # self.target_model = AllLinear(self.state_size, self.n_actions).cuda() 125 | 126 | def choose_action(self, state, action_index): 127 | state = state[np.newaxis, :] 128 | self.epsilon *= self.epsilon_decay 129 | self.epsilon = max(self.epsilon_min, self.epsilon) 130 | action_size_period = len(action_index) 131 | if np.random.random() < self.epsilon: 132 | action_sel = np.random.choice(action_size_period) 133 | return action_index[action_sel] 134 | 135 | state = Variable(torch.from_numpy(state.astype(float))).float() 136 | if torch.cuda.is_available(): 137 | state = state.cuda() 138 | 139 | q_out = self.model(state) 140 | action = action_index[0] 141 | for i in range(1, action_size_period): 142 | if q_out[0][action_index[i]] > q_out[0][action]: 143 | action = action_index[i] 144 | return action 145 | 146 | def forward(self): 147 | batch_memory = self.memory 148 | state = batch_memory[:, :self.state_size] 149 | action = batch_memory[:, self.state_size].astype(int) 150 | reward = batch_memory[:, self.state_size + 1] 151 | next_state = batch_memory[:, -self.state_size:] 152 | 153 | q_eval = self.model.forward(state) 154 | #q_next = 
self.target_model.forward(state) 155 | 156 | q_target = reward + self.gamma * torch.max(q_eval, axis=1) 157 | return (q_eval, q_target) 158 | 159 | def store_transition(self, s, a, r, s_): # s_: next_state 160 | if not hasattr(self, 'memory_couter'): 161 | self.memory_couter = 0 162 | transition = np.concatenate((s, [a, r], s_)) 163 | index = self.memory_couter % self.memory_size 164 | 165 | self.memory[index, :] = transition 166 | self.memory_couter += 1 167 | 168 | def pretrain_learn(self, state): 169 | state = state[np.newaxis, :] 170 | init_value = 0.5 / (1 - self.gamma) 171 | q_target = np.ones(3) * init_value 172 | q_target = q_target[np.newaxis, :] 173 | 174 | train(self.model, 175 | state, 176 | q_target, 177 | self.learning_rate, 178 | self.batch_size, 179 | epochs=1, 180 | verbose=0) 181 | 182 | def repalce_target_parameters(self): 183 | model_state_dict = self.model.state_dict() 184 | self.target_model.load_state_dict(model_state_dict) 185 | 186 | def learn(self): 187 | # check to update target netowrk parameters 188 | # if self.learn_step_counter % self.replace_target_iter == 0: 189 | # self.repalce_target_parameters() # iterative target model 190 | self.learn_step_counter += 1 191 | 192 | # sample batch memory from all memory 193 | if self.memory_couter > self.memory_size: 194 | sample_index = np.random.choice(self.memory_size, 195 | size=self.batch_size) 196 | else: 197 | sample_index = np.random.choice(self.memory_couter, 198 | size=self.batch_size) 199 | batch_memory = self.memory[sample_index, :] 200 | 201 | # batch memory row: [s, a, r1, r2, s_] 202 | # number of batch memory: batch size 203 | # extract state, action, reward, reward2, next_state from batch memory 204 | state = batch_memory[:, :self.state_size] 205 | action = batch_memory[:, self.state_size].astype(int) # float -> int 206 | reward = batch_memory[:, self.state_size + 1] 207 | next_state = batch_memory[:, -self.state_size:] 208 | 209 | if torch.cuda.is_available(): 210 | state = Variable(torch.from_numpy(state.astype(int))).float().cuda() 211 | next_state = Variable(torch.from_numpy( 212 | next_state.astype(int))).float().cuda() 213 | else: 214 | state = Variable(torch.from_numpy(state.astype(int))).float() 215 | next_state = Variable(torch.from_numpy( 216 | next_state.astype(int))).float() 217 | 218 | q_eval = self.model(state) # state 219 | q_next = self.model(next_state) # next state 220 | # q_target = q_eval.cpu().detach().numpy() 221 | q_target = q_eval.clone() 222 | 223 | batch_index = np.arange(self.batch_size, dtype=np.int32) 224 | if torch.cuda.is_available(): 225 | q_target[batch_index, action] = torch.from_numpy(reward).float()\ 226 | .cuda() + self.gamma * torch.max(q_next, axis=1)[0].float() 227 | else: 228 | q_target[batch_index, action] = torch.from_numpy(reward).float()\ 229 | + self.gamma * torch.max(q_next, axis=1)[0].float() 230 | 231 | train(self.model, 232 | state, 233 | q_target, 234 | self.learning_rate, 235 | self.batch_size, 236 | self.loss_val, 237 | epochs=1, 238 | verbose=0) 239 | 240 | def save_model(self, fn): 241 | print("==> saving model") 242 | torch.save({"model_state_dict": self.model.state_dict()}, fn) 243 | --------------------------------------------------------------------------------
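Note on the target network: `DQN` takes a `replace_target_iter` argument and defines `replace_target_parameters()`, but the target model itself is commented out in `__init__`, so `learn()` currently bootstraps `q_next` from the online network. The sketch below is not part of the repository; it is one minimal way the commented-out pieces could be wired together, reusing the `AllLinear` architecture, the `[s, a, r, s_]` memory layout and the module-level `train()` helper. The `DQNWithTarget` name and the subclassing approach are illustrative assumptions, not the authors' implementation.

```python
# Illustrative sketch only: bootstrap q_next from a frozen copy of the online
# model that is re-synced every `replace_target_iter` learn() calls.
import numpy as np
import torch

from model.DQN import DQN, AllLinear, train


class DQNWithTarget(DQN):
    def __init__(self, *args, **kwargs):
        super(DQNWithTarget, self).__init__(*args, **kwargs)
        self.target_model = AllLinear(self.state_size, self.n_actions)
        if torch.cuda.is_available():
            self.target_model = self.target_model.cuda()
        self.replace_target_parameters()  # start from identical weights

    def learn(self):
        # Periodically copy the online weights into the target network.
        if self.learn_step_counter % self.replace_target_iter == 0:
            self.replace_target_parameters()
        self.learn_step_counter += 1

        # Sample a minibatch; memory rows are [s, a, r, s_] as in DQN.
        upper = min(self.memory_counter, self.memory_size)
        batch = self.memory[np.random.choice(upper, size=self.batch_size), :]

        state = torch.from_numpy(batch[:, :self.state_size]).float()
        action = batch[:, self.state_size].astype(int)
        reward = torch.from_numpy(batch[:, self.state_size + 1]).float()
        next_state = torch.from_numpy(batch[:, -self.state_size:]).float()
        if torch.cuda.is_available():
            state, reward, next_state = (state.cuda(), reward.cuda(),
                                         next_state.cuda())

        with torch.no_grad():
            q_eval = self.model(state)
            # The bootstrap value now comes from the frozen target network.
            q_next = self.target_model(next_state)

        q_target = q_eval.clone()
        batch_index = np.arange(self.batch_size, dtype=np.int32)
        q_target[batch_index, action] = reward + \
            self.gamma * torch.max(q_next, dim=1)[0]

        train(self.model, state, q_target, self.learning_rate,
              self.batch_size, self.loss_val, epochs=1, verbose=0)
```

In the notebook this would be a drop-in replacement for the agent construction, e.g. `dqn_agent = DQNWithTarget(env.state_size, env.n_actions, loss_val=loss_val, replace_target_iter=200, batch_size=1, ...)`, with the rest of the online training loop unchanged.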