├── .gitignore ├── Instructions.pdf ├── README.md ├── config.ini ├── ddpg.py ├── ddpg_cnn.py ├── env_test.py ├── mpsched.c ├── mptcp_recv ├── info.c ├── recv.py └── setup.py ├── mptcp_results ├── cal ├── cal_throughput.c ├── eth ├── mptcplog.c ├── tcpdump.sh ├── w0 ├── w1 └── w2 ├── naf.py ├── naf_cnn.py ├── normalized_actions.py ├── old_main.py ├── ounoise.py ├── out_log_0325 ├── replay_memory.py ├── setup.py ├── tc.sh ├── train.py ├── train_2.py └── train_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | t2.tar.gz 3 | *.pcap 4 | *.txt 5 | *.so 6 | *.o 7 | *.dat 8 | -------------------------------------------------------------------------------- /Instructions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/Instructions.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | state:\ 2 | The observation collected in one time slot: [[throughput_1, RTT_1, unacked_1, retrans_1], [throughput_2, RTT_2, unacked_2, retrans_2]], one inner list per subflow.\ 3 | A state stacks k consecutive time slots; for example, with k=4:\ 4 | [[[210, 4841, 20, 13], [114, 3158, 1, 14]], [[183, 4063, 5, 13], [138, 733, 0, 14]], [[246, 4519, 2, 13], [133, 718, 0, 14]], [[248, 3465, 10, 13], [136, 3040, 1, 14]]] 5 | 6 | 7 | \ 8 | reward:\ 9 | l\*(sum of every subflow's throughput over the k time slots) - m\*(sum of each subflow's most recent RTT) - n\*(sum of each subflow's most recent unacked count) - p\*(retransmissions accumulated over the k time slots) 10 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [server] 2 | port=9000 3 | ip=114.212.80.16 4 | 5 | [file] 6 | file=./256mb.dat 7 | 8 | [env] 9 | buffer_size=1024 10 | time=1 11 | episode=100 12 | -------------------------------------------------------------------------------- /ddpg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | 20 | class Actor(nn.Module): 21 | 22 | def __init__(self, hidden_size, num_inputs, action_space): 23 | super(Actor, self).__init__() 24 | self.action_space = action_space 25 | num_outputs = action_space.shape[0] 26 | 27 | self.bn0 = nn.BatchNorm1d(num_inputs) 28 | self.bn0.weight.data.fill_(1) 29 | self.bn0.bias.data.fill_(0) 30 | 31 | self.linear1 = nn.Linear(num_inputs, hidden_size) 32 | self.bn1 = nn.BatchNorm1d(hidden_size) 33 | self.bn1.weight.data.fill_(1) 34 | self.bn1.bias.data.fill_(0) 35 | 36 | self.linear2 = nn.Linear(hidden_size, hidden_size) 37 | self.bn2 = nn.BatchNorm1d(hidden_size) 38 | self.bn2.weight.data.fill_(1) 39 | self.bn2.bias.data.fill_(0) 40 | 41 | self.mu = nn.Linear(hidden_size, num_outputs) 42 | self.mu.weight.data.mul_(0.1) 43 | self.mu.bias.data.mul_(0.1) 44 | 45 | 46 | def forward(self, inputs): 47 | x = inputs 48 | x = self.bn0(x) 49 | x = F.tanh(self.linear1(x)) 50 | x = F.tanh(self.linear2(x)) 51 |
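# NOTE: bn1 and bn2 are constructed in __init__ but never applied in this forward pass, so only bn0 actually normalizes; flagged here rather than changed, since retraining behaviour could depend on it.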
52 | mu = F.tanh(self.mu(x)) 53 | return mu 54 | 55 | 56 | class Critic(nn.Module): 57 | 58 | def __init__(self, hidden_size, num_inputs, action_space): 59 | super(Critic, self).__init__() 60 | self.action_space = action_space 61 | num_outputs = action_space.shape[0] 62 | self.bn0 = nn.BatchNorm1d(num_inputs) 63 | self.bn0.weight.data.fill_(1) 64 | self.bn0.bias.data.fill_(0) 65 | 66 | self.linear1 = nn.Linear(num_inputs, hidden_size) 67 | self.bn1 = nn.BatchNorm1d(hidden_size) 68 | self.bn1.weight.data.fill_(1) 69 | self.bn1.bias.data.fill_(0) 70 | 71 | self.linear_action = nn.Linear(num_outputs, hidden_size) 72 | self.bn_a = nn.BatchNorm1d(hidden_size) 73 | self.bn_a.weight.data.fill_(1) 74 | self.bn_a.bias.data.fill_(0) 75 | 76 | self.linear2 = nn.Linear(hidden_size + hidden_size, hidden_size) 77 | self.bn2 = nn.BatchNorm1d(hidden_size) 78 | self.bn2.weight.data.fill_(1) 79 | self.bn2.bias.data.fill_(0) 80 | 81 | self.V = nn.Linear(hidden_size, 1) 82 | self.V.weight.data.mul_(0.1) 83 | self.V.bias.data.mul_(0.1) 84 | 85 | def forward(self, inputs, actions): 86 | x = inputs 87 | x = self.bn0(x) 88 | x = F.tanh(self.linear1(x)) 89 | a = F.tanh(self.linear_action(actions)) 90 | x = torch.cat((x, a), 1) 91 | x = F.tanh(self.linear2(x)) 92 | 93 | V = self.V(x) 94 | return V 95 | 96 | 97 | class DDPG(object): 98 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 99 | 100 | self.num_inputs = num_inputs 101 | self.action_space = action_space 102 | 103 | self.actor = Actor(hidden_size, self.num_inputs, self.action_space) 104 | self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space) 105 | self.actor_optim = Adam(self.actor.parameters(), lr=1e-4) 106 | 107 | self.critic = Critic(hidden_size, self.num_inputs, self.action_space) 108 | self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space) 109 | self.critic_optim = Adam(self.critic.parameters(), lr=1e-3) 110 | 111 | self.gamma = gamma 112 | self.tau = tau 113 | 114 | hard_update(self.actor_target, self.actor) # Make sure target is with the same weight 115 | hard_update(self.critic_target, self.critic) 116 | 117 | 118 | def select_action(self, state, exploration=None): 119 | self.actor.eval() 120 | mu = self.actor((Variable(state, volatile=True))) 121 | self.actor.train() 122 | mu = mu.data 123 | if exploration is not None: 124 | mu += torch.Tensor(exploration.noise()) 125 | 126 | return mu.clamp(0, 4) 127 | 128 | 129 | def update_parameters(self, batch): 130 | state_batch = Variable(torch.cat(batch.state)) 131 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 132 | action_batch = Variable(torch.cat(batch.action)) 133 | reward_batch = Variable(torch.cat(batch.reward)) 134 | mask_batch = Variable(torch.cat(batch.mask)) 135 | 136 | next_action_batch = self.actor_target(next_state_batch) 137 | next_state_action_values = self.critic_target(next_state_batch, next_action_batch) 138 | 139 | reward_batch = torch.unsqueeze(reward_batch, 1) 140 | expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values) 141 | 142 | self.critic_optim.zero_grad() 143 | 144 | state_action_batch = self.critic((state_batch), (action_batch)) 145 | 146 | value_loss = MSELoss(state_action_batch, expected_state_action_batch) 147 | value_loss.backward() 148 | self.critic_optim.step() 149 | 150 | self.actor_optim.zero_grad() 151 | 152 | policy_loss = -self.critic((state_batch),self.actor((state_batch))) 153 | 154 | policy_loss = policy_loss.mean() 155 | policy_loss.backward() 
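# NOTE: mask_batch is assembled above but never enters the TD target (reward + gamma * Q'), so terminal transitions still bootstrap; also, this backward pass deposits gradients in the critic that only the next critic_optim.zero_grad() clears.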
156 | self.actor_optim.step() 157 | 158 | soft_update(self.actor_target, self.actor, self.tau) 159 | soft_update(self.critic_target, self.critic, self.tau) 160 | -------------------------------------------------------------------------------- /ddpg_cnn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | 20 | class Actor(nn.Module): 21 | 22 | def __init__(self, hidden_size, num_inputs, action_space): 23 | super(Actor, self).__init__() 24 | self.action_space = action_space 25 | num_outputs = action_space.shape[0] 26 | 27 | self.conv1 = nn.Sequential( 28 | nn.Conv1d( 29 | in_channels=2, 30 | out_channels=16, 31 | kernel_size=4, 32 | stride=1, 33 | padding=1, 34 | ), 35 | nn.ReLU(), 36 | nn.MaxPool1d(kernel_size=2), 37 | ) 38 | self.out1 = nn.Linear(48, 16) 39 | 40 | self.conv2 = nn.Sequential( 41 | nn.Conv1d( 42 | in_channels=2, 43 | out_channels=16, 44 | kernel_size=4, 45 | stride=1, 46 | padding=1, 47 | ), 48 | nn.ReLU(), 49 | nn.MaxPool1d(kernel_size=2), 50 | ) 51 | self.out2 = nn.Linear(48, 16) 52 | 53 | 54 | 55 | self.bn0 = nn.BatchNorm1d(num_inputs) 56 | self.bn0.weight.data.fill_(1) 57 | self.bn0.bias.data.fill_(0) 58 | 59 | self.linear1 = nn.Linear(num_inputs, hidden_size) 60 | self.bn1 = nn.BatchNorm1d(hidden_size) 61 | self.bn1.weight.data.fill_(1) 62 | self.bn1.bias.data.fill_(0) 63 | 64 | self.linear2 = nn.Linear(hidden_size, hidden_size) 65 | self.bn2 = nn.BatchNorm1d(hidden_size) 66 | self.bn2.weight.data.fill_(1) 67 | self.bn2.bias.data.fill_(0) 68 | 69 | self.mu = nn.Linear(hidden_size, num_outputs) 70 | self.mu.weight.data.mul_(0.1) 71 | self.mu.bias.data.mul_(0.1) 72 | 73 | 74 | def forward(self, inputs): 75 | 76 | cnn1 = inputs[:, 0:8].contiguous() 77 | cnn1 = cnn1.view(1,2,8) 78 | cnn1 = self.conv1(cnn1) 79 | cnn1 = cnn1.view(cnn1.size(0), -1) 80 | cnn1 = self.out1(cnn1) 81 | cnn1 = cnn1.view(2,8) 82 | 83 | cnn2 = inputs[:, 8:16].contiguous() 84 | cnn2 = cnn2.view(1,2,8) 85 | cnn2 = self.conv2(cnn2) 86 | cnn2 = cnn2.view(cnn2.size(0), -1) 87 | cnn2 = self.out2(cnn2) 88 | cnn2 = cnn2.view(2,8) 89 | 90 | x = torch.cat((cnn1,cnn2), 1) 91 | x = torch.cat((x,inputs[:,16:]), 1) 92 | 93 | 94 | x = self.bn0(x) 95 | x = F.tanh(self.linear1(x)) 96 | x = F.tanh(self.linear2(x)) 97 | 98 | mu = F.tanh(self.mu(x)) 99 | return mu 100 | 101 | 102 | class Critic(nn.Module): 103 | 104 | def __init__(self, hidden_size, num_inputs, action_space): 105 | super(Critic, self).__init__() 106 | self.action_space = action_space 107 | num_outputs = action_space.shape[0] 108 | 109 | self.conv1 = nn.Sequential( 110 | nn.Conv1d( 111 | in_channels=2, 112 | out_channels=16, 113 | kernel_size=4, 114 | stride=1, 115 | padding=1, 116 | ), 117 | nn.ReLU(), 118 | nn.MaxPool1d(kernel_size=2), 119 | ) 120 | self.out1 = nn.Linear(48, 16) 121 | 122 | self.conv2 = nn.Sequential( 123 | nn.Conv1d( 124 | in_channels=2, 125 | out_channels=16, 126 | kernel_size=4, 127 | stride=1, 128 | padding=1, 129 | ), 130 | 
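# shape note: each 8-step window enters as (1, 2, 8); Conv1d(kernel_size=4, padding=1) leaves length 7 and MaxPool1d(2) leaves 3, so 16 channels * 3 = 48 features -- the origin of the 48 in self.out2 = nn.Linear(48, 16) below.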
nn.ReLU(), 131 | nn.MaxPool1d(kernel_size=2), 132 | ) 133 | self.out2 = nn.Linear(48, 16) 134 | 135 | self.bn0 = nn.BatchNorm1d(num_inputs) 136 | self.bn0.weight.data.fill_(1) 137 | self.bn0.bias.data.fill_(0) 138 | 139 | self.linear1 = nn.Linear(num_inputs, hidden_size) 140 | self.bn1 = nn.BatchNorm1d(hidden_size) 141 | self.bn1.weight.data.fill_(1) 142 | self.bn1.bias.data.fill_(0) 143 | 144 | self.linear_action = nn.Linear(num_outputs, hidden_size) 145 | self.bn_a = nn.BatchNorm1d(hidden_size) 146 | self.bn_a.weight.data.fill_(1) 147 | self.bn_a.bias.data.fill_(0) 148 | 149 | self.linear2 = nn.Linear(hidden_size + hidden_size, hidden_size) 150 | self.bn2 = nn.BatchNorm1d(hidden_size) 151 | self.bn2.weight.data.fill_(1) 152 | self.bn2.bias.data.fill_(0) 153 | 154 | self.V = nn.Linear(hidden_size, 1) 155 | self.V.weight.data.mul_(0.1) 156 | self.V.bias.data.mul_(0.1) 157 | 158 | def forward(self, inputs, actions): 159 | cnn1 = inputs[:, 0:8].contiguous() 160 | cnn1 = cnn1.view(1,2,8) 161 | cnn1 = self.conv1(cnn1) 162 | cnn1 = cnn1.view(cnn1.size(0), -1) 163 | cnn1 = self.out1(cnn1) 164 | cnn1 = cnn1.view(2,8) 165 | 166 | cnn2 = inputs[:, 8:16].contiguous() 167 | cnn2 = cnn2.view(1,2,8) 168 | cnn2 = self.conv2(cnn2) 169 | cnn2 = cnn2.view(cnn2.size(0), -1) 170 | cnn2 = self.out2(cnn2) 171 | cnn2 = cnn2.view(2,8) 172 | 173 | x = torch.cat((cnn1,cnn2), 1) 174 | x = torch.cat((x,inputs[:,16:]), 1) 175 | 176 | x = self.bn0(x) 177 | x = F.tanh(self.linear1(x)) 178 | a = F.tanh(self.linear_action(actions)) 179 | x = torch.cat((x, a), 1) 180 | x = F.tanh(self.linear2(x)) 181 | 182 | V = self.V(x) 183 | return V 184 | 185 | 186 | class DDPG_CNN(object): 187 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 188 | 189 | self.num_inputs = num_inputs 190 | self.action_space = action_space 191 | 192 | self.actor = Actor(hidden_size, self.num_inputs, self.action_space) 193 | self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space) 194 | self.actor_optim = Adam(self.actor.parameters(), lr=1e-4) 195 | 196 | self.critic = Critic(hidden_size, self.num_inputs, self.action_space) 197 | self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space) 198 | self.critic_optim = Adam(self.critic.parameters(), lr=1e-3) 199 | 200 | self.gamma = gamma 201 | self.tau = tau 202 | 203 | hard_update(self.actor_target, self.actor) # Make sure target is with the same weight 204 | hard_update(self.critic_target, self.critic) 205 | 206 | 207 | def select_action(self, state, exploration=None): 208 | self.actor.eval() 209 | mu = self.actor((Variable(state, volatile=True))) 210 | self.actor.train() 211 | mu = mu.data 212 | if exploration is not None: 213 | mu += torch.Tensor(exploration.noise()) 214 | 215 | return mu.clamp(0, 4) 216 | 217 | 218 | def update_parameters(self, batch): 219 | state_batch = Variable(torch.cat(batch.state)) 220 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 221 | action_batch = Variable(torch.cat(batch.action)) 222 | reward_batch = Variable(torch.cat(batch.reward)) 223 | mask_batch = Variable(torch.cat(batch.mask)) 224 | 225 | next_action_batch = self.actor_target(next_state_batch) 226 | next_state_action_values = self.critic_target(next_state_batch, next_action_batch) 227 | 228 | reward_batch = torch.unsqueeze(reward_batch, 1) 229 | expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values) 230 | 231 | self.critic_optim.zero_grad() 232 | 233 | state_action_batch = self.critic((state_batch), 
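# Q(s, a) for the sampled actions, regressed below onto the frozen TD target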
(action_batch)) 234 | 235 | value_loss = MSELoss(state_action_batch, expected_state_action_batch) 236 | value_loss.backward() 237 | self.critic_optim.step() 238 | 239 | self.actor_optim.zero_grad() 240 | 241 | policy_loss = -self.critic((state_batch),self.actor((state_batch))) 242 | 243 | policy_loss = policy_loss.mean() 244 | policy_loss.backward() 245 | self.actor_optim.step() 246 | 247 | soft_update(self.actor_target, self.actor, self.tau) 248 | soft_update(self.critic_target, self.critic, self.tau) 249 | -------------------------------------------------------------------------------- /env_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from gym import wrappers 5 | from gym import spaces 6 | 7 | import torch 8 | from ddpg_cnn import DDPG_CNN 9 | from naf_cnn import NAF_CNN 10 | from normalized_actions import NormalizedActions 11 | from ounoise import OUNoise 12 | from replay_memory import ReplayMemory, Transition 13 | 14 | 15 | 16 | class env(): 17 | """ """ 18 | def __init__(self): 19 | self.observation_space = spaces.Box(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]), np.array([float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf")])) 20 | 21 | self.action_space = spaces.Box(np.array([1]), np.array([4])) 22 | 23 | 24 | 25 | def reward(self): 26 | rewards = 0; 27 | for i in range(self.k): 28 | temp = self.list[i] 29 | for j in range(len(temp)): 30 | rewards = rewards + self.l * temp[j][0] 31 | temp = self.list[-1] 32 | for j in range(len(temp)): 33 | rewards = rewards - self.m*temp[j][1] - self.n * temp[j][2] - self.p * (temp[j][3] - self.list[0][j][3]) 34 | return rewards 35 | 36 | """ reset env, return the initial state """ 37 | def reset(self): 38 | state = np.array([[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2],[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2]]) 39 | state = torch.FloatTensor(state) 40 | return state 41 | 42 | """ action = [sub1_buff_size, sub2_buff_size] """ 43 | def step(self, action): 44 | state = np.array([[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2],[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2]]) 45 | state = torch.FloatTensor(state) 46 | reward = 1 47 | done = 0 48 | return state, reward, done 49 | 50 | 51 | def main(): 52 | my_env = env() 53 | 54 | agent = NAF_CNN(0.99, 0.001, 128, 55 | my_env.observation_space.shape[0], my_env.action_space) 56 | 57 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') 58 | parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G', 59 | help='initial noise scale (default: 0.3)') 60 | parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G', 61 | help='final noise scale (default: 0.3)') 62 | parser.add_argument('--exploration_end', type=int, default=100, metavar='N', 63 | help='number of episodes with noise (default: 100)') 64 | args = parser.parse_args() 65 | 66 | ounoise = OUNoise(my_env.action_space.shape[0]) 67 | ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - 1) / args.exploration_end + args.final_noise_scale 68 | ounoise.reset() 69 | 70 | state = my_env.reset() 71 | i = 10 72 | while i>0: 73 
| action = agent.select_action(state, ounoise) 74 | print("action: {}".format(action)) 75 | next_state, reward, done = my_env.step(action) 76 | if done: 77 | break 78 | print(reward) 79 | i = i-1 80 | 81 | 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /mpsched.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static PyObject* persist_state(PyObject* self, PyObject* args) 5 | { 6 | int fd; 7 | if(!PyArg_ParseTuple(args, "i", &fd)) { 8 | return NULL; 9 | } 10 | int val = MPTCP_INFO_FLAG_SAVE_MASTER; 11 | setsockopt(fd, SOL_TCP, MPTCP_INFO, &val, sizeof(val)); 12 | return Py_BuildValue("i", fd); 13 | } 14 | 15 | static PyObject* get_meta_info(PyObject* self, PyObject* args) 16 | { 17 | int fd; 18 | if(!PyArg_ParseTuple(args, "i", &fd)) { 19 | return NULL; 20 | } 21 | 22 | struct mptcp_info minfo; 23 | struct mptcp_meta_info meta_info; 24 | struct tcp_info initial; 25 | struct tcp_info others[NUM_SUBFLOWS]; 26 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 27 | 28 | minfo.tcp_info_len = sizeof(struct tcp_info); 29 | minfo.sub_len = sizeof(others); 30 | minfo.meta_len = sizeof(struct mptcp_meta_info); 31 | minfo.meta_info = &meta_info; 32 | minfo.initial = &initial; 33 | minfo.subflows = &others; 34 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 35 | minfo.total_sub_info_len = sizeof(others_info); 36 | minfo.subflow_info = &others_info; 37 | 38 | socklen_t len = sizeof(minfo); 39 | 40 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 41 | PyObject *list = PyList_New(0); 42 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_unacked)); 43 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_retransmits)); 44 | return list; 45 | } 46 | 47 | static PyObject* get_sub_info(PyObject* self, PyObject* args) 48 | { 49 | int fd; 50 | if(!PyArg_ParseTuple(args, "i", &fd)) { 51 | return NULL; 52 | } 53 | 54 | struct mptcp_info minfo; 55 | struct mptcp_meta_info meta_info; 56 | struct tcp_info initial; 57 | struct tcp_info others[NUM_SUBFLOWS]; 58 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 59 | 60 | minfo.tcp_info_len = sizeof(struct tcp_info); 61 | minfo.sub_len = sizeof(others); 62 | minfo.meta_len = sizeof(struct mptcp_meta_info); 63 | minfo.meta_info = &meta_info; 64 | minfo.initial = &initial; 65 | minfo.subflows = &others; 66 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 67 | minfo.total_sub_info_len = sizeof(others_info); 68 | minfo.subflow_info = &others_info; 69 | 70 | socklen_t len = sizeof(minfo); 71 | 72 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 73 | 74 | PyObject *list = PyList_New(0); 75 | int i; 76 | for(i=0; i < NUM_SUBFLOWS; i++){ 77 | 78 | if(others[i].tcpi_state != 1) 79 | break; 80 | 81 | PyObject *subflows = PyList_New(0); 82 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_segs_out)); 83 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_rtt)); 84 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_snd_cwnd)); 85 | //PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_unacked)); 86 | //PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_total_retrans)); /* Packets which are "in flight" */ 87 | 88 | PyList_Append(list, subflows); 89 | } 90 | return list; 91 | } 92 | 93 | 94 | static PyObject* set_seg(PyObject* self, PyObject* args) 95 | { 96 | PyObject * listObj; 97 | if (! 
PyArg_ParseTuple( args, "O", &listObj )) 98 | return NULL; 99 | 100 | long length = PyList_Size(listObj); 101 | int fd = (int)PyLong_AsLong(PyList_GetItem(listObj, 0)); 102 | int i; 103 | 104 | struct mptcp_sched_info sched_info; 105 | sched_info.len = length-1; 106 | unsigned char quota[NUM_SUBFLOWS]; 107 | unsigned char segments[NUM_SUBFLOWS]; 108 | 109 | sched_info.quota = &quota; 110 | sched_info.num_segments = &segments; 111 | 112 | /* the rest of set_seg and the module boilerplate were lost in extraction; the lines below are a reconstruction modelled on info.c and setup.py -- in particular, MPTCP_SCHED_INFO is an assumed sockopt name for the patched kernel */ 113 | for(i=1; i < length; i++) { 114 | segments[i-1] = (unsigned char)PyLong_AsLong(PyList_GetItem(listObj, i)); 115 | quota[i-1] = 0; 116 | } 117 | setsockopt(fd, SOL_TCP, MPTCP_SCHED_INFO, &sched_info, sizeof(sched_info)); 118 | return Py_BuildValue("i", 0); 119 | } 120 | 121 | static PyMethodDef Methods[] = { 122 | {"persist_state", persist_state, METH_VARARGS, "persist mptcp subflow state"}, 123 | {"get_meta_info", get_meta_info, METH_VARARGS, "get mptcp meta info"}, 124 | {"get_sub_info", get_sub_info, METH_VARARGS, "get mptcp subflow info"}, 125 | {"set_seg", set_seg, METH_VARARGS, "set per-subflow segment quotas"}, 126 | {NULL, NULL, 0, NULL} 127 | }; 128 | 129 | static struct PyModuleDef Def = { 130 | PyModuleDef_HEAD_INIT, 131 | "mpsched", 132 | "mptcp scheduler interface", 133 | -1, 134 | Methods 135 | }; 136 | 137 | PyMODINIT_FUNC PyInit_mpsched(void) 138 | { 139 | return PyModule_Create(&Def); 140 | } 141 | -------------------------------------------------------------------------------- /mptcp_recv/info.c: -------------------------------------------------------------------------------- 1 | #include <Python.h> 2 | #include <linux/tcp.h> /* the original bracketed header names were stripped in extraction; Python.h is certain, linux/tcp.h is inferred from the uapi include dirs in setup.py */ 3 | 4 | static PyObject* persist_state(PyObject* self, PyObject* args) 5 | { 6 | int fd; 7 | if(!PyArg_ParseTuple(args, "i", &fd)) { 8 | return NULL; 9 | } 10 | int val = MPTCP_INFO_FLAG_SAVE_MASTER; 11 | setsockopt(fd, SOL_TCP, MPTCP_INFO, &val, sizeof(val)); 12 | return Py_BuildValue("i", fd); 13 | } 14 | 15 | static PyObject* get_info(PyObject* self, PyObject* args) 16 | { 17 | int fd; 18 | if(!PyArg_ParseTuple(args, "i", &fd)) { 19 | return NULL; 20 | } 21 | 22 | struct mptcp_info minfo; 23 | struct mptcp_meta_info meta_info; 24 | struct tcp_info initial; 25 | struct tcp_info others[NUM_SUBFLOWS]; 26 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 27 | 28 | minfo.tcp_info_len = sizeof(struct tcp_info); 29 | minfo.sub_len = sizeof(others); 30 | minfo.meta_len = sizeof(struct mptcp_meta_info); 31 | minfo.meta_info = &meta_info; 32 | minfo.initial = &initial; 33 | minfo.subflows = &others; 34 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 35 | minfo.total_sub_info_len = sizeof(others_info); 36 | minfo.subflow_info = &others_info; 37 | 38 | socklen_t len = sizeof(minfo); 39 | 40 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 41 | 42 | PyObject *list = PyList_New(0); 43 | if(others[0].tcpi_state == 1) 44 | { 45 | PyList_Append(list, Py_BuildValue("I", others[0].tcpi_bytes_received)); 46 | PyList_Append(list, Py_BuildValue("I", others[1].tcpi_bytes_received)); 47 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_recv_ofo_buff)); 48 | } 49 | return list; 50 | } 51 | 52 | 53 | static PyMethodDef Methods[] = { 54 | {"persist_state", persist_state, METH_VARARGS, "persist mptcp subflow state"}, 55 | {"get_info", get_info, METH_VARARGS, "get recv info"}, 56 | {NULL, NULL, 0, NULL} 57 | }; 58 | 59 | static struct PyModuleDef Def = { 60 | PyModuleDef_HEAD_INIT, 61 | "info", 62 | "get recv info", 63 | -1, 64 | Methods 65 | }; 66 | 67 | PyMODINIT_FUNC PyInit_info(void) 68 | { 69 | return PyModule_Create(&Def); 70 | } 71 | -------------------------------------------------------------------------------- /mptcp_recv/recv.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import threading 3 | import info 4 | import time 5 | 6 | 7 | class recv_thread(threading.Thread): 8 | 9 | def __init__(self, sock, buff_size=2048): 10 | threading.Thread.__init__(self) 11 | self.sock = sock 12 | self.buffer_size = buff_size 13 | 14 | def run(self): 15 | buff = self.sock.recv(self.buffer_size) 16 | filename = str(buff, encoding='utf8') 17 | fp = open(filename, 'wb') 18 | if not fp: 19 | print("open file error.\n") 20 | self.sock.send(bytes("open file error.", encoding='utf8')) 21 | pass 22 | else: 23 | self.sock.send(bytes("ok", encoding='utf8')) 24 | while(True): 25 | buff = self.sock.recv(self.buffer_size) 26 | if not buff: 27 | break 28 | else: 29 | fp.write(buff) 30 | print("receiving file {} from sender finished.".format(filename)) 31 | fp.close() 32 | 33 | 34 | class record(object): 35 | """Receive-side samples: per-subflow bytes received plus the out-of-order buffer size.""" 36 | def __init__(self, timestep=0.2, datafile="record"): 37 | self.data
= [] 38 | self.timestep = timestep 39 | self.datafile = datafile 40 | 41 | def save(self): 42 | length = len(self.data) 43 | with open(self.datafile, 'w') as f: 44 | f.write(str(self.timestep)) 45 | f.write('\n') 46 | f.write(str(length)) 47 | f.write('\n') 48 | for i in range(length): 49 | f.write('%d %d %d\n' % (self.data[i][0], self.data[i][1], self.data[i][2])) 50 | f.close() 51 | 52 | def load(self, datafile): 53 | self.datafile = datafile 54 | try: 55 | f = open(datafile, 'r') 56 | self.timestep = float(f.readline()) 57 | length = int(f.readline()) 58 | for i in range(length): 59 | s = f.readline().split(' ') 60 | self.data.append([int(s[0]), int(s[1]), int(s[2])]) 61 | finally: 62 | if f: 63 | f.close() 64 | 65 | def put(self, recd): 66 | self.data.append(recd) 67 | 68 | def draw(self): 69 | pass 70 | 71 | 72 | def main(): 73 | server = socket.socket() 74 | host = ''  # bind on all interfaces ('*' is not a valid address for socket.bind) 75 | port = 6669 76 | server.bind((host, port)) 77 | 78 | server.listen(1) 79 | num = 0 80 | while True: 81 | c, addr = server.accept() 82 | print('connect addr : {}'.format(addr)) 83 | fd = c.fileno() 84 | io = recv_thread(c) 85 | info.persist_state(fd) 86 | io.start() 87 | 88 | timestep = 0.2 89 | r = record(timestep=timestep, datafile="record{}".format(num)) 90 | time.sleep(1) 91 | while True: 92 | time.sleep(timestep) 93 | data = info.get_info(fd) 94 | if len(data) == 0: 95 | io.join() 96 | break 97 | r.put(data) 98 | 99 | r.save() 100 | num = num + 1 101 | 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /mptcp_recv/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | setup(name='info', 4 | ext_modules=[ 5 | Extension('info', 6 | ['info.c'], 7 | include_dirs=['/usr/src/linux-headers-4.4.110-mptcp+/include/uapi', '/usr/src/linux-headers-4.4.110-mptcp+/include', '/usr/include/python3.5m'], 8 | define_macros=[('NUM_SUBFLOWS', '2'), ('SOL_TCP', '6')] 9 | ) 10 | ]) 11 | -------------------------------------------------------------------------------- /mptcp_results/cal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/cal -------------------------------------------------------------------------------- /mptcp_results/cal_throughput.c: -------------------------------------------------------------------------------- 1 | /* 2 | * README 3 | * This program sums TCP payload lengths over a configurable unit of time, i.e. it computes throughput. 4 | * Its input files are the output files of the mptcplog.c program. 5 | * Input file names must carry the suffix _out.txt, e.g. s_2_2.cap_port0_59607_out.txt. 6 | * The program walks the current directory, processes every file with that suffix, and writes a matching output file. 7 | * 8 | * 9 | *** Input data format *** 10 | * Column 1 is a timestamp, column 2 is the TCP payload length; the columns are separated by a Tab character ('\t'). 11 | * For example: 12 | // timestamp payload length 13 | 1436339533.121812 8 14 | 1436339533.121852 8 15 | 1436339533.127700 8 16 | 1436339533.137741 8 17 | 1436339533.137794 8 18 | 1436339533.137803 8 19 | 20 ...
21 | 22 | *** Output data format *** 23 | * Column 1 is the adjusted unit timestamp, column 2 is the summed payload length within that unit of time. 24 | * For example, with the unit of time set to 0.01 s (precision==2), 25 | * the input above yields the following output: 26 | // unit timestamp summed payload length 27 | 1436339533.12 24 //3*8=24 28 | 1436339533.13 24 //columns separated by '\t' 29 | 30 | 31 | 32 | encoding: utf-8 33 | created time: 2015-07-10 34 | 35 | 36 | */ 37 | 38 | 39 | #include <stdio.h> 40 | #include <stdlib.h> 41 | #include <string.h> 42 | #include <assert.h> 43 | #include <dirent.h> //opendir (the bracketed header names were stripped in extraction; this set is inferred from the calls below) 44 | 45 | 46 | //precision sets how many digits after the timestamp's decimal point are kept 47 | //e.g. with precision == 2 the unit of time is 0.01 s 48 | //change precision to adjust the granularity 49 | // 0 < precision < length of the fractional part 50 | #define precision 2 51 | 52 | //maximum file line length 53 | #define MAX_LINE_LEN 1024 54 | 55 | //maximum timestamp length 56 | #define MAX_TIME_LEN 64 57 | 58 | int total_sum = 0; 59 | 60 | void calculate(char* in_file_name, char* out_file_name) 61 | { 62 | FILE* fread = fopen(in_file_name,"r"); 63 | 64 | //w+ opens for read/write, truncating the file if it exists and creating it otherwise 65 | FILE* fwrite = fopen(out_file_name,"w+"); 66 | 67 | char line[MAX_LINE_LEN]; 68 | 69 | //previous timestamp: integer part plus the first `precision` fractional digits 70 | char pre_time[MAX_TIME_LEN]; 71 | pre_time[0]='\0'; 72 | pre_time[MAX_TIME_LEN-1]='\0'; 73 | 74 | //current timestamp: integer part plus the first `precision` fractional digits 75 | char current_time[MAX_TIME_LEN]; 76 | current_time[0]='\0'; 77 | current_time[MAX_TIME_LEN-1]='\0'; 78 | 79 | 80 | 81 | 82 | //note: very long payload sums may exceed the range of unsigned int 83 | unsigned int current_data_len = 0; 84 | unsigned int sum = 0; 85 | 86 | //timestamp length: integer part + precision 87 | int time_len = 0; 88 | 89 | while(1) 90 | { 91 | //line_count++; 92 | fgets(line, MAX_LINE_LEN, fread); 93 | if (feof(fread)) 94 | break; 95 | 96 | //printf("line: %s\n", line); 97 | 98 | char* p1 = strchr(line, '.'); 99 | char* p2 = strchr(line, '\t'); 100 | if (p1==NULL || p2==NULL) 101 | { 102 | if (feof(fread)) 103 | break; 104 | 105 | printf("Input data format error! or end of file\n"); 106 | fclose(fread); 107 | fclose(fwrite); 108 | assert(0); 109 | } 110 | 111 | //TCP payload length 112 | current_data_len = atoi(p2+1); 113 | 114 | time_len = p1 + precision - line + 1; 115 | 116 | strncpy(current_time, line, time_len); 117 | current_time[time_len]='\0'; 118 | 119 | //printf ("%s\n",current_time); 120 | 121 | int i=0; 122 | /* original lines 122-194 were lost in extraction (an HTML-stripped "<...>" span); the bucketing logic and the opening of main() below are a reconstruction from the README comment above and the parallel loop in mptcplog.c, and may differ from the original in detail */ for (i = 0; i < time_len; i++) { if (current_time[i] != pre_time[i]) break; } if (i == time_len) { //same unit of time: keep accumulating sum += current_data_len; } else { //new unit of time: flush the previous bucket first if (pre_time[0] != '\0') fprintf(fwrite, "%s\t%u\n", pre_time, sum); strncpy(pre_time, current_time, MAX_TIME_LEN-1); sum = current_data_len; } total_sum += current_data_len; } //flush the final bucket and close both files if (pre_time[0] != '\0') fprintf(fwrite, "%s\t%u\n", pre_time, sum); fclose(fread); fclose(fwrite); } //walk the current directory looking for files with the expected suffix int main(int argc, char *argv[]) { char outfilename[256]; struct dirent *pDirEntry = NULL; DIR *pDir = NULL; if( (pDir = opendir("./")) == NULL ) { printf("opendir failed!\n"); return 1; } else { while( pDirEntry = readdir(pDir) ) { //files carrying the _out.txt suffix 195 | if (strstr(pDirEntry->d_name, "_out.txt")) 196 | { 197 | int len = strlen(pDirEntry->d_name); 198 | len = len-8; 199 | strncpy(outfilename,pDirEntry->d_name,len); 200 | outfilename[len]='\0'; 201 | strcat(outfilename,"_throughput.txt"); 202 | 203 | printf("input file: %s\t output file: %s\n",pDirEntry->d_name, outfilename); 204 | 205 | calculate(pDirEntry->d_name, outfilename); 206 | printf("total data byte: %d\n", total_sum); 207 | printf("------------------------------------\n"); 208 | total_sum = 0; 209 | } 210 | } 211 | closedir(pDir); 212 | 213 | } 214 | 215 | 216 | 217 | return 0; 218 | } 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /mptcp_results/eth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/eth -------------------------------------------------------------------------------- /mptcp_results/mptcplog.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <pcap.h> 5 | #include <sys/types.h> 6 | #include <netinet/in.h> 7 | #include <arpa/inet.h> 8 | 9 | #include <dirent.h> //opendir 10 | #include <sys/stat.h> /* header names reconstructed; the originals were stripped in extraction */ 11 | 12 | #define SIZE 15120 13 | 14 | 15 | /* 16 | * readme 17 | * (1) install libpcap (see: http://blog.csdn.net/qinggebuyao/article/details/7715843) 18 | * (2) gcc -o programname
filename.c -lpcap 19 | * (3) sudo ./programname 20 | * (4) 如果有libpcap.so.1 链接错误,请建立软链接,参考:http://blog.csdn.net/cfjtaishan/article/details/7096085 21 | * 22 | * 23 | */ 24 | 25 | //最大tcp目标端口数,和mptcp子流数相关 26 | #define MAX_PORT_NUM 20 27 | 28 | //ip address 用于过滤数据包 29 | #define dst_ip "114.212.80.16" 30 | 31 | //#define src_ip "192.168.1.106" //wifi2 32 | //#define src_ip "172.27.152.212" 33 | //#define src_ip "192.168.1.104" //wifi1 34 | //#define src_ip "114.212.85.15" //lte 35 | #define src_ip "114.212.83.34" //eth 36 | 37 | 38 | unsigned short tcp_dst_port_table[MAX_PORT_NUM]; 39 | int port_count = 0; 40 | 41 | //查表/填表 42 | int search_table(unsigned short port) 43 | { 44 | int i=0; 45 | for (i=0; i 0) 101 | { 102 | 103 | if(*(pkt_data+12)==0x08 && *(pkt_data+13)==0x00 && *(unsigned int*)(pkt_data+26)==src && *(unsigned int*)(pkt_data+30)==dst) //MAC type==IP ip.src==client, ip.dst=server 104 | { 105 | 106 | if(*(pkt_data+23)==0x06) // TCP 6 107 | { 108 | tcp_dst_port = *(unsigned short*)(pkt_data+36); 109 | tcp_dst_port = ntohs(tcp_dst_port); 110 | search_table(tcp_dst_port); 111 | } 112 | } 113 | 114 | reval = pcap_next_ex(pcap_handle, &header, (const u_char **)&pkt_data); 115 | 116 | } 117 | 118 | 119 | 120 | return; 121 | } 122 | 123 | //打印tcp时间戳和数据字段长度(区分不同的tcp目的端口) 124 | int get_tcp_info(char *filename) 125 | { 126 | int reval; 127 | struct pcap_pkthdr* header; 128 | u_char *pkt_data; 129 | pcap_t *pcap_handle; 130 | char error_content[PCAP_ERRBUF_SIZE]; 131 | 132 | char tempname[256]; 133 | char dst_port_info[32]; 134 | 135 | FILE *fd; 136 | 137 | int i=0; 138 | 139 | //循环输出多个端口tcp 数据信息到不同的文件中 140 | for (i=0; i 0) 176 | { 177 | 178 | if(*(pkt_data+12)==0x08 && *(pkt_data+13)==0x00 && *(unsigned int*)(pkt_data+26)==src && *(unsigned int*)(pkt_data+30)==dst) //MAC type==IP ip.src==client, ip.dst=server 179 | { 180 | if(*(pkt_data+23)==0x06) // TCP 6 181 | { 182 | tcp_dst_port = *(unsigned short*)(pkt_data+36); 183 | tcp_dst_port = ntohs(tcp_dst_port); 184 | 185 | if(tcp_dst_port == tcp_dst_port_table[i]) 186 | { 187 | double usec = (double)header->ts.tv_usec; 188 | usec /= 1000000; 189 | time_stamp = header->ts.tv_sec + usec; 190 | 191 | //fprintf(fd,"%ld.%ld\t",header->ts.tv_sec,header->ts.tv_usec); // has a bug 192 | 193 | fprintf(fd, "%lf\t", time_stamp); 194 | //total frame len - tcp header len - lens before tcp frame 195 | fprintf(fd,"%d\n",header->len-*(pkt_data+46)/4-34); 196 | //printf("%d, %d\n", header->len, *(pkt_data+46)/4); 197 | // check port 198 | //fprintf(fd,"%d\n",tcp_dst_port); 199 | } 200 | } 201 | } 202 | reval = pcap_next_ex(pcap_handle, &header, (const u_char **)&pkt_data); 203 | } 204 | fclose(fd); 205 | 206 | } 207 | } 208 | 209 | int main(int argc, char *argv[]) 210 | { 211 | 212 | struct dirent *pDirEntry = NULL; 213 | DIR *pDir = NULL; 214 | if( (pDir = opendir("./")) == NULL ) 215 | { 216 | printf("opendir failed!\n"); 217 | return 1; 218 | } 219 | else 220 | { 221 | while( pDirEntry = readdir(pDir) ) 222 | { 223 | //判断是否为指定类型的.cap file 224 | /* 225 | if (pDirEntry->d_name[strlen(pDirEntry->d_name)-4]=='.' 226 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-3]=='c' 227 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-2]=='a' 228 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-1]=='p') 229 | */ 230 | //.pcap files 231 | 232 | if (pDirEntry->d_name[strlen(pDirEntry->d_name)-5]=='.' 
233 | &&pDirEntry->d_name[strlen(pDirEntry->d_name)-4]=='p' 234 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-3]=='c' 235 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-2]=='a' 236 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-1]=='p') 237 | { 238 | printf("输入文件:%s\n",pDirEntry->d_name); 239 | 240 | load_table(pDirEntry->d_name); 241 | get_tcp_info(pDirEntry->d_name); 242 | //display_table(); 243 | table_init(); 244 | printf("------------------------------------\n"); 245 | } 246 | } 247 | closedir(pDir); 248 | 249 | } 250 | 251 | 252 | 253 | return 0; 254 | } 255 | -------------------------------------------------------------------------------- /mptcp_results/tcpdump.sh: -------------------------------------------------------------------------------- 1 | sudo tcpdump -i wlp3s0 -w wifi1.pcap & 2 | sudo tcpdump -i wlx485d601fa31c -w wifi2.pcap & 3 | sudo tcpdump -i enp2s0 -w eth.pcap & 4 | -------------------------------------------------------------------------------- /mptcp_results/w0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w0 -------------------------------------------------------------------------------- /mptcp_results/w1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w1 -------------------------------------------------------------------------------- /mptcp_results/w2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w2 -------------------------------------------------------------------------------- /naf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | class Policy(nn.Module): 20 | 21 | def __init__(self, hidden_size, num_inputs, action_space): 22 | super(Policy, self).__init__() 23 | self.action_space = action_space 24 | num_outputs = action_space.shape[0] 25 | 26 | self.bn0 = nn.BatchNorm1d(num_inputs) 27 | self.bn0.weight.data.fill_(1) 28 | self.bn0.bias.data.fill_(0) 29 | 30 | self.linear1 = nn.Linear(num_inputs, hidden_size) 31 | self.bn1 = nn.BatchNorm1d(hidden_size) 32 | self.bn1.weight.data.fill_(1) 33 | self.bn1.bias.data.fill_(0) 34 | 35 | self.linear2 = nn.Linear(hidden_size, hidden_size) 36 | self.bn2 = nn.BatchNorm1d(hidden_size) 37 | self.bn2.weight.data.fill_(1) 38 | self.bn2.bias.data.fill_(0) 39 | 40 | self.V = nn.Linear(hidden_size, 1) 41 | self.V.weight.data.mul_(0.1) 42 | self.V.bias.data.mul_(0.1) 43 | 44 | self.mu = nn.Linear(hidden_size, num_outputs) 45 | self.mu.weight.data.mul_(0.1) 46 | self.mu.bias.data.mul_(0.1) 47 | 48 | self.L = 
nn.Linear(hidden_size, num_outputs ** 2) 49 | self.L.weight.data.mul_(0.1) 50 | self.L.bias.data.mul_(0.1) 51 | 52 | self.tril_mask = Variable(torch.tril(torch.ones( 53 | num_outputs, num_outputs), diagonal=-1).unsqueeze(0)) 54 | self.diag_mask = Variable(torch.diag(torch.diag( 55 | torch.ones(num_outputs, num_outputs))).unsqueeze(0)) 56 | 57 | def forward(self, inputs): 58 | x, u = inputs 59 | x = self.bn0(x) 60 | x = F.tanh(self.linear1(x)) 61 | x = F.tanh(self.linear2(x)) 62 | 63 | V = self.V(x) 64 | mu = F.tanh(self.mu(x)) 65 | 66 | Q = None 67 | if u is not None: 68 | num_outputs = mu.size(1) 69 | L = self.L(x).view(-1, num_outputs, num_outputs) 70 | L = L * \ 71 | self.tril_mask.expand_as( 72 | L) + torch.exp(L) * self.diag_mask.expand_as(L) 73 | P = torch.bmm(L, L.transpose(2, 1)) 74 | 75 | u_mu = (u - mu).unsqueeze(2) 76 | A = -0.5 * \ 77 | torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0] 78 | 79 | Q = A + V 80 | 81 | return mu, Q, V 82 | 83 | 84 | class NAF: 85 | 86 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 87 | self.action_space = action_space 88 | self.num_inputs = num_inputs 89 | 90 | self.model = Policy(hidden_size, num_inputs, action_space) 91 | self.target_model = Policy(hidden_size, num_inputs, action_space) 92 | self.optimizer = Adam(self.model.parameters(), lr=1e-3) 93 | 94 | self.gamma = gamma 95 | self.tau = tau 96 | 97 | hard_update(self.target_model, self.model) 98 | 99 | def select_action(self, state, exploration=None): 100 | self.model.eval() 101 | mu, _, _ = self.model((Variable(state, volatile=True), None)) 102 | self.model.train() 103 | mu = mu.data 104 | if exploration is not None: 105 | mu += torch.Tensor(exploration.noise()) 106 | 107 | return mu.clamp(1, 4) 108 | 109 | def update_parameters(self, batch): 110 | state_batch = Variable(torch.cat(batch.state)) 111 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 112 | action_batch = Variable(torch.cat(batch.action)) 113 | reward_batch = Variable(torch.cat(batch.reward)) 114 | mask_batch = Variable(torch.cat(batch.mask)) 115 | 116 | _, _, next_state_values = self.target_model((next_state_batch, None)) 117 | 118 | reward_batch = (torch.unsqueeze(reward_batch, 1)) 119 | expected_state_action_values = reward_batch + (next_state_values * self.gamma) 120 | 121 | _, state_action_values, _ = self.model((state_batch, action_batch)) 122 | 123 | loss = MSELoss(state_action_values, expected_state_action_values) 124 | 125 | self.optimizer.zero_grad() 126 | loss.backward() 127 | torch.nn.utils.clip_grad_norm(self.model.parameters(), 1) 128 | self.optimizer.step() 129 | 130 | soft_update(self.target_model, self.model, self.tau) 131 | -------------------------------------------------------------------------------- /naf_cnn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | class Policy(nn.Module): 20 | 21 | def __init__(self, hidden_size, num_inputs, 
action_space): 22 | super(Policy, self).__init__() 23 | self.action_space = action_space 24 | num_outputs = action_space.shape[0] 25 | 26 | self.conv1 = nn.Sequential( 27 | nn.Conv1d( 28 | in_channels=2, 29 | out_channels=16, 30 | kernel_size=4, 31 | stride=1, 32 | padding=1, 33 | ), 34 | nn.ReLU(), 35 | nn.MaxPool1d(kernel_size=2), 36 | ) 37 | self.out1 = nn.Linear(48, 16) 38 | 39 | self.conv2 = nn.Sequential( 40 | nn.Conv1d( 41 | in_channels=2, 42 | out_channels=16, 43 | kernel_size=4, 44 | stride=1, 45 | padding=1, 46 | ), 47 | nn.ReLU(), 48 | nn.MaxPool1d(kernel_size=2), 49 | ) 50 | self.out2 = nn.Linear(48, 16) 51 | 52 | self.conv3 = nn.Sequential( 53 | nn.Conv1d( 54 | in_channels=2, 55 | out_channels=16, 56 | kernel_size=4, 57 | stride=1, 58 | padding=1, 59 | ), 60 | nn.ReLU(), 61 | nn.MaxPool1d(kernel_size=2), 62 | ) 63 | self.out3 = nn.Linear(48, 16) 64 | 65 | self.bn0 = nn.BatchNorm1d(num_inputs) 66 | self.bn0.weight.data.fill_(1) 67 | self.bn0.bias.data.fill_(0) 68 | 69 | self.linear1 = nn.Linear(num_inputs, hidden_size) 70 | self.bn1 = nn.BatchNorm1d(hidden_size) 71 | self.bn1.weight.data.fill_(1) 72 | self.bn1.bias.data.fill_(0) 73 | 74 | self.linear2 = nn.Linear(hidden_size, hidden_size) 75 | self.bn2 = nn.BatchNorm1d(hidden_size) 76 | self.bn2.weight.data.fill_(1) 77 | self.bn2.bias.data.fill_(0) 78 | 79 | self.V = nn.Linear(hidden_size, 1) 80 | self.V.weight.data.mul_(0.1) 81 | self.V.bias.data.mul_(0.1) 82 | 83 | self.mu = nn.Linear(hidden_size, num_outputs) 84 | self.mu.weight.data.mul_(0.1) 85 | self.mu.bias.data.mul_(0.1) 86 | 87 | self.L = nn.Linear(hidden_size, num_outputs ** 2) 88 | self.L.weight.data.mul_(0.1) 89 | self.L.bias.data.mul_(0.1) 90 | 91 | self.tril_mask = Variable(torch.tril(torch.ones( 92 | num_outputs, num_outputs), diagonal=-1).unsqueeze(0)) 93 | self.diag_mask = Variable(torch.diag(torch.diag( 94 | torch.ones(num_outputs, num_outputs))).unsqueeze(0)) 95 | 96 | def forward(self, inputs): 97 | inputs, u = inputs 98 | cnn1 = inputs[:, 0:8].contiguous() 99 | cnn1 = cnn1.view(1,2,8) 100 | cnn1 = self.conv1(cnn1) 101 | cnn1 = cnn1.view(cnn1.size(0), -1) 102 | cnn1 = self.out1(cnn1) 103 | cnn1 = cnn1.view(2,8) 104 | 105 | cnn2 = inputs[:, 8:16].contiguous() 106 | cnn2 = cnn2.view(1,2,8) 107 | cnn2 = self.conv2(cnn2) 108 | cnn2 = cnn2.view(cnn2.size(0), -1) 109 | cnn2 = self.out2(cnn2) 110 | cnn2 = cnn2.view(2,8) 111 | 112 | cnn3 = inputs[:, 16:24].contiguous() 113 | cnn3 = cnn3.view(1,2,8) 114 | cnn3 = self.conv3(cnn3) 115 | cnn3 = cnn3.view(cnn3.size(0), -1) 116 | cnn3 = self.out3(cnn3) 117 | cnn3 = cnn3.view(2,8) 118 | 119 | x = torch.cat((cnn1,cnn2), 1) 120 | x = torch.cat((x,cnn3), 1) 121 | x = torch.cat((x,inputs[:,24:]), 1) 122 | 123 | 124 | x = self.bn0(x) 125 | x = F.tanh(self.linear1(x)) 126 | x = F.tanh(self.linear2(x)) 127 | 128 | V = self.V(x) 129 | mu = F.tanh(self.mu(x)) 130 | 131 | Q = None 132 | if u is not None: 133 | num_outputs = mu.size(1) 134 | L = self.L(x).view(-1, num_outputs, num_outputs) 135 | L = L * \ 136 | self.tril_mask.expand_as( 137 | L) + torch.exp(L) * self.diag_mask.expand_as(L) 138 | P = torch.bmm(L, L.transpose(2, 1)) 139 | 140 | u_mu = (u - mu).unsqueeze(2) 141 | A = -0.5 * \ 142 | torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0] 143 | 144 | Q = A + V 145 | 146 | return mu, Q, V 147 | 148 | 149 | class NAF_CNN: 150 | 151 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 152 | self.action_space = action_space 153 | self.num_inputs = num_inputs 154 | 155 | self.model = 
Policy(hidden_size, num_inputs, action_space) 156 | self.target_model = Policy(hidden_size, num_inputs, action_space) 157 | self.optimizer = Adam(self.model.parameters(), lr=1e-3) 158 | 159 | self.gamma = gamma 160 | self.tau = tau 161 | 162 | hard_update(self.target_model, self.model) 163 | 164 | def select_action(self, state, exploration=None): 165 | self.model.eval() 166 | mu, _, _ = self.model((Variable(state, volatile=True), None)) 167 | self.model.train() 168 | mu = mu.data 169 | if exploration is not None: 170 | mu += torch.Tensor(exploration.noise()) 171 | 172 | return mu.clamp(1, 4) 173 | 174 | def update_parameters(self, batch): 175 | state_batch = Variable(torch.cat(batch.state)) 176 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 177 | action_batch = Variable(torch.cat(batch.action)) 178 | reward_batch = Variable(torch.cat(batch.reward)) 179 | mask_batch = Variable(torch.cat(batch.mask)) 180 | 181 | _, _, next_state_values = self.target_model((next_state_batch, None)) 182 | 183 | reward_batch = (torch.unsqueeze(reward_batch, 1)) 184 | expected_state_action_values = reward_batch + (next_state_values * self.gamma) 185 | 186 | _, state_action_values, _ = self.model((state_batch, action_batch)) 187 | 188 | loss = MSELoss(state_action_values, expected_state_action_values) 189 | 190 | self.optimizer.zero_grad() 191 | loss.backward() 192 | torch.nn.utils.clip_grad_norm(self.model.parameters(), 1) 193 | self.optimizer.step() 194 | 195 | soft_update(self.target_model, self.model, self.tau) 196 | -------------------------------------------------------------------------------- /normalized_actions.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class NormalizedActions(gym.ActionWrapper): 5 | 6 | def _action(self, action): 7 | action = (action + 1) / 2 # [-1, 1] => [0, 1] 8 | action *= (self.action_space.high - self.action_space.low) 9 | action += self.action_space.low 10 | return action 11 | 12 | def _reverse_action(self, action): 13 | action -= self.action_space.low 14 | action /= (self.action_space.high - self.action_space.low) 15 | action = action * 2 - 1 16 | return action 17 | -------------------------------------------------------------------------------- /old_main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from collections import namedtuple 4 | from itertools import count 5 | import sys 6 | import gym 7 | import numpy as np 8 | from gym import wrappers 9 | 10 | import torch 11 | from ddpg import DDPG 12 | from naf import NAF 13 | from normalized_actions import NormalizedActions 14 | from ounoise import OUNoise 15 | from replay_memory import ReplayMemory, Transition 16 | 17 | parser = argparse.ArgumentParser(description='PyTorch DDPG/NAF example') 18 | parser.add_argument('--algo', default='NAF', 19 | help='algorithm to use: DDPG | NAF') 20 | 21 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 22 | help='discount factor for reward (default: 0.99)') 23 | 24 | parser.add_argument('--tau', type=float, default=0.001, metavar='G', 25 | help='soft update coefficient for the target networks (default: 0.001)') 26 | 27 | parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G', 28 | help='initial noise scale (default: 0.3)') 29 | 30 | parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G', 31 | help='final noise scale (default: 0.3)') 32 | 33 |
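# the three noise arguments drive the annealing schedule used in the training loop below: scale = (noise_scale - final_noise_scale) * max(0, exploration_end - i_episode) / exploration_end + final_noise_scale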
parser.add_argument('--exploration_end', type=int, default=100, metavar='N', 34 | help='number of episodes with noise (default: 100)') 35 | 36 | parser.add_argument('--seed', type=int, default=4, metavar='N', 37 | help='random seed (default: 4)') 38 | 39 | parser.add_argument('--batch_size', type=int, default=128, metavar='N', 40 | help='batch size (default: 128)') 41 | 42 | parser.add_argument('--num_steps', type=int, default=1000, metavar='N', 43 | help='max episode length (default: 1000)') 44 | 45 | parser.add_argument('--num_episodes', type=int, default=1000, metavar='N', 46 | help='number of episodes (default: 1000)') 47 | 48 | parser.add_argument('--hidden_size', type=int, default=128, metavar='N', 49 | help='number of hidden size (default: 128)') 50 | 51 | parser.add_argument('--updates_per_step', type=int, default=5, metavar='N', 52 | help='model updates per simulator step (default: 5)') 53 | 54 | parser.add_argument('--replay_size', type=int, default=1000000, metavar='N', 55 | help='size of replay buffer (default: 1000000)') 56 | 57 | parser.add_argument('--render', action='store_true', 58 | help='render the environment') 59 | 60 | args = parser.parse_args() 61 | 62 | env_name = 'Pendulum-v0' 63 | env = NormalizedActions(gym.make(env_name)) 64 | 65 | #env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True) 66 | 67 | env.seed(args.seed) 68 | torch.manual_seed(args.seed) 69 | np.random.seed(args.seed) 70 | if args.algo == "NAF": 71 | agent = NAF(args.gamma, args.tau, args.hidden_size, 72 | env.observation_space.shape[0], env.action_space) 73 | else: 74 | agent = DDPG(args.gamma, args.tau, args.hidden_size, 75 | env.observation_space.shape[0], env.action_space) 76 | 77 | memory = ReplayMemory(args.replay_size) 78 | ounoise = OUNoise(env.action_space.shape[0]) 79 | 80 | rewards = [] 81 | for i_episode in range(args.num_episodes): 82 | if i_episode < args.num_episodes // 2: 83 | state = torch.Tensor([env.reset()]) 84 | ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - 85 | i_episode) / args.exploration_end + args.final_noise_scale 86 | ounoise.reset() 87 | episode_reward = 0 88 | for t in range(args.num_steps): 89 | print("state: {}\n".format(state)) 90 | action = agent.select_action(state, ounoise) 91 | print("action: {}\n".format(action)) 92 | next_state, reward, done, _ = env.step(action.numpy()[0]) 93 | episode_reward += reward 94 | 95 | action = torch.Tensor(action) 96 | mask = torch.Tensor([not done]) 97 | next_state = torch.Tensor([next_state]) 98 | reward = torch.Tensor([reward]) 99 | # sys.exit(0) 100 | # if i_episode % 10 == 0: 101 | # env.render() 102 | 103 | memory.push(state, action, mask, next_state, reward) 104 | 105 | state = next_state 106 | 107 | if len(memory) > args.batch_size * 5: 108 | for _ in range(args.updates_per_step): 109 | transitions = memory.sample(args.batch_size) 110 | batch = Transition(*zip(*transitions)) 111 | 112 | agent.update_parameters(batch) 113 | 114 | if done: 115 | 116 | break 117 | rewards.append(episode_reward) 118 | else: 119 | state = torch.Tensor([env.reset()]) 120 | episode_reward = 0 121 | for t in range(args.num_steps): 122 | action = agent.select_action(state) 123 | 124 | next_state, reward, done, _ = env.step(action.numpy()[0]) 125 | episode_reward += reward 126 | 127 | next_state = torch.Tensor([next_state]) 128 | 129 | # if i_episode % 10 == 0: 130 | # env.render() 131 | 132 | state = next_state 133 | if done: 134 | break 135 | 136 | rewards.append(episode_reward) 
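# episodes in the second half of training run the greedy policy (select_action without ounoise), so this running average reflects evaluation rather than exploration performance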
137 | print("Episode: {}, noise: {}, reward: {}, average reward: {}".format(i_episode, ounoise.scale, 138 | rewards[-1], np.mean(rewards[-100:]))) 139 | 140 | env.close() 141 | -------------------------------------------------------------------------------- /ounoise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # from https://github.com/songrotek/DDPG/blob/master/ou_noise.py 5 | class OUNoise: 6 | 7 | def __init__(self, action_dimension, scale=0.1, mu=0, theta=0.15, sigma=0.2): 8 | self.action_dimension = action_dimension 9 | self.scale = scale 10 | self.mu = mu 11 | self.theta = theta 12 | self.sigma = sigma 13 | self.state = np.ones(self.action_dimension) * self.mu 14 | self.reset() 15 | 16 | def reset(self): 17 | self.state = np.ones(self.action_dimension) * self.mu 18 | 19 | def noise(self): 20 | x = self.state 21 | dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x)) 22 | self.state = x + dx 23 | return self.state * self.scale 24 | -------------------------------------------------------------------------------- /out_log_0325: -------------------------------------------------------------------------------- 1 | state: 2 | 28 20565 781 1 1 3 | 4 16354 749 1 1 4 | [torch.FloatTensor of size 2x5] 5 | 6 | ounoise: 7 | next state: 8 | 10 12537 694 1 1 9 | 3 9980 404 1 1 10 | [torch.FloatTensor of size 2x5] 11 | 12 | state: 13 | 28 20565 781 1 1 14 | 4 16354 749 1 1 15 | [torch.FloatTensor of size 2x5] 16 | 17 | ounoise: 18 | next state: 19 | 6 14712 711 1 1 20 | 3 8424 571 1 1 21 | [torch.FloatTensor of size 2x5] 22 | 23 | state: 24 | 28 20565 781 1 1 25 | 4 16354 749 1 1 26 | [torch.FloatTensor of size 2x5] 27 | 28 | ounoise: 29 | next state: 30 | 4 26811 676 1 1 31 | 3 15007 573 1 1 32 | [torch.FloatTensor of size 2x5] 33 | 34 | state: 35 | 28 20565 781 1 1 36 | 4 16354 749 1 1 37 | [torch.FloatTensor of size 2x5] 38 | 39 | ounoise: 40 | next state: 41 | 14 30643 573 1 1 42 | 4 5557 583 1 1 43 | [torch.FloatTensor of size 2x5] 44 | 45 | state: 46 | 28 20565 781 1 1 47 | 4 16354 749 1 1 48 | [torch.FloatTensor of size 2x5] 49 | 50 | ounoise: 51 | next state: 52 | 45 40812 115 1 1 53 | 2 14306 951 1 1 54 | [torch.FloatTensor of size 2x5] 55 | 56 | state: 57 | 28 20565 781 1 1 58 | 4 16354 749 1 1 59 | [torch.FloatTensor of size 2x5] 60 | 61 | ounoise: 62 | update -------------------- 63 | update -------------------- 64 | update -------------------- 65 | update -------------------- 66 | update -------------------- 67 | next state: 68 | 45 40812 1 1 1 69 | 2 7986 959 1 1 70 | [torch.FloatTensor of size 2x5] 71 | 72 | state: 73 | 28 20565 781 1 1 74 | 4 16354 749 1 1 75 | [torch.FloatTensor of size 2x5] 76 | 77 | ounoise: 78 | update -------------------- 79 | update -------------------- 80 | update -------------------- 81 | update -------------------- 82 | update -------------------- 83 | next state: 84 | 45 40812 1 1 1 85 | 4 8664 895 1 1 86 | [torch.FloatTensor of size 2x5] 87 | 88 | state: 89 | 28 20565 781 1 1 90 | 4 16354 749 1 1 91 | [torch.FloatTensor of size 2x5] 92 | 93 | ounoise: 94 | update -------------------- 95 | update -------------------- 96 | update -------------------- 97 | update -------------------- 98 | update -------------------- 99 | next state: 100 | 45 40812 0 1 1 101 | 2 5386 1159 1 1 102 | [torch.FloatTensor of size 2x5] 103 | 104 | state: 105 | 28 20565 781 1 1 106 | 4 16354 749 1 1 107 | [torch.FloatTensor of size 2x5] 108 | 109 | ounoise: 110 | update -------------------- 
111 | update -------------------- 112 | update -------------------- 113 | update -------------------- 114 | update -------------------- 115 | next state: 116 | 45 40812 1 1 1 117 | 3 5939 1074 1 1 118 | [torch.FloatTensor of size 2x5] 119 | 120 | state: 121 | 28 20565 781 1 1 122 | 4 16354 749 1 1 123 | [torch.FloatTensor of size 2x5] 124 | 125 | ounoise: 126 | update -------------------- 127 | update -------------------- 128 | update -------------------- 129 | update -------------------- 130 | update -------------------- 131 | next state: 132 | 45 40812 0 1 1 133 | 2 3583 1323 1 1 134 | [torch.FloatTensor of size 2x5] 135 | 136 | state: 137 | 28 20565 781 1 1 138 | 4 16354 749 1 1 139 | [torch.FloatTensor of size 2x5] 140 | 141 | ounoise: 142 | update -------------------- 143 | update -------------------- 144 | update -------------------- 145 | update -------------------- 146 | update -------------------- 147 | next state: 148 | 45 40812 0 1 1 149 | 5 12816 1143 1 1 150 | [torch.FloatTensor of size 2x5] 151 | 152 | state: 153 | 28 20565 781 1 1 154 | 4 16354 749 1 1 155 | [torch.FloatTensor of size 2x5] 156 | 157 | ounoise: 158 | 159 | -------------------------------------------------------------------------------- /replay_memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections import namedtuple 3 | 4 | # Taken from 5 | # https://github.com/pytorch/tutorials/blob/master/Reinforcement%20(Q-)Learning%20with%20PyTorch.ipynb 6 | 7 | Transition = namedtuple( 8 | 'Transition', ('state', 'action', 'mask', 'next_state', 'reward')) 9 | 10 | 11 | class ReplayMemory(object): 12 | 13 | def __init__(self, capacity): 14 | self.capacity = capacity 15 | self.memory = [] 16 | self.position = 0 17 | 18 | def push(self, *args): 19 | """Saves a transition.""" 20 | if len(self.memory) < self.capacity: 21 | self.memory.append(None) 22 | self.memory[self.position] = Transition(*args) 23 | self.position = (self.position + 1) % self.capacity 24 | 25 | def sample(self, batch_size): 26 | return random.sample(self.memory, batch_size) 27 | 28 | def __len__(self): 29 | return len(self.memory) 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | setup(name='mpsched', 4 | ext_modules=[ 5 | Extension('mpsched', 6 | ['mpsched.c'], 7 | include_dirs=['/usr/src/linux-headers-4.4.110-mptcp+/include/uapi', '/usr/src/linux-headers-4.4.110-mptcp+/include', '/usr/include/python3.5m'], 8 | define_macros=[('NUM_SUBFLOWS', '2'), ('SOL_TCP', '6')] 9 | ) 10 | ]) 11 | -------------------------------------------------------------------------------- /tc.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh
2 | ETH0=enp0s31f6   # wired NIC
3 | WIFI=wlx485d605766e1   # wireless NIC
4 | 
5 | ETH_RTT=50ms
6 | ETH_RATE=7040kbit
7 | 
8 | WIFI_RTT=70ms
9 | WIFI_RATE=9185kbit
10 | 
11 | tc qd del dev $ETH0 root
12 | tc qd add dev $ETH0 root handle 1:0 tbf rate $ETH_RATE latency 50ms burst 1540
13 | tc qd add dev $ETH0 parent 1:0 handle 10:0 netem delay $ETH_RTT
14 | 
15 | tc qd del dev $WIFI root
16 | tc qd add dev $WIFI root handle 1:0 tbf rate $WIFI_RATE latency 50ms burst 1540
17 | tc qd add dev $WIFI parent 1:0 handle 10:0 netem delay $WIFI_RTT
18 | 
--------------------------------------------------------------------------------
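A quick sanity check on these shaping parameters: the bandwidth-delay product bounds how much data can be in flight on each emulated path, which is useful context for the buffer-related terms in the reward below. A minimal sketch (the rates and RTTs are copied from tc.sh above; the helper itself is illustrative):

# Bandwidth-delay product of each emulated path, in bytes per RTT.
def bdp_bytes(rate_kbit, rtt_ms):
    return rate_kbit * 1000 / 8 * (rtt_ms / 1000.0)  # bits/s -> bytes over one RTT

print(bdp_bytes(7040, 50))   # ETH:  44000.0 bytes  (~43 KiB in flight)
print(bdp_bytes(9185, 70))   # WIFI: 80368.75 bytes (~78 KiB in flight)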
/train.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | class io_thread(threading.Thread):
9 | 
10 |     def __init__(self, sock, filename, buffer_size):
11 |         threading.Thread.__init__(self)
12 |         self.sock = sock
13 |         self.buffer_size = buffer_size
14 |         self.filename = filename
15 | 
16 |     def run(self):
17 |         fp = open(self.filename, 'rb')
18 |         self.sock.send(bytes(self.filename, encoding='utf8'))
19 |         buff = self.sock.recv(16)
20 |         print(str(buff, encoding='utf8'))
21 | 
22 |         while True:
23 |             buff = fp.read(self.buffer_size)
24 |             if not buff:
25 |                 break
26 |             self.sock.send(buff)
27 |         self.sock.close()
28 |         fp.close()
29 | 
30 | 
31 | class env():
32 |     """RL environment over one MPTCP connection, observed via mpsched."""
33 |     def __init__(self, fd, buff_size, time, k, l, n, p):
34 |         self.fd = fd
35 |         self.buff_size = buff_size
36 |         self.k = k  ## number of past time slices kept in the observation
37 |         self.l = l  ## throughput reward factor
38 |         #self.m = m  ## RTT penalty factor
39 |         self.n = n  ## buffer-bloat penalty factor
40 |         self.p = p  ## retransmission penalty factor
41 |         self.time = time
42 |         self.last = []
43 |         self.tp = [[], []]
44 |         self.rtt = [[], []]
45 |         self.cwnd = [[], []]
46 |         self.rr, self.rr_total = 0, 0  # per-step delta and cumulative retransmissions
47 |         self.count = 1
48 |         self.recv_buff_size = 0
49 | 
50 | 
51 |     """ adjust info to get goodput """
52 |     def adjust(self, state):
53 |         for j in range(len(state)):
54 |             self.tp[j].pop(0)
55 |             self.tp[j].append(state[j][0] - self.last[j][0])
56 |             self.rtt[j].pop(0)
57 |             self.rtt[j].append(state[j][1] - self.last[j][1])
58 |             self.cwnd[j].pop(0)
59 |             self.cwnd[j].append(state[j][2])
60 |         self.last = state
61 |         meta = mpsched.get_meta_info(self.fd)
62 |         self.recv_buff_size = meta[0]
63 |         self.rr, self.rr_total = meta[1] - self.rr_total, meta[1]  # retransmissions during this step
64 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
65 | 
66 |     def reward(self):
67 |         rewards = self.l * (sum(self.tp[0]) + sum(self.tp[1]))
68 |         #rewards = rewards - self.m * (sum(self.rtt[0]) + sum(self.rtt[1]))
69 |         rewards = rewards - self.n * self.recv_buff_size  # penalty: receive-buffer bloat
70 |         rewards = rewards - self.p * self.rr  # penalty: retransmissions
71 |         return rewards
72 | 
73 |     """ reset env, return the initial state """
74 |     def reset(self):
75 |         mpsched.persist_state(self.fd)
76 |         time.sleep(1)
77 |         self.tp, self.rtt, self.cwnd = [[], []], [[], []], [[], []]  # drop stale windows between episodes
78 |         self.last = mpsched.get_sub_info(self.fd)
79 |         for i in range(self.k):
80 |             subs = mpsched.get_sub_info(self.fd)
81 |             for j in range(len(subs)):
82 |                 self.tp[j].append(subs[j][0] - self.last[j][0])
83 |                 self.rtt[j].append(subs[j][1] - self.last[j][1])
84 |                 self.cwnd[j].append(subs[j][2])
85 |             self.last = subs
86 |             time.sleep(self.time)
87 |         meta = mpsched.get_meta_info(self.fd)
88 |         self.recv_buff_size = meta[0]
89 |         self.rr, self.rr_total = 0, meta[1]  # baseline for per-step retransmission deltas
90 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
91 | 
92 |     """ action = [sub1_buff_size, sub2_buff_size] """
93 |     def step(self, action):
94 |         # A = [self.fd, action[0], action[1]]
95 |         # mpsched.set_seg(A)
96 |         time.sleep(self.time)
97 |         state_nxt = mpsched.get_sub_info(self.fd)
98 |         done = False
99 |         if len(state_nxt) == 0:
100 |             done = True
101 |         self.count = self.count + 1
102 |         return self.adjust(state_nxt), self.reward(), self.count, self.recv_buff_size, done
103 | 
104 | 
105 | def main():
106 |     cfg = ConfigParser()
107 |     cfg.read('config.ini')
108 | 
109 |     IP = cfg.get('server', 'ip')
110 |     PORT = cfg.getint('server', 'port')
111 |     FILE = cfg.get('file', 'file')
112 |     SIZE = cfg.getint('env', 'buffer_size')
113 |     TIME = cfg.getfloat('env', 'time')
114 | 
115 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
116 |     sock.connect((IP, PORT))
117 |     fd = sock.fileno()
118 |     io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
119 |     mpsched.persist_state(fd)
120 | 
121 |     io.start()
122 |     my_env = env(fd=fd, buff_size=SIZE, time=TIME, k=4, l=0.01, n=0.03, p=0.05)
123 | 
124 |     state = my_env.reset()
125 |     while True:
126 |         action = []
127 |         state_nxt, reward, count, recv_buff_size, done = my_env.step(action)
128 |         if done:
129 |             break
130 |         print(reward)
131 |         print(recv_buff_size)
132 |         print(count)
133 | 
134 |     io.join()
135 | 
136 | 
137 | if __name__ == '__main__':
138 |     main()
139 | 
--------------------------------------------------------------------------------
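The flattened state that reset()/adjust() return is easy to misread, so here is a minimal sketch of its layout for the k = 4 window used in train.py's main(). The numbers are invented; only the ordering comes from the code above:

# Per subflow: k throughput deltas, then k RTT values, then k cwnd samples,
# then the shared [recv_buff_size, rr] pair -> 3*k + 2 features per row.
k = 4
tp   = [12, 9, 15, 11]          # hypothetical per-interval throughput deltas
rtt  = [48, 51, 47, 50]         # hypothetical per-interval RTT readings
cwnd = [10, 10, 12, 12]         # hypothetical cwnd samples
recv_buff_size, rr = 1024, 2    # hypothetical meta-socket readings
row = tp + rtt + cwnd + [recv_buff_size, rr]
assert len(row) == 3 * k + 2    # 14 features per subflow when k = 4
state = [row, row]              # one row per subflow, as fed to the agent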
/train_2.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | import argparse
9 | import gym
10 | import numpy as np
11 | from gym import wrappers
12 | from gym import spaces
13 | 
14 | import torch
15 | from ddpg_cnn import DDPG_CNN
16 | from naf_cnn import NAF_CNN
17 | from normalized_actions import NormalizedActions
18 | from ounoise import OUNoise
19 | from replay_memory import ReplayMemory, Transition
20 | 
21 | 
22 | class io_thread(threading.Thread):
23 | 
24 |     def __init__(self, sock, filename, buffer_size):
25 |         threading.Thread.__init__(self)
26 |         self.sock = sock
27 |         self.buffer_size = buffer_size
28 |         self.filename = filename
29 | 
30 |     def run(self):
31 |         fp = open(self.filename, 'rb')
32 |         self.sock.send(bytes(self.filename, encoding='utf8'))
33 |         buff = self.sock.recv(16)
34 |         print(str(buff, encoding='utf8'))
35 | 
36 |         while True:
37 |             buff = fp.read(self.buffer_size)
38 |             if not buff:
39 |                 break
40 |             self.sock.send(buff)
41 |         self.sock.close()
42 |         fp.close()
43 | 
44 | 
45 | class env():
46 |     """RL environment over one MPTCP connection, observed via mpsched."""
47 |     def __init__(self, fd, buff_size, time, k, l, n, p):
48 |         self.fd = fd
49 |         self.buff_size = buff_size
50 |         self.k = k  ## number of past time slices kept in the observation
51 |         self.l = l  ## throughput reward factor
52 |         #self.m = m  ## RTT penalty factor
53 |         self.n = n  ## buffer-bloat penalty factor
54 |         self.p = p  ## retransmission penalty factor
55 |         self.time = time
56 |         self.last = []
57 |         self.tp = [[], []]
58 |         self.rtt = [[], []]
59 |         self.cwnd = [[], []]
60 |         self.rr, self.rr_total = 0, 0  # per-step delta and cumulative retransmissions
61 |         self.count = 1
62 |         self.recv_buff_size = 0
63 | 
64 |         # 3*k + 2 = 26 non-negative features per subflow (k = 8 in main() below)
65 |         self.observation_space = spaces.Box(np.zeros(26), np.full(26, np.inf))
66 |         self.action_space = spaces.Box(np.array([1]), np.array([4]))
67 | 
68 | 
69 |     """ adjust info to get goodput """
70 |     def adjust(self, state):
71 |         for j in range(len(state)):
72 |             self.tp[j].pop(0)
73 |             self.tp[j].append(state[j][0] - self.last[j])
74 |             self.rtt[j].pop(0)
75 |             self.rtt[j].append(state[j][1])
76 |             self.cwnd[j].pop(0)
77 |             self.cwnd[j].append(state[j][2])
78 |         self.last = [x[0] for x in state]
79 |         meta = mpsched.get_meta_info(self.fd)
80 |         self.recv_buff_size = meta[0]
81 |         self.rr, self.rr_total = meta[1] - self.rr_total, meta[1]  # retransmissions during this step
82 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
83 | 
84 |     def reward(self):
85 |         rewards = self.l * (sum(self.tp[0]) + sum(self.tp[1]))
86 |         #rewards = rewards - self.m * (sum(self.rtt[0]) + sum(self.rtt[1]))
87 |         rewards = rewards - self.n * self.recv_buff_size  # penalty: receive-buffer bloat
88 |         rewards = rewards - self.p * self.rr  # penalty: retransmissions
89 |         return rewards
90 | 
91 |     """ reset env, return the initial state """
92 |     def reset(self):
93 |         mpsched.persist_state(self.fd)
94 |         time.sleep(1)
95 |         self.tp, self.rtt, self.cwnd = [[], []], [[], []], [[], []]  # drop stale windows between episodes
96 |         self.last = [x[0] for x in mpsched.get_sub_info(self.fd)]
97 |         for i in range(self.k):
98 |             subs = mpsched.get_sub_info(self.fd)
99 |             for j in range(len(subs)):
100 |                 self.tp[j].append(subs[j][0] - self.last[j])
101 |                 self.rtt[j].append(subs[j][1])
102 |                 self.cwnd[j].append(subs[j][2])
103 |             self.last = [x[0] for x in subs]
104 |             time.sleep(self.time)
105 |         meta = mpsched.get_meta_info(self.fd)
106 |         self.recv_buff_size = meta[0]
107 |         self.rr, self.rr_total = 0, meta[1]  # baseline for per-step retransmission deltas
108 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
109 | 
110 |     """ action = [sub1_buff_size, sub2_buff_size] """
111 |     def step(self, action):
112 |         # A = [self.fd, action[0], action[1]]
113 |         # mpsched.set_seg(A)
114 |         time.sleep(self.time)
115 |         state_nxt = mpsched.get_sub_info(self.fd)
116 |         done = False
117 |         if len(state_nxt) == 0:
118 |             done = True
119 |         self.count = self.count + 1
120 |         return self.adjust(state_nxt), self.reward(), self.count, self.recv_buff_size, done
121 | 
122 | 
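The training loop in main() below stores tensors in ReplayMemory and re-batches sampled transitions with the Transition(*zip(*transitions)) idiom; a minimal, self-contained illustration (the dummy shapes follow the 26-feature layout above, everything else is placeholder data):

import torch
from replay_memory import ReplayMemory, Transition

mem = ReplayMemory(capacity=100)
for _ in range(3):  # push dummy (state, action, mask, next_state, reward) tuples
    mem.push(torch.zeros(2, 26), torch.tensor([2.0]), torch.tensor([1.0]),
             torch.zeros(2, 26), torch.tensor([0.5]))
transitions = mem.sample(2)               # a list of Transition namedtuples
batch = Transition(*zip(*transitions))    # one Transition whose fields are tuples
reward_batch = torch.stack(batch.reward)  # -> tensor of shape (2, 1)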
123 | def main():
124 |     cfg = ConfigParser()
125 |     cfg.read('config.ini')
126 | 
127 |     IP = cfg.get('server', 'ip')
128 |     PORT = cfg.getint('server', 'port')
129 |     FILE = cfg.get('file', 'file')
130 |     SIZE = cfg.getint('env', 'buffer_size')
131 |     TIME = cfg.getfloat('env', 'time')
132 |     EPISODE = cfg.getint('env', 'episode')
133 | 
134 |     parser = argparse.ArgumentParser(description='NAF/DDPG training for the MPTCP environment')
135 | 
136 |     parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
137 |                         help='discount factor for reward (default: 0.99)')
138 |     parser.add_argument('--tau', type=float, default=0.001, metavar='G',
139 |                         help='soft target-update rate (default: 0.001)')
140 | 
141 |     parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G',
142 |                         help='initial noise scale (default: 0.3)')
143 |     parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G',
144 |                         help='final noise scale (default: 0.3)')
145 |     parser.add_argument('--exploration_end', type=int, default=100, metavar='N',
146 |                         help='number of episodes with noise (default: 100)')
147 | 
148 |     parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
149 |                         help='number of hidden units per layer (default: 128)')
150 |     parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
151 |                         help='size of replay buffer (default: 1000000)')
152 |     parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
153 |                         help='model updates per environment step (default: 5)')
154 |     parser.add_argument('--batch_size', type=int, default=64, metavar='N',
155 |                         help='batch size (default: 64)')
156 | 
157 | 
158 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
159 |     sock.connect((IP, PORT))
160 |     fd = sock.fileno()
161 |     my_env = env(fd=fd, buff_size=SIZE, time=TIME, k=8, l=0.01, n=0.03, p=0.05)
162 |     mpsched.persist_state(fd)
163 | 
164 |     args = parser.parse_args()
165 |     agent = NAF_CNN(args.gamma, args.tau, args.hidden_size,
166 |                     my_env.observation_space.shape[0], my_env.action_space)
167 |     memory = ReplayMemory(args.replay_size)
168 |     ounoise = OUNoise(my_env.action_space.shape[0])
169 | 
170 |     rewards = []
171 |     times = []
172 |     for i_episode in range(EPISODE):
173 |         if i_episode < 0.9 * EPISODE:  # training episodes
174 |             io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
175 |             io.start()
176 | 
177 |             state = my_env.reset()
178 | 
179 |             ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - i_episode) / args.exploration_end + args.final_noise_scale
180 |             ounoise.reset()
181 |             print(state)
182 |             episode_reward = 0
183 |             while True:
184 |                 state = torch.FloatTensor(state)
185 |                 #print("state: {}\n ounoise: {}".format(state, ounoise.scale))
186 |                 action = agent.select_action(state, ounoise)
187 |                 #print("action: {}".format(action))
188 |                 next_state, reward, count, recv_buff_size, done = my_env.step(action)
189 |                 #print("buff size: ", recv_buff_size)
190 |                 #print("reward: ", reward)
191 |                 episode_reward += reward
192 | 
193 |                 action = torch.FloatTensor(action)
194 |                 mask = torch.Tensor([not done])
195 |                 next_state = torch.FloatTensor(next_state)
196 |                 reward = torch.FloatTensor([float(reward)])
197 |                 memory.push(state, action, mask, next_state, reward)
198 | 
199 |                 state = next_state
200 | 
201 |                 if len(memory) > args.batch_size * 5:
202 |                     for _ in range(args.updates_per_step):
203 |                         transitions = memory.sample(args.batch_size)
204 |                         batch = Transition(*zip(*transitions))
205 |                         #print("update", 10 * '--')
206 |                         agent.update_parameters(batch)
207 | 
208 |                 if done:
209 |                     break
210 |             rewards.append(episode_reward)
211 |             io.join()
212 |         else:  # testing episodes (last 10%): no exploration noise, time the transfer
213 |             io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
214 |             io.start()
215 |             state = my_env.reset()
216 |             episode_reward = 0
217 |             start_time = time.time()
218 |             while True:
219 |                 state = torch.FloatTensor(state)
220 |                 #print("state: {}\n".format(state))
221 |                 action = agent.select_action(state)
222 |                 #print("action: {}".format(action))
223 |                 next_state, reward, count, recv_buff_size, done = my_env.step(action)
224 |                 episode_reward += reward
225 |                 state = next_state
226 | 
227 |                 if done:
228 |                     break
229 |             rewards.append(episode_reward)
230 |             times.append(str(time.time() - start_time) + "\n")
231 |             io.join()
232 |             #print("Episode: {}, noise: {}, reward: {}, average reward: {}".format(i_episode, ounoise.scale, rewards[-1], np.mean(rewards[-100:])))
233 |     fo = open("times.txt", "w")
234 |     fo.writelines(times)  # one completion time per test episode
235 |     fo.close()
236 | 
237 |     sock.close()
238 | 
239 | 
240 | if __name__ == '__main__':
241 |     main()
242 | 
--------------------------------------------------------------------------------
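A worked example of the noise-annealing schedule on line 179 above. With the script's defaults (noise_scale = final_noise_scale = 0.3) the scale is constant; lowering final_noise_scale, here hypothetically to 0.05, gives a linear decay that flattens after exploration_end episodes:

noise_scale, final_noise_scale, exploration_end = 0.3, 0.05, 100
for i_episode in (0, 50, 100, 150):
    scale = (noise_scale - final_noise_scale) * max(0, exploration_end - i_episode) / exploration_end + final_noise_scale
    print(i_episode, round(scale, 3))  # -> 0.3, 0.175, 0.05, 0.05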
/train_test.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | class io_thread(threading.Thread):
9 | 
10 |     def __init__(self, sock, filename, buffer_size):
11 |         threading.Thread.__init__(self)
12 |         self.sock = sock
13 |         self.buffer_size = buffer_size
14 |         self.filename = filename
15 | 
16 |     def run(self):
17 |         fp = open(self.filename, 'rb')
18 |         self.sock.send(bytes(self.filename, encoding='utf8'))
19 |         buff = self.sock.recv(16)
20 |         print(str(buff, encoding='utf8'))
21 | 
22 |         while True:
23 |             buff = fp.read(self.buffer_size)
24 |             if not buff:
25 |                 break
26 |             self.sock.send(buff)
27 |         self.sock.close()
28 |         fp.close()
29 | 
30 | 
31 | class record(object):
32 |     """Collects per-timestep subflow samples and writes them to a data file."""
33 |     def __init__(self, timestep=0.2, datafile="record"):
34 |         self.data = []
35 |         self.timestep = timestep
36 |         self.datafile = datafile
37 | 
38 |     def save(self):
39 |         length = len(self.data)
40 |         with open(self.datafile, 'w') as f:
41 |             f.write(str(self.timestep))
42 |             f.write('\n')
43 |             f.write(str(length))
44 |             f.write('\n')
45 |             for i in range(length):
46 |                 f.write('%d %d\n' % (self.data[i][0][1], self.data[i][1][1]))
47 | 
48 |     def put(self, recd):
49 |         self.data.append(recd)
50 | 
51 |     def draw(self):
52 |         pass
53 | 
54 | 
55 | def main():
56 |     cfg = ConfigParser()
57 |     cfg.read('config.ini')
58 | 
59 |     IP = cfg.get('server', 'ip')
60 |     PORT = cfg.getint('server', 'port')
61 |     FILE = cfg.get('file', 'file')
62 |     SIZE = cfg.getint('env', 'buffer_size')
63 |     timestep = cfg.getfloat('env', 'time')
64 | 
65 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
66 |     sock.connect((IP, PORT))
67 |     fd = sock.fileno()
68 | 
69 |     io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
70 | 
71 |     start_time = time.time()
72 |     io.start()
73 |     io.join()
74 | 
75 |     end_time = time.time()
76 |     print("completion time: ", end_time - start_time)
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     main()
81 | 
--------------------------------------------------------------------------------
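record is defined but never exercised in main() above; a hedged sketch of how it could be wired in. The sampling loop, the `fd` argument, and the meaning of the sampled fields are assumptions on my part; only the field indices follow save():

# Hypothetical usage of the record class from train_test.py; not in the repo.
# Assumes a connected MPTCP socket whose fileno() is `fd`, and that
# mpsched.get_sub_info(fd) returns one info tuple per subflow, whose
# second field (index 1) is what save() writes out.
rec = record(timestep=0.2, datafile="record")
for _ in range(50):                  # sample for 50 * 0.2 = 10 seconds
    subs = mpsched.get_sub_info(fd)  # [subflow0_info, subflow1_info]
    if len(subs) >= 2:
        rec.put(subs)
    time.sleep(rec.timestep)
rec.save()  # header: timestep and sample count, then one line per sample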