├── .gitignore ├── Instructions.pdf ├── README.md ├── config.ini ├── ddpg.py ├── ddpg_cnn.py ├── env_test.py ├── mpsched.c ├── mptcp_recv ├── info.c ├── recv.py └── setup.py ├── mptcp_results ├── cal ├── cal_throughput.c ├── eth ├── mptcplog.c ├── tcpdump.sh ├── w0 ├── w1 └── w2 ├── naf.py ├── naf_cnn.py ├── normalized_actions.py ├── old_main.py ├── ounoise.py ├── out_log_0325 ├── replay_memory.py ├── setup.py ├── tc.sh ├── train.py ├── train_2.py └── train_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | t2.tar.gz 3 | *.pcap 4 | *.txt 5 | *.so 6 | *.o 7 | *.dat 8 | -------------------------------------------------------------------------------- /Instructions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/Instructions.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | state:\ 2 | The observation collected in one time slot: [[throughput_1, RTT_1, unacked_1, retrans_1], [throughput_2, RTT_2, unacked_2, retrans_2]], one inner list per subflow.\ 3 | A state stacks k consecutive time slots; for example, with k=4:\ 4 | [[[210, 4841, 20, 13], [114, 3158, 1, 14]], [[183, 4063, 5, 13], [138, 733, 0, 14]], [[246, 4519, 2, 13], [133, 718, 0, 14]], [[248, 3465, 10, 13], [136, 3040, 1, 14]]] 5 | 6 | 7 | \ 8 | reward:\ 9 | l\*(sum of every subflow's throughput over the k time slots) - m\*(sum of each subflow's most recent RTT) - n\*(sum of each subflow's most recent unacked count) - p\*(retransmissions accumulated over the k time slots) 10 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [server] 2 | port=9000 3 | ip=114.212.80.16 4 | 5 | [file] 6 | file=./256mb.dat 7 | 8 | [env] 9 | buffer_size=1024 10 | time=1 11 | episode=100 12 | -------------------------------------------------------------------------------- /ddpg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | 20 | class Actor(nn.Module): 21 | 22 | def __init__(self, hidden_size, num_inputs, action_space): 23 | super(Actor, self).__init__() 24 | self.action_space = action_space 25 | num_outputs = action_space.shape[0] 26 | 27 | self.bn0 = nn.BatchNorm1d(num_inputs) 28 | self.bn0.weight.data.fill_(1) 29 | self.bn0.bias.data.fill_(0) 30 | 31 | self.linear1 = nn.Linear(num_inputs, hidden_size) 32 | self.bn1 = nn.BatchNorm1d(hidden_size) 33 | self.bn1.weight.data.fill_(1) 34 | self.bn1.bias.data.fill_(0) 35 | 36 | self.linear2 = nn.Linear(hidden_size, hidden_size) 37 | self.bn2 = nn.BatchNorm1d(hidden_size) 38 | self.bn2.weight.data.fill_(1) 39 | self.bn2.bias.data.fill_(0) 40 | 41 | self.mu = nn.Linear(hidden_size, num_outputs) 42 | self.mu.weight.data.mul_(0.1) 43 | self.mu.bias.data.mul_(0.1) 44 | 45 | 46 | def forward(self, inputs): 47 | x = inputs 48 | x = self.bn0(x) 49 | x = F.tanh(self.linear1(x)) 50 | x = F.tanh(self.linear2(x)) 51 |
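# NOTE: bn1 and bn2 are constructed in __init__ but never applied in this forward pass, so only bn0 actually normalizes; flagged here rather than changed, since retraining behaviour could depend on it.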
52 | mu = F.tanh(self.mu(x)) 53 | return mu 54 | 55 | 56 | class Critic(nn.Module): 57 | 58 | def __init__(self, hidden_size, num_inputs, action_space): 59 | super(Critic, self).__init__() 60 | self.action_space = action_space 61 | num_outputs = action_space.shape[0] 62 | self.bn0 = nn.BatchNorm1d(num_inputs) 63 | self.bn0.weight.data.fill_(1) 64 | self.bn0.bias.data.fill_(0) 65 | 66 | self.linear1 = nn.Linear(num_inputs, hidden_size) 67 | self.bn1 = nn.BatchNorm1d(hidden_size) 68 | self.bn1.weight.data.fill_(1) 69 | self.bn1.bias.data.fill_(0) 70 | 71 | self.linear_action = nn.Linear(num_outputs, hidden_size) 72 | self.bn_a = nn.BatchNorm1d(hidden_size) 73 | self.bn_a.weight.data.fill_(1) 74 | self.bn_a.bias.data.fill_(0) 75 | 76 | self.linear2 = nn.Linear(hidden_size + hidden_size, hidden_size) 77 | self.bn2 = nn.BatchNorm1d(hidden_size) 78 | self.bn2.weight.data.fill_(1) 79 | self.bn2.bias.data.fill_(0) 80 | 81 | self.V = nn.Linear(hidden_size, 1) 82 | self.V.weight.data.mul_(0.1) 83 | self.V.bias.data.mul_(0.1) 84 | 85 | def forward(self, inputs, actions): 86 | x = inputs 87 | x = self.bn0(x) 88 | x = F.tanh(self.linear1(x)) 89 | a = F.tanh(self.linear_action(actions)) 90 | x = torch.cat((x, a), 1) 91 | x = F.tanh(self.linear2(x)) 92 | 93 | V = self.V(x) 94 | return V 95 | 96 | 97 | class DDPG(object): 98 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 99 | 100 | self.num_inputs = num_inputs 101 | self.action_space = action_space 102 | 103 | self.actor = Actor(hidden_size, self.num_inputs, self.action_space) 104 | self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space) 105 | self.actor_optim = Adam(self.actor.parameters(), lr=1e-4) 106 | 107 | self.critic = Critic(hidden_size, self.num_inputs, self.action_space) 108 | self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space) 109 | self.critic_optim = Adam(self.critic.parameters(), lr=1e-3) 110 | 111 | self.gamma = gamma 112 | self.tau = tau 113 | 114 | hard_update(self.actor_target, self.actor) # Make sure target is with the same weight 115 | hard_update(self.critic_target, self.critic) 116 | 117 | 118 | def select_action(self, state, exploration=None): 119 | self.actor.eval() 120 | mu = self.actor((Variable(state, volatile=True))) 121 | self.actor.train() 122 | mu = mu.data 123 | if exploration is not None: 124 | mu += torch.Tensor(exploration.noise()) 125 | 126 | return mu.clamp(0, 4) 127 | 128 | 129 | def update_parameters(self, batch): 130 | state_batch = Variable(torch.cat(batch.state)) 131 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 132 | action_batch = Variable(torch.cat(batch.action)) 133 | reward_batch = Variable(torch.cat(batch.reward)) 134 | mask_batch = Variable(torch.cat(batch.mask)) 135 | 136 | next_action_batch = self.actor_target(next_state_batch) 137 | next_state_action_values = self.critic_target(next_state_batch, next_action_batch) 138 | 139 | reward_batch = torch.unsqueeze(reward_batch, 1) 140 | expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values) 141 | 142 | self.critic_optim.zero_grad() 143 | 144 | state_action_batch = self.critic((state_batch), (action_batch)) 145 | 146 | value_loss = MSELoss(state_action_batch, expected_state_action_batch) 147 | value_loss.backward() 148 | self.critic_optim.step() 149 | 150 | self.actor_optim.zero_grad() 151 | 152 | policy_loss = -self.critic((state_batch),self.actor((state_batch))) 153 | 154 | policy_loss = policy_loss.mean() 155 | policy_loss.backward() 
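# NOTE: mask_batch is assembled above but never enters the TD target (reward + gamma * Q'), so terminal transitions still bootstrap; also, this backward pass deposits gradients in the critic that only the next critic_optim.zero_grad() clears.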
156 | self.actor_optim.step() 157 | 158 | soft_update(self.actor_target, self.actor, self.tau) 159 | soft_update(self.critic_target, self.critic, self.tau) 160 | -------------------------------------------------------------------------------- /ddpg_cnn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | 20 | class Actor(nn.Module): 21 | 22 | def __init__(self, hidden_size, num_inputs, action_space): 23 | super(Actor, self).__init__() 24 | self.action_space = action_space 25 | num_outputs = action_space.shape[0] 26 | 27 | self.conv1 = nn.Sequential( 28 | nn.Conv1d( 29 | in_channels=2, 30 | out_channels=16, 31 | kernel_size=4, 32 | stride=1, 33 | padding=1, 34 | ), 35 | nn.ReLU(), 36 | nn.MaxPool1d(kernel_size=2), 37 | ) 38 | self.out1 = nn.Linear(48, 16) 39 | 40 | self.conv2 = nn.Sequential( 41 | nn.Conv1d( 42 | in_channels=2, 43 | out_channels=16, 44 | kernel_size=4, 45 | stride=1, 46 | padding=1, 47 | ), 48 | nn.ReLU(), 49 | nn.MaxPool1d(kernel_size=2), 50 | ) 51 | self.out2 = nn.Linear(48, 16) 52 | 53 | 54 | 55 | self.bn0 = nn.BatchNorm1d(num_inputs) 56 | self.bn0.weight.data.fill_(1) 57 | self.bn0.bias.data.fill_(0) 58 | 59 | self.linear1 = nn.Linear(num_inputs, hidden_size) 60 | self.bn1 = nn.BatchNorm1d(hidden_size) 61 | self.bn1.weight.data.fill_(1) 62 | self.bn1.bias.data.fill_(0) 63 | 64 | self.linear2 = nn.Linear(hidden_size, hidden_size) 65 | self.bn2 = nn.BatchNorm1d(hidden_size) 66 | self.bn2.weight.data.fill_(1) 67 | self.bn2.bias.data.fill_(0) 68 | 69 | self.mu = nn.Linear(hidden_size, num_outputs) 70 | self.mu.weight.data.mul_(0.1) 71 | self.mu.bias.data.mul_(0.1) 72 | 73 | 74 | def forward(self, inputs): 75 | 76 | cnn1 = inputs[:, 0:8].contiguous() 77 | cnn1 = cnn1.view(1,2,8) 78 | cnn1 = self.conv1(cnn1) 79 | cnn1 = cnn1.view(cnn1.size(0), -1) 80 | cnn1 = self.out1(cnn1) 81 | cnn1 = cnn1.view(2,8) 82 | 83 | cnn2 = inputs[:, 8:16].contiguous() 84 | cnn2 = cnn2.view(1,2,8) 85 | cnn2 = self.conv2(cnn2) 86 | cnn2 = cnn2.view(cnn2.size(0), -1) 87 | cnn2 = self.out2(cnn2) 88 | cnn2 = cnn2.view(2,8) 89 | 90 | x = torch.cat((cnn1,cnn2), 1) 91 | x = torch.cat((x,inputs[:,16:]), 1) 92 | 93 | 94 | x = self.bn0(x) 95 | x = F.tanh(self.linear1(x)) 96 | x = F.tanh(self.linear2(x)) 97 | 98 | mu = F.tanh(self.mu(x)) 99 | return mu 100 | 101 | 102 | class Critic(nn.Module): 103 | 104 | def __init__(self, hidden_size, num_inputs, action_space): 105 | super(Critic, self).__init__() 106 | self.action_space = action_space 107 | num_outputs = action_space.shape[0] 108 | 109 | self.conv1 = nn.Sequential( 110 | nn.Conv1d( 111 | in_channels=2, 112 | out_channels=16, 113 | kernel_size=4, 114 | stride=1, 115 | padding=1, 116 | ), 117 | nn.ReLU(), 118 | nn.MaxPool1d(kernel_size=2), 119 | ) 120 | self.out1 = nn.Linear(48, 16) 121 | 122 | self.conv2 = nn.Sequential( 123 | nn.Conv1d( 124 | in_channels=2, 125 | out_channels=16, 126 | kernel_size=4, 127 | stride=1, 128 | padding=1, 129 | ), 130 | 
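# shape note: each 8-step window enters as (1, 2, 8); Conv1d(kernel_size=4, padding=1) leaves length 7 and MaxPool1d(2) leaves 3, so 16 channels * 3 = 48 features -- the origin of the 48 in self.out2 = nn.Linear(48, 16) below.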
nn.ReLU(), 131 | nn.MaxPool1d(kernel_size=2), 132 | ) 133 | self.out2 = nn.Linear(48, 16) 134 | 135 | self.bn0 = nn.BatchNorm1d(num_inputs) 136 | self.bn0.weight.data.fill_(1) 137 | self.bn0.bias.data.fill_(0) 138 | 139 | self.linear1 = nn.Linear(num_inputs, hidden_size) 140 | self.bn1 = nn.BatchNorm1d(hidden_size) 141 | self.bn1.weight.data.fill_(1) 142 | self.bn1.bias.data.fill_(0) 143 | 144 | self.linear_action = nn.Linear(num_outputs, hidden_size) 145 | self.bn_a = nn.BatchNorm1d(hidden_size) 146 | self.bn_a.weight.data.fill_(1) 147 | self.bn_a.bias.data.fill_(0) 148 | 149 | self.linear2 = nn.Linear(hidden_size + hidden_size, hidden_size) 150 | self.bn2 = nn.BatchNorm1d(hidden_size) 151 | self.bn2.weight.data.fill_(1) 152 | self.bn2.bias.data.fill_(0) 153 | 154 | self.V = nn.Linear(hidden_size, 1) 155 | self.V.weight.data.mul_(0.1) 156 | self.V.bias.data.mul_(0.1) 157 | 158 | def forward(self, inputs, actions): 159 | cnn1 = inputs[:, 0:8].contiguous() 160 | cnn1 = cnn1.view(1,2,8) 161 | cnn1 = self.conv1(cnn1) 162 | cnn1 = cnn1.view(cnn1.size(0), -1) 163 | cnn1 = self.out1(cnn1) 164 | cnn1 = cnn1.view(2,8) 165 | 166 | cnn2 = inputs[:, 8:16].contiguous() 167 | cnn2 = cnn2.view(1,2,8) 168 | cnn2 = self.conv2(cnn2) 169 | cnn2 = cnn2.view(cnn2.size(0), -1) 170 | cnn2 = self.out2(cnn2) 171 | cnn2 = cnn2.view(2,8) 172 | 173 | x = torch.cat((cnn1,cnn2), 1) 174 | x = torch.cat((x,inputs[:,16:]), 1) 175 | 176 | x = self.bn0(x) 177 | x = F.tanh(self.linear1(x)) 178 | a = F.tanh(self.linear_action(actions)) 179 | x = torch.cat((x, a), 1) 180 | x = F.tanh(self.linear2(x)) 181 | 182 | V = self.V(x) 183 | return V 184 | 185 | 186 | class DDPG_CNN(object): 187 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 188 | 189 | self.num_inputs = num_inputs 190 | self.action_space = action_space 191 | 192 | self.actor = Actor(hidden_size, self.num_inputs, self.action_space) 193 | self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space) 194 | self.actor_optim = Adam(self.actor.parameters(), lr=1e-4) 195 | 196 | self.critic = Critic(hidden_size, self.num_inputs, self.action_space) 197 | self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space) 198 | self.critic_optim = Adam(self.critic.parameters(), lr=1e-3) 199 | 200 | self.gamma = gamma 201 | self.tau = tau 202 | 203 | hard_update(self.actor_target, self.actor) # Make sure target is with the same weight 204 | hard_update(self.critic_target, self.critic) 205 | 206 | 207 | def select_action(self, state, exploration=None): 208 | self.actor.eval() 209 | mu = self.actor((Variable(state, volatile=True))) 210 | self.actor.train() 211 | mu = mu.data 212 | if exploration is not None: 213 | mu += torch.Tensor(exploration.noise()) 214 | 215 | return mu.clamp(0, 4) 216 | 217 | 218 | def update_parameters(self, batch): 219 | state_batch = Variable(torch.cat(batch.state)) 220 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 221 | action_batch = Variable(torch.cat(batch.action)) 222 | reward_batch = Variable(torch.cat(batch.reward)) 223 | mask_batch = Variable(torch.cat(batch.mask)) 224 | 225 | next_action_batch = self.actor_target(next_state_batch) 226 | next_state_action_values = self.critic_target(next_state_batch, next_action_batch) 227 | 228 | reward_batch = torch.unsqueeze(reward_batch, 1) 229 | expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values) 230 | 231 | self.critic_optim.zero_grad() 232 | 233 | state_action_batch = self.critic((state_batch), 
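# Q(s, a) for the sampled actions, regressed below onto the frozen TD target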
(action_batch)) 234 | 235 | value_loss = MSELoss(state_action_batch, expected_state_action_batch) 236 | value_loss.backward() 237 | self.critic_optim.step() 238 | 239 | self.actor_optim.zero_grad() 240 | 241 | policy_loss = -self.critic((state_batch),self.actor((state_batch))) 242 | 243 | policy_loss = policy_loss.mean() 244 | policy_loss.backward() 245 | self.actor_optim.step() 246 | 247 | soft_update(self.actor_target, self.actor, self.tau) 248 | soft_update(self.critic_target, self.critic, self.tau) 249 | -------------------------------------------------------------------------------- /env_test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | import numpy as np 4 | from gym import wrappers 5 | from gym import spaces 6 | 7 | import torch 8 | from ddpg_cnn import DDPG_CNN 9 | from naf_cnn import NAF_CNN 10 | from normalized_actions import NormalizedActions 11 | from ounoise import OUNoise 12 | from replay_memory import ReplayMemory, Transition 13 | 14 | 15 | 16 | class env(): 17 | """ """ 18 | def __init__(self): 19 | self.observation_space = spaces.Box(np.array([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]), np.array([float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf"),float("inf")])) 20 | 21 | self.action_space = spaces.Box(np.array([1]), np.array([4])) 22 | 23 | 24 | 25 | def reward(self): 26 | rewards = 0; 27 | for i in range(self.k): 28 | temp = self.list[i] 29 | for j in range(len(temp)): 30 | rewards = rewards + self.l * temp[j][0] 31 | temp = self.list[-1] 32 | for j in range(len(temp)): 33 | rewards = rewards - self.m*temp[j][1] - self.n * temp[j][2] - self.p * (temp[j][3] - self.list[0][j][3]) 34 | return rewards 35 | 36 | """ reset env, return the initial state """ 37 | def reset(self): 38 | state = np.array([[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2],[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2]]) 39 | state = torch.FloatTensor(state) 40 | return state 41 | 42 | """ action = [sub1_buff_size, sub2_buff_size] """ 43 | def step(self, action): 44 | state = np.array([[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2],[1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8,1,2]]) 45 | state = torch.FloatTensor(state) 46 | reward = 1 47 | done = 0 48 | return state, reward, done 49 | 50 | 51 | def main(): 52 | my_env = env() 53 | 54 | agent = NAF_CNN(0.99, 0.001, 128, 55 | my_env.observation_space.shape[0], my_env.action_space) 56 | 57 | parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') 58 | parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G', 59 | help='initial noise scale (default: 0.3)') 60 | parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G', 61 | help='final noise scale (default: 0.3)') 62 | parser.add_argument('--exploration_end', type=int, default=100, metavar='N', 63 | help='number of episodes with noise (default: 100)') 64 | args = parser.parse_args() 65 | 66 | ounoise = OUNoise(my_env.action_space.shape[0]) 67 | ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - 1) / args.exploration_end + args.final_noise_scale 68 | ounoise.reset() 69 | 70 | state = my_env.reset() 71 | i = 10 72 | while i>0: 73 
| action = agent.select_action(state, ounoise) 74 | print("action: {}".format(action)) 75 | next_state, reward, done = my_env.step(action) 76 | if done: 77 | break 78 | print(reward) 79 | i = i-1 80 | 81 | 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /mpsched.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static PyObject* persist_state(PyObject* self, PyObject* args) 5 | { 6 | int fd; 7 | if(!PyArg_ParseTuple(args, "i", &fd)) { 8 | return NULL; 9 | } 10 | int val = MPTCP_INFO_FLAG_SAVE_MASTER; 11 | setsockopt(fd, SOL_TCP, MPTCP_INFO, &val, sizeof(val)); 12 | return Py_BuildValue("i", fd); 13 | } 14 | 15 | static PyObject* get_meta_info(PyObject* self, PyObject* args) 16 | { 17 | int fd; 18 | if(!PyArg_ParseTuple(args, "i", &fd)) { 19 | return NULL; 20 | } 21 | 22 | struct mptcp_info minfo; 23 | struct mptcp_meta_info meta_info; 24 | struct tcp_info initial; 25 | struct tcp_info others[NUM_SUBFLOWS]; 26 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 27 | 28 | minfo.tcp_info_len = sizeof(struct tcp_info); 29 | minfo.sub_len = sizeof(others); 30 | minfo.meta_len = sizeof(struct mptcp_meta_info); 31 | minfo.meta_info = &meta_info; 32 | minfo.initial = &initial; 33 | minfo.subflows = &others; 34 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 35 | minfo.total_sub_info_len = sizeof(others_info); 36 | minfo.subflow_info = &others_info; 37 | 38 | socklen_t len = sizeof(minfo); 39 | 40 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 41 | PyObject *list = PyList_New(0); 42 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_unacked)); 43 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_retransmits)); 44 | return list; 45 | } 46 | 47 | static PyObject* get_sub_info(PyObject* self, PyObject* args) 48 | { 49 | int fd; 50 | if(!PyArg_ParseTuple(args, "i", &fd)) { 51 | return NULL; 52 | } 53 | 54 | struct mptcp_info minfo; 55 | struct mptcp_meta_info meta_info; 56 | struct tcp_info initial; 57 | struct tcp_info others[NUM_SUBFLOWS]; 58 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 59 | 60 | minfo.tcp_info_len = sizeof(struct tcp_info); 61 | minfo.sub_len = sizeof(others); 62 | minfo.meta_len = sizeof(struct mptcp_meta_info); 63 | minfo.meta_info = &meta_info; 64 | minfo.initial = &initial; 65 | minfo.subflows = &others; 66 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 67 | minfo.total_sub_info_len = sizeof(others_info); 68 | minfo.subflow_info = &others_info; 69 | 70 | socklen_t len = sizeof(minfo); 71 | 72 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 73 | 74 | PyObject *list = PyList_New(0); 75 | int i; 76 | for(i=0; i < NUM_SUBFLOWS; i++){ 77 | 78 | if(others[i].tcpi_state != 1) 79 | break; 80 | 81 | PyObject *subflows = PyList_New(0); 82 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_segs_out)); 83 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_rtt)); 84 | PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_snd_cwnd)); 85 | //PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_unacked)); 86 | //PyList_Append(subflows, Py_BuildValue("I", others[i].tcpi_total_retrans)); /* Packets which are "in flight" */ 87 | 88 | PyList_Append(list, subflows); 89 | } 90 | return list; 91 | } 92 | 93 | 94 | static PyObject* set_seg(PyObject* self, PyObject* args) 95 | { 96 | PyObject * listObj; 97 | if (! 
PyArg_ParseTuple( args, "O", &listObj )) 98 | return NULL; 99 | 100 | long length = PyList_Size(listObj); 101 | int fd = (int)PyLong_AsLong(PyList_GetItem(listObj, 0)); 102 | int i; 103 | 104 | struct mptcp_sched_info sched_info; 105 | sched_info.len = length-1; 106 | unsigned char quota[NUM_SUBFLOWS]; 107 | unsigned char segments[NUM_SUBFLOWS]; 108 | 109 | sched_info.quota = &quota; 110 | sched_info.num_segments = &segments; 111 | 112 | /* the rest of set_seg and the module boilerplate were lost in extraction; the lines below are a reconstruction modelled on info.c and setup.py -- in particular, MPTCP_SCHED_INFO is an assumed sockopt name for the patched kernel */ 113 | for(i=1; i < length; i++) { 114 | segments[i-1] = (unsigned char)PyLong_AsLong(PyList_GetItem(listObj, i)); 115 | quota[i-1] = 0; 116 | } 117 | setsockopt(fd, SOL_TCP, MPTCP_SCHED_INFO, &sched_info, sizeof(sched_info)); 118 | return Py_BuildValue("i", 0); 119 | } 120 | 121 | static PyMethodDef Methods[] = { 122 | {"persist_state", persist_state, METH_VARARGS, "persist mptcp subflow state"}, 123 | {"get_meta_info", get_meta_info, METH_VARARGS, "get mptcp meta info"}, 124 | {"get_sub_info", get_sub_info, METH_VARARGS, "get mptcp subflow info"}, 125 | {"set_seg", set_seg, METH_VARARGS, "set per-subflow segment quotas"}, 126 | {NULL, NULL, 0, NULL} 127 | }; 128 | 129 | static struct PyModuleDef Def = { 130 | PyModuleDef_HEAD_INIT, 131 | "mpsched", 132 | "mptcp scheduler interface", 133 | -1, 134 | Methods 135 | }; 136 | 137 | PyMODINIT_FUNC PyInit_mpsched(void) 138 | { 139 | return PyModule_Create(&Def); 140 | } 141 | -------------------------------------------------------------------------------- /mptcp_recv/info.c: -------------------------------------------------------------------------------- 1 | #include <Python.h> 2 | #include <linux/tcp.h> /* the original bracketed header names were stripped in extraction; Python.h is certain, linux/tcp.h is inferred from the uapi include dirs in setup.py */ 3 | 4 | static PyObject* persist_state(PyObject* self, PyObject* args) 5 | { 6 | int fd; 7 | if(!PyArg_ParseTuple(args, "i", &fd)) { 8 | return NULL; 9 | } 10 | int val = MPTCP_INFO_FLAG_SAVE_MASTER; 11 | setsockopt(fd, SOL_TCP, MPTCP_INFO, &val, sizeof(val)); 12 | return Py_BuildValue("i", fd); 13 | } 14 | 15 | static PyObject* get_info(PyObject* self, PyObject* args) 16 | { 17 | int fd; 18 | if(!PyArg_ParseTuple(args, "i", &fd)) { 19 | return NULL; 20 | } 21 | 22 | struct mptcp_info minfo; 23 | struct mptcp_meta_info meta_info; 24 | struct tcp_info initial; 25 | struct tcp_info others[NUM_SUBFLOWS]; 26 | struct mptcp_sub_info others_info[NUM_SUBFLOWS]; 27 | 28 | minfo.tcp_info_len = sizeof(struct tcp_info); 29 | minfo.sub_len = sizeof(others); 30 | minfo.meta_len = sizeof(struct mptcp_meta_info); 31 | minfo.meta_info = &meta_info; 32 | minfo.initial = &initial; 33 | minfo.subflows = &others; 34 | minfo.sub_info_len = sizeof(struct mptcp_sub_info); 35 | minfo.total_sub_info_len = sizeof(others_info); 36 | minfo.subflow_info = &others_info; 37 | 38 | socklen_t len = sizeof(minfo); 39 | 40 | getsockopt(fd, SOL_TCP, MPTCP_INFO, &minfo, &len); 41 | 42 | PyObject *list = PyList_New(0); 43 | if(others[0].tcpi_state == 1) 44 | { 45 | PyList_Append(list, Py_BuildValue("I", others[0].tcpi_bytes_received)); 46 | PyList_Append(list, Py_BuildValue("I", others[1].tcpi_bytes_received)); 47 | PyList_Append(list, Py_BuildValue("I", meta_info.mptcpi_recv_ofo_buff)); 48 | } 49 | return list; 50 | } 51 | 52 | 53 | static PyMethodDef Methods[] = { 54 | {"persist_state", persist_state, METH_VARARGS, "persist mptcp subflow state"}, 55 | {"get_info", get_info, METH_VARARGS, "get recv info"}, 56 | {NULL, NULL, 0, NULL} 57 | }; 58 | 59 | static struct PyModuleDef Def = { 60 | PyModuleDef_HEAD_INIT, 61 | "info", 62 | "get recv info", 63 | -1, 64 | Methods 65 | }; 66 | 67 | PyMODINIT_FUNC PyInit_info(void) 68 | { 69 | return PyModule_Create(&Def); 70 | } 71 | -------------------------------------------------------------------------------- /mptcp_recv/recv.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import threading 3 | import info 4 | import time 5 | 6 | 7 | class recv_thread(threading.Thread): 8 | 9 | def __init__(self, sock, buff_size=2048): 10 | threading.Thread.__init__(self) 11 | self.sock = sock 12 | self.buffer_size = buff_size 13 | 14 | def run(self): 15 | buff = self.sock.recv(self.buffer_size) 16 | filename = str(buff, encoding='utf8') 17 | fp = open(filename, 'wb') 18 | if not fp: 19 | print("open file error.\n") 20 | self.sock.send(bytes("open file error.", encoding='utf8')) 21 | pass 22 | else: 23 | self.sock.send(bytes("ok", encoding='utf8')) 24 | while(True): 25 | buff = self.sock.recv(self.buffer_size) 26 | if not buff: 27 | break 28 | else: 29 | fp.write(buff) 30 | print("receiving file {} from sender finished.".format(filename)) 31 | fp.close() 32 | 33 | 34 | class record(object): 35 | """Receive-side samples: per-subflow bytes received plus the out-of-order buffer size.""" 36 | def __init__(self, timestep=0.2, datafile="record"): 37 | self.data
= [] 38 | self.timestep = timestep 39 | self.datafile = datafile 40 | 41 | def save(self): 42 | length = len(self.data) 43 | with open(self.datafile, 'w') as f: 44 | f.write(str(self.timestep)) 45 | f.write('\n') 46 | f.write(str(length)) 47 | f.write('\n') 48 | for i in range(length): 49 | f.write('%d %d %d\n' % (self.data[i][0], self.data[i][1], self.data[i][2])) 50 | f.close() 51 | 52 | def load(self, datafile): 53 | self.datafile = datafile 54 | try: 55 | f = open(datafile, 'r') 56 | self.timestep = float(f.readline()) 57 | length = int(f.readline()) 58 | for i in range(length): 59 | s = f.readline().split(' ') 60 | self.data.append([int(s[0]), int(s[1]), int(s[2])]) 61 | finally: 62 | if f: 63 | f.close() 64 | 65 | def put(self, recd): 66 | self.data.append(recd) 67 | 68 | def draw(self): 69 | pass 70 | 71 | 72 | def main(): 73 | server = socket.socket() 74 | host = ''  # bind on all interfaces ('*' is not a valid address for socket.bind) 75 | port = 6669 76 | server.bind((host, port)) 77 | 78 | server.listen(1) 79 | num = 0 80 | while True: 81 | c, addr = server.accept() 82 | print('connect addr : {}'.format(addr)) 83 | fd = c.fileno() 84 | io = recv_thread(c) 85 | info.persist_state(fd) 86 | io.start() 87 | 88 | timestep = 0.2 89 | r = record(timestep=timestep, datafile="record{}".format(num)) 90 | time.sleep(1) 91 | while True: 92 | time.sleep(timestep) 93 | data = info.get_info(fd) 94 | if len(data) == 0: 95 | io.join() 96 | break 97 | r.put(data) 98 | 99 | r.save() 100 | num = num + 1 101 | 102 | 103 | if __name__ == '__main__': 104 | main() 105 | -------------------------------------------------------------------------------- /mptcp_recv/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | setup(name='info', 4 | ext_modules=[ 5 | Extension('info', 6 | ['info.c'], 7 | include_dirs=['/usr/src/linux-headers-4.4.110-mptcp+/include/uapi', '/usr/src/linux-headers-4.4.110-mptcp+/include', '/usr/include/python3.5m'], 8 | define_macros=[('NUM_SUBFLOWS', '2'), ('SOL_TCP', '6')] 9 | ) 10 | ]) 11 | -------------------------------------------------------------------------------- /mptcp_results/cal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/cal -------------------------------------------------------------------------------- /mptcp_results/cal_throughput.c: -------------------------------------------------------------------------------- 1 | /* 2 | * README 3 | * This program sums TCP payload lengths over a configurable unit of time, i.e. it computes throughput. 4 | * Its input files are the output files of the mptcplog.c program. 5 | * Input file names must carry the suffix _out.txt, e.g. s_2_2.cap_port0_59607_out.txt. 6 | * The program walks the current directory, processes every file with that suffix, and writes a matching output file. 7 | * 8 | * 9 | *** Input data format *** 10 | * Column 1 is a timestamp, column 2 is the TCP payload length; the columns are separated by a Tab character ('\t'). 11 | * For example: 12 | // timestamp payload length 13 | 1436339533.121812 8 14 | 1436339533.121852 8 15 | 1436339533.127700 8 16 | 1436339533.137741 8 17 | 1436339533.137794 8 18 | 1436339533.137803 8 19 | 20 ...
21 | 22 | *** Output data format *** 23 | * Column 1 is the adjusted unit timestamp, column 2 is the summed payload length within that unit of time. 24 | * For example, with the unit of time set to 0.01 s (precision==2), 25 | * the input above yields the following output: 26 | // unit timestamp summed payload length 27 | 1436339533.12 24 //3*8=24 28 | 1436339533.13 24 //columns separated by '\t' 29 | 30 | 31 | 32 | encoding: utf-8 33 | created time: 2015-07-10 34 | 35 | 36 | */ 37 | 38 | 39 | #include <stdio.h> 40 | #include <stdlib.h> 41 | #include <string.h> 42 | #include <assert.h> 43 | #include <dirent.h> //opendir (the bracketed header names were stripped in extraction; this set is inferred from the calls below) 44 | 45 | 46 | //precision sets how many digits after the timestamp's decimal point are kept 47 | //e.g. with precision == 2 the unit of time is 0.01 s 48 | //change precision to adjust the granularity 49 | // 0 < precision < length of the fractional part 50 | #define precision 2 51 | 52 | //maximum file line length 53 | #define MAX_LINE_LEN 1024 54 | 55 | //maximum timestamp length 56 | #define MAX_TIME_LEN 64 57 | 58 | int total_sum = 0; 59 | 60 | void calculate(char* in_file_name, char* out_file_name) 61 | { 62 | FILE* fread = fopen(in_file_name,"r"); 63 | 64 | //w+ opens for read/write, truncating the file if it exists and creating it otherwise 65 | FILE* fwrite = fopen(out_file_name,"w+"); 66 | 67 | char line[MAX_LINE_LEN]; 68 | 69 | //previous timestamp: integer part plus the first `precision` fractional digits 70 | char pre_time[MAX_TIME_LEN]; 71 | pre_time[0]='\0'; 72 | pre_time[MAX_TIME_LEN-1]='\0'; 73 | 74 | //current timestamp: integer part plus the first `precision` fractional digits 75 | char current_time[MAX_TIME_LEN]; 76 | current_time[0]='\0'; 77 | current_time[MAX_TIME_LEN-1]='\0'; 78 | 79 | 80 | 81 | 82 | //note: very long payload sums may exceed the range of unsigned int 83 | unsigned int current_data_len = 0; 84 | unsigned int sum = 0; 85 | 86 | //timestamp length: integer part + precision 87 | int time_len = 0; 88 | 89 | while(1) 90 | { 91 | //line_count++; 92 | fgets(line, MAX_LINE_LEN, fread); 93 | if (feof(fread)) 94 | break; 95 | 96 | //printf("line: %s\n", line); 97 | 98 | char* p1 = strchr(line, '.'); 99 | char* p2 = strchr(line, '\t'); 100 | if (p1==NULL || p2==NULL) 101 | { 102 | if (feof(fread)) 103 | break; 104 | 105 | printf("Input data format error! or end of file\n"); 106 | fclose(fread); 107 | fclose(fwrite); 108 | assert(0); 109 | } 110 | 111 | //TCP payload length 112 | current_data_len = atoi(p2+1); 113 | 114 | time_len = p1 + precision - line + 1; 115 | 116 | strncpy(current_time, line, time_len); 117 | current_time[time_len]='\0'; 118 | 119 | //printf ("%s\n",current_time); 120 | 121 | int i=0; 122 | /* original lines 122-194 were lost in extraction (an HTML-stripped "<...>" span); the bucketing logic and the opening of main() below are a reconstruction from the README comment above and the parallel loop in mptcplog.c, and may differ from the original in detail */ for (i = 0; i < time_len; i++) { if (current_time[i] != pre_time[i]) break; } if (i == time_len) { //same unit of time: keep accumulating sum += current_data_len; } else { //new unit of time: flush the previous bucket first if (pre_time[0] != '\0') fprintf(fwrite, "%s\t%u\n", pre_time, sum); strncpy(pre_time, current_time, MAX_TIME_LEN-1); sum = current_data_len; } total_sum += current_data_len; } //flush the final bucket and close both files if (pre_time[0] != '\0') fprintf(fwrite, "%s\t%u\n", pre_time, sum); fclose(fread); fclose(fwrite); } //walk the current directory looking for files with the expected suffix int main(int argc, char *argv[]) { char outfilename[256]; struct dirent *pDirEntry = NULL; DIR *pDir = NULL; if( (pDir = opendir("./")) == NULL ) { printf("opendir failed!\n"); return 1; } else { while( pDirEntry = readdir(pDir) ) { //files carrying the _out.txt suffix 195 | if (strstr(pDirEntry->d_name, "_out.txt")) 196 | { 197 | int len = strlen(pDirEntry->d_name); 198 | len = len-8; 199 | strncpy(outfilename,pDirEntry->d_name,len); 200 | outfilename[len]='\0'; 201 | strcat(outfilename,"_throughput.txt"); 202 | 203 | printf("input file: %s\t output file: %s\n",pDirEntry->d_name, outfilename); 204 | 205 | calculate(pDirEntry->d_name, outfilename); 206 | printf("total data byte: %d\n", total_sum); 207 | printf("------------------------------------\n"); 208 | total_sum = 0; 209 | } 210 | } 211 | closedir(pDir); 212 | 213 | } 214 | 215 | 216 | 217 | return 0; 218 | } 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /mptcp_results/eth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/eth -------------------------------------------------------------------------------- /mptcp_results/mptcplog.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <pcap.h> 5 | #include <sys/types.h> 6 | #include <netinet/in.h> 7 | #include <arpa/inet.h> 8 | 9 | #include <dirent.h> //opendir 10 | #include <sys/stat.h> /* header names reconstructed; the originals were stripped in extraction */ 11 | 12 | #define SIZE 15120 13 | 14 | 15 | /* 16 | * readme 17 | * (1) install libpcap (see: http://blog.csdn.net/qinggebuyao/article/details/7715843) 18 | * (2) gcc -o programname
filename.c -lpcap 19 | * (3) sudo ./programname 20 | * (4) 如果有libpcap.so.1 链接错误,请建立软链接,参考:http://blog.csdn.net/cfjtaishan/article/details/7096085 21 | * 22 | * 23 | */ 24 | 25 | //最大tcp目标端口数,和mptcp子流数相关 26 | #define MAX_PORT_NUM 20 27 | 28 | //ip address 用于过滤数据包 29 | #define dst_ip "114.212.80.16" 30 | 31 | //#define src_ip "192.168.1.106" //wifi2 32 | //#define src_ip "172.27.152.212" 33 | //#define src_ip "192.168.1.104" //wifi1 34 | //#define src_ip "114.212.85.15" //lte 35 | #define src_ip "114.212.83.34" //eth 36 | 37 | 38 | unsigned short tcp_dst_port_table[MAX_PORT_NUM]; 39 | int port_count = 0; 40 | 41 | //查表/填表 42 | int search_table(unsigned short port) 43 | { 44 | int i=0; 45 | for (i=0; i 0) 101 | { 102 | 103 | if(*(pkt_data+12)==0x08 && *(pkt_data+13)==0x00 && *(unsigned int*)(pkt_data+26)==src && *(unsigned int*)(pkt_data+30)==dst) //MAC type==IP ip.src==client, ip.dst=server 104 | { 105 | 106 | if(*(pkt_data+23)==0x06) // TCP 6 107 | { 108 | tcp_dst_port = *(unsigned short*)(pkt_data+36); 109 | tcp_dst_port = ntohs(tcp_dst_port); 110 | search_table(tcp_dst_port); 111 | } 112 | } 113 | 114 | reval = pcap_next_ex(pcap_handle, &header, (const u_char **)&pkt_data); 115 | 116 | } 117 | 118 | 119 | 120 | return; 121 | } 122 | 123 | //打印tcp时间戳和数据字段长度(区分不同的tcp目的端口) 124 | int get_tcp_info(char *filename) 125 | { 126 | int reval; 127 | struct pcap_pkthdr* header; 128 | u_char *pkt_data; 129 | pcap_t *pcap_handle; 130 | char error_content[PCAP_ERRBUF_SIZE]; 131 | 132 | char tempname[256]; 133 | char dst_port_info[32]; 134 | 135 | FILE *fd; 136 | 137 | int i=0; 138 | 139 | //循环输出多个端口tcp 数据信息到不同的文件中 140 | for (i=0; i 0) 176 | { 177 | 178 | if(*(pkt_data+12)==0x08 && *(pkt_data+13)==0x00 && *(unsigned int*)(pkt_data+26)==src && *(unsigned int*)(pkt_data+30)==dst) //MAC type==IP ip.src==client, ip.dst=server 179 | { 180 | if(*(pkt_data+23)==0x06) // TCP 6 181 | { 182 | tcp_dst_port = *(unsigned short*)(pkt_data+36); 183 | tcp_dst_port = ntohs(tcp_dst_port); 184 | 185 | if(tcp_dst_port == tcp_dst_port_table[i]) 186 | { 187 | double usec = (double)header->ts.tv_usec; 188 | usec /= 1000000; 189 | time_stamp = header->ts.tv_sec + usec; 190 | 191 | //fprintf(fd,"%ld.%ld\t",header->ts.tv_sec,header->ts.tv_usec); // has a bug 192 | 193 | fprintf(fd, "%lf\t", time_stamp); 194 | //total frame len - tcp header len - lens before tcp frame 195 | fprintf(fd,"%d\n",header->len-*(pkt_data+46)/4-34); 196 | //printf("%d, %d\n", header->len, *(pkt_data+46)/4); 197 | // check port 198 | //fprintf(fd,"%d\n",tcp_dst_port); 199 | } 200 | } 201 | } 202 | reval = pcap_next_ex(pcap_handle, &header, (const u_char **)&pkt_data); 203 | } 204 | fclose(fd); 205 | 206 | } 207 | } 208 | 209 | int main(int argc, char *argv[]) 210 | { 211 | 212 | struct dirent *pDirEntry = NULL; 213 | DIR *pDir = NULL; 214 | if( (pDir = opendir("./")) == NULL ) 215 | { 216 | printf("opendir failed!\n"); 217 | return 1; 218 | } 219 | else 220 | { 221 | while( pDirEntry = readdir(pDir) ) 222 | { 223 | //判断是否为指定类型的.cap file 224 | /* 225 | if (pDirEntry->d_name[strlen(pDirEntry->d_name)-4]=='.' 226 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-3]=='c' 227 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-2]=='a' 228 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-1]=='p') 229 | */ 230 | //.pcap files 231 | 232 | if (pDirEntry->d_name[strlen(pDirEntry->d_name)-5]=='.' 
233 | &&pDirEntry->d_name[strlen(pDirEntry->d_name)-4]=='p' 234 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-3]=='c' 235 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-2]=='a' 236 | && pDirEntry->d_name[strlen(pDirEntry->d_name)-1]=='p') 237 | { 238 | printf("输入文件:%s\n",pDirEntry->d_name); 239 | 240 | load_table(pDirEntry->d_name); 241 | get_tcp_info(pDirEntry->d_name); 242 | //display_table(); 243 | table_init(); 244 | printf("------------------------------------\n"); 245 | } 246 | } 247 | closedir(pDir); 248 | 249 | } 250 | 251 | 252 | 253 | return 0; 254 | } 255 | -------------------------------------------------------------------------------- /mptcp_results/tcpdump.sh: -------------------------------------------------------------------------------- 1 | sudo tcpdump -i wlp3s0 -w wifi1.pcap & 2 | sudo tcpdump -i wlx485d601fa31c -w wifi2.pcap & 3 | sudo tcpdump -i enp2s0 -w eth.pcap & 4 | -------------------------------------------------------------------------------- /mptcp_results/w0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w0 -------------------------------------------------------------------------------- /mptcp_results/w1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w1 -------------------------------------------------------------------------------- /mptcp_results/w2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kallen666/MPTCP-Deep-Reinforcement-Learning/6fa1bf8eb8d39527c8bc91148730ba4120093523/mptcp_results/w2 -------------------------------------------------------------------------------- /naf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | class Policy(nn.Module): 20 | 21 | def __init__(self, hidden_size, num_inputs, action_space): 22 | super(Policy, self).__init__() 23 | self.action_space = action_space 24 | num_outputs = action_space.shape[0] 25 | 26 | self.bn0 = nn.BatchNorm1d(num_inputs) 27 | self.bn0.weight.data.fill_(1) 28 | self.bn0.bias.data.fill_(0) 29 | 30 | self.linear1 = nn.Linear(num_inputs, hidden_size) 31 | self.bn1 = nn.BatchNorm1d(hidden_size) 32 | self.bn1.weight.data.fill_(1) 33 | self.bn1.bias.data.fill_(0) 34 | 35 | self.linear2 = nn.Linear(hidden_size, hidden_size) 36 | self.bn2 = nn.BatchNorm1d(hidden_size) 37 | self.bn2.weight.data.fill_(1) 38 | self.bn2.bias.data.fill_(0) 39 | 40 | self.V = nn.Linear(hidden_size, 1) 41 | self.V.weight.data.mul_(0.1) 42 | self.V.bias.data.mul_(0.1) 43 | 44 | self.mu = nn.Linear(hidden_size, num_outputs) 45 | self.mu.weight.data.mul_(0.1) 46 | self.mu.bias.data.mul_(0.1) 47 | 48 | self.L = 
nn.Linear(hidden_size, num_outputs ** 2) 49 | self.L.weight.data.mul_(0.1) 50 | self.L.bias.data.mul_(0.1) 51 | 52 | self.tril_mask = Variable(torch.tril(torch.ones( 53 | num_outputs, num_outputs), diagonal=-1).unsqueeze(0)) 54 | self.diag_mask = Variable(torch.diag(torch.diag( 55 | torch.ones(num_outputs, num_outputs))).unsqueeze(0)) 56 | 57 | def forward(self, inputs): 58 | x, u = inputs 59 | x = self.bn0(x) 60 | x = F.tanh(self.linear1(x)) 61 | x = F.tanh(self.linear2(x)) 62 | 63 | V = self.V(x) 64 | mu = F.tanh(self.mu(x)) 65 | 66 | Q = None 67 | if u is not None: 68 | num_outputs = mu.size(1) 69 | L = self.L(x).view(-1, num_outputs, num_outputs) 70 | L = L * \ 71 | self.tril_mask.expand_as( 72 | L) + torch.exp(L) * self.diag_mask.expand_as(L) 73 | P = torch.bmm(L, L.transpose(2, 1)) 74 | 75 | u_mu = (u - mu).unsqueeze(2) 76 | A = -0.5 * \ 77 | torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0] 78 | 79 | Q = A + V 80 | 81 | return mu, Q, V 82 | 83 | 84 | class NAF: 85 | 86 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 87 | self.action_space = action_space 88 | self.num_inputs = num_inputs 89 | 90 | self.model = Policy(hidden_size, num_inputs, action_space) 91 | self.target_model = Policy(hidden_size, num_inputs, action_space) 92 | self.optimizer = Adam(self.model.parameters(), lr=1e-3) 93 | 94 | self.gamma = gamma 95 | self.tau = tau 96 | 97 | hard_update(self.target_model, self.model) 98 | 99 | def select_action(self, state, exploration=None): 100 | self.model.eval() 101 | mu, _, _ = self.model((Variable(state, volatile=True), None)) 102 | self.model.train() 103 | mu = mu.data 104 | if exploration is not None: 105 | mu += torch.Tensor(exploration.noise()) 106 | 107 | return mu.clamp(1, 4) 108 | 109 | def update_parameters(self, batch): 110 | state_batch = Variable(torch.cat(batch.state)) 111 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 112 | action_batch = Variable(torch.cat(batch.action)) 113 | reward_batch = Variable(torch.cat(batch.reward)) 114 | mask_batch = Variable(torch.cat(batch.mask)) 115 | 116 | _, _, next_state_values = self.target_model((next_state_batch, None)) 117 | 118 | reward_batch = (torch.unsqueeze(reward_batch, 1)) 119 | expected_state_action_values = reward_batch + (next_state_values * self.gamma) 120 | 121 | _, state_action_values, _ = self.model((state_batch, action_batch)) 122 | 123 | loss = MSELoss(state_action_values, expected_state_action_values) 124 | 125 | self.optimizer.zero_grad() 126 | loss.backward() 127 | torch.nn.utils.clip_grad_norm(self.model.parameters(), 1) 128 | self.optimizer.step() 129 | 130 | soft_update(self.target_model, self.model, self.tau) 131 | -------------------------------------------------------------------------------- /naf_cnn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.optim import Adam 6 | from torch.autograd import Variable 7 | import torch.nn.functional as F 8 | 9 | MSELoss = nn.MSELoss() 10 | 11 | def soft_update(target, source, tau): 12 | for target_param, param in zip(target.parameters(), source.parameters()): 13 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 14 | 15 | def hard_update(target, source): 16 | for target_param, param in zip(target.parameters(), source.parameters()): 17 | target_param.data.copy_(param.data) 18 | 19 | class Policy(nn.Module): 20 | 21 | def __init__(self, hidden_size, num_inputs, 
action_space): 22 | super(Policy, self).__init__() 23 | self.action_space = action_space 24 | num_outputs = action_space.shape[0] 25 | 26 | self.conv1 = nn.Sequential( 27 | nn.Conv1d( 28 | in_channels=2, 29 | out_channels=16, 30 | kernel_size=4, 31 | stride=1, 32 | padding=1, 33 | ), 34 | nn.ReLU(), 35 | nn.MaxPool1d(kernel_size=2), 36 | ) 37 | self.out1 = nn.Linear(48, 16) 38 | 39 | self.conv2 = nn.Sequential( 40 | nn.Conv1d( 41 | in_channels=2, 42 | out_channels=16, 43 | kernel_size=4, 44 | stride=1, 45 | padding=1, 46 | ), 47 | nn.ReLU(), 48 | nn.MaxPool1d(kernel_size=2), 49 | ) 50 | self.out2 = nn.Linear(48, 16) 51 | 52 | self.conv3 = nn.Sequential( 53 | nn.Conv1d( 54 | in_channels=2, 55 | out_channels=16, 56 | kernel_size=4, 57 | stride=1, 58 | padding=1, 59 | ), 60 | nn.ReLU(), 61 | nn.MaxPool1d(kernel_size=2), 62 | ) 63 | self.out3 = nn.Linear(48, 16) 64 | 65 | self.bn0 = nn.BatchNorm1d(num_inputs) 66 | self.bn0.weight.data.fill_(1) 67 | self.bn0.bias.data.fill_(0) 68 | 69 | self.linear1 = nn.Linear(num_inputs, hidden_size) 70 | self.bn1 = nn.BatchNorm1d(hidden_size) 71 | self.bn1.weight.data.fill_(1) 72 | self.bn1.bias.data.fill_(0) 73 | 74 | self.linear2 = nn.Linear(hidden_size, hidden_size) 75 | self.bn2 = nn.BatchNorm1d(hidden_size) 76 | self.bn2.weight.data.fill_(1) 77 | self.bn2.bias.data.fill_(0) 78 | 79 | self.V = nn.Linear(hidden_size, 1) 80 | self.V.weight.data.mul_(0.1) 81 | self.V.bias.data.mul_(0.1) 82 | 83 | self.mu = nn.Linear(hidden_size, num_outputs) 84 | self.mu.weight.data.mul_(0.1) 85 | self.mu.bias.data.mul_(0.1) 86 | 87 | self.L = nn.Linear(hidden_size, num_outputs ** 2) 88 | self.L.weight.data.mul_(0.1) 89 | self.L.bias.data.mul_(0.1) 90 | 91 | self.tril_mask = Variable(torch.tril(torch.ones( 92 | num_outputs, num_outputs), diagonal=-1).unsqueeze(0)) 93 | self.diag_mask = Variable(torch.diag(torch.diag( 94 | torch.ones(num_outputs, num_outputs))).unsqueeze(0)) 95 | 96 | def forward(self, inputs): 97 | inputs, u = inputs 98 | cnn1 = inputs[:, 0:8].contiguous() 99 | cnn1 = cnn1.view(1,2,8) 100 | cnn1 = self.conv1(cnn1) 101 | cnn1 = cnn1.view(cnn1.size(0), -1) 102 | cnn1 = self.out1(cnn1) 103 | cnn1 = cnn1.view(2,8) 104 | 105 | cnn2 = inputs[:, 8:16].contiguous() 106 | cnn2 = cnn2.view(1,2,8) 107 | cnn2 = self.conv2(cnn2) 108 | cnn2 = cnn2.view(cnn2.size(0), -1) 109 | cnn2 = self.out2(cnn2) 110 | cnn2 = cnn2.view(2,8) 111 | 112 | cnn3 = inputs[:, 16:24].contiguous() 113 | cnn3 = cnn3.view(1,2,8) 114 | cnn3 = self.conv3(cnn3) 115 | cnn3 = cnn3.view(cnn3.size(0), -1) 116 | cnn3 = self.out3(cnn3) 117 | cnn3 = cnn3.view(2,8) 118 | 119 | x = torch.cat((cnn1,cnn2), 1) 120 | x = torch.cat((x,cnn3), 1) 121 | x = torch.cat((x,inputs[:,24:]), 1) 122 | 123 | 124 | x = self.bn0(x) 125 | x = F.tanh(self.linear1(x)) 126 | x = F.tanh(self.linear2(x)) 127 | 128 | V = self.V(x) 129 | mu = F.tanh(self.mu(x)) 130 | 131 | Q = None 132 | if u is not None: 133 | num_outputs = mu.size(1) 134 | L = self.L(x).view(-1, num_outputs, num_outputs) 135 | L = L * \ 136 | self.tril_mask.expand_as( 137 | L) + torch.exp(L) * self.diag_mask.expand_as(L) 138 | P = torch.bmm(L, L.transpose(2, 1)) 139 | 140 | u_mu = (u - mu).unsqueeze(2) 141 | A = -0.5 * \ 142 | torch.bmm(torch.bmm(u_mu.transpose(2, 1), P), u_mu)[:, :, 0] 143 | 144 | Q = A + V 145 | 146 | return mu, Q, V 147 | 148 | 149 | class NAF_CNN: 150 | 151 | def __init__(self, gamma, tau, hidden_size, num_inputs, action_space): 152 | self.action_space = action_space 153 | self.num_inputs = num_inputs 154 | 155 | self.model = 
Policy(hidden_size, num_inputs, action_space) 156 | self.target_model = Policy(hidden_size, num_inputs, action_space) 157 | self.optimizer = Adam(self.model.parameters(), lr=1e-3) 158 | 159 | self.gamma = gamma 160 | self.tau = tau 161 | 162 | hard_update(self.target_model, self.model) 163 | 164 | def select_action(self, state, exploration=None): 165 | self.model.eval() 166 | mu, _, _ = self.model((Variable(state, volatile=True), None)) 167 | self.model.train() 168 | mu = mu.data 169 | if exploration is not None: 170 | mu += torch.Tensor(exploration.noise()) 171 | 172 | return mu.clamp(1, 4) 173 | 174 | def update_parameters(self, batch): 175 | state_batch = Variable(torch.cat(batch.state)) 176 | next_state_batch = Variable(torch.cat(batch.next_state), volatile=True) 177 | action_batch = Variable(torch.cat(batch.action)) 178 | reward_batch = Variable(torch.cat(batch.reward)) 179 | mask_batch = Variable(torch.cat(batch.mask)) 180 | 181 | _, _, next_state_values = self.target_model((next_state_batch, None)) 182 | 183 | reward_batch = (torch.unsqueeze(reward_batch, 1)) 184 | expected_state_action_values = reward_batch + (next_state_values * self.gamma) 185 | 186 | _, state_action_values, _ = self.model((state_batch, action_batch)) 187 | 188 | loss = MSELoss(state_action_values, expected_state_action_values) 189 | 190 | self.optimizer.zero_grad() 191 | loss.backward() 192 | torch.nn.utils.clip_grad_norm(self.model.parameters(), 1) 193 | self.optimizer.step() 194 | 195 | soft_update(self.target_model, self.model, self.tau) 196 | -------------------------------------------------------------------------------- /normalized_actions.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class NormalizedActions(gym.ActionWrapper): 5 | 6 | def _action(self, action): 7 | action = (action + 1) / 2 # [-1, 1] => [0, 1] 8 | action *= (self.action_space.high - self.action_space.low) 9 | action += self.action_space.low 10 | return action 11 | 12 | def _reverse_action(self, action): 13 | action -= self.action_space.low 14 | action /= (self.action_space.high - self.action_space.low) 15 | action = action * 2 - 1 16 | return action 17 | -------------------------------------------------------------------------------- /old_main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from collections import namedtuple 4 | from itertools import count 5 | import sys 6 | import gym 7 | import numpy as np 8 | from gym import wrappers 9 | 10 | import torch 11 | from ddpg import DDPG 12 | from naf import NAF 13 | from normalized_actions import NormalizedActions 14 | from ounoise import OUNoise 15 | from replay_memory import ReplayMemory, Transition 16 | 17 | parser = argparse.ArgumentParser(description='PyTorch DDPG/NAF example') 18 | parser.add_argument('--algo', default='NAF', 19 | help='algorithm to use: DDPG | NAF') 20 | 21 | parser.add_argument('--gamma', type=float, default=0.99, metavar='G', 22 | help='discount factor for reward (default: 0.99)') 23 | 24 | parser.add_argument('--tau', type=float, default=0.001, metavar='G', 25 | help='soft update coefficient for the target networks (default: 0.001)') 26 | 27 | parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G', 28 | help='initial noise scale (default: 0.3)') 29 | 30 | parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G', 31 | help='final noise scale (default: 0.3)') 32 | 33 |
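# the three noise arguments drive the annealing schedule used in the training loop below: scale = (noise_scale - final_noise_scale) * max(0, exploration_end - i_episode) / exploration_end + final_noise_scale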
parser.add_argument('--exploration_end', type=int, default=100, metavar='N', 34 | help='number of episodes with noise (default: 100)') 35 | 36 | parser.add_argument('--seed', type=int, default=4, metavar='N', 37 | help='random seed (default: 4)') 38 | 39 | parser.add_argument('--batch_size', type=int, default=128, metavar='N', 40 | help='batch size (default: 128)') 41 | 42 | parser.add_argument('--num_steps', type=int, default=1000, metavar='N', 43 | help='max episode length (default: 1000)') 44 | 45 | parser.add_argument('--num_episodes', type=int, default=1000, metavar='N', 46 | help='number of episodes (default: 1000)') 47 | 48 | parser.add_argument('--hidden_size', type=int, default=128, metavar='N', 49 | help='number of hidden size (default: 128)') 50 | 51 | parser.add_argument('--updates_per_step', type=int, default=5, metavar='N', 52 | help='model updates per simulator step (default: 5)') 53 | 54 | parser.add_argument('--replay_size', type=int, default=1000000, metavar='N', 55 | help='size of replay buffer (default: 1000000)') 56 | 57 | parser.add_argument('--render', action='store_true', 58 | help='render the environment') 59 | 60 | args = parser.parse_args() 61 | 62 | env_name = 'Pendulum-v0' 63 | env = NormalizedActions(gym.make(env_name)) 64 | 65 | #env = wrappers.Monitor(env, '/tmp/{}-experiment'.format(env_name), force=True) 66 | 67 | env.seed(args.seed) 68 | torch.manual_seed(args.seed) 69 | np.random.seed(args.seed) 70 | if args.algo == "NAF": 71 | agent = NAF(args.gamma, args.tau, args.hidden_size, 72 | env.observation_space.shape[0], env.action_space) 73 | else: 74 | agent = DDPG(args.gamma, args.tau, args.hidden_size, 75 | env.observation_space.shape[0], env.action_space) 76 | 77 | memory = ReplayMemory(args.replay_size) 78 | ounoise = OUNoise(env.action_space.shape[0]) 79 | 80 | rewards = [] 81 | for i_episode in range(args.num_episodes): 82 | if i_episode < args.num_episodes // 2: 83 | state = torch.Tensor([env.reset()]) 84 | ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - 85 | i_episode) / args.exploration_end + args.final_noise_scale 86 | ounoise.reset() 87 | episode_reward = 0 88 | for t in range(args.num_steps): 89 | print("state: {}\n".format(state)) 90 | action = agent.select_action(state, ounoise) 91 | print("action: {}\n".format(action)) 92 | next_state, reward, done, _ = env.step(action.numpy()[0]) 93 | episode_reward += reward 94 | 95 | action = torch.Tensor(action) 96 | mask = torch.Tensor([not done]) 97 | next_state = torch.Tensor([next_state]) 98 | reward = torch.Tensor([reward]) 99 | # sys.exit(0) 100 | # if i_episode % 10 == 0: 101 | # env.render() 102 | 103 | memory.push(state, action, mask, next_state, reward) 104 | 105 | state = next_state 106 | 107 | if len(memory) > args.batch_size * 5: 108 | for _ in range(args.updates_per_step): 109 | transitions = memory.sample(args.batch_size) 110 | batch = Transition(*zip(*transitions)) 111 | 112 | agent.update_parameters(batch) 113 | 114 | if done: 115 | 116 | break 117 | rewards.append(episode_reward) 118 | else: 119 | state = torch.Tensor([env.reset()]) 120 | episode_reward = 0 121 | for t in range(args.num_steps): 122 | action = agent.select_action(state) 123 | 124 | next_state, reward, done, _ = env.step(action.numpy()[0]) 125 | episode_reward += reward 126 | 127 | next_state = torch.Tensor([next_state]) 128 | 129 | # if i_episode % 10 == 0: 130 | # env.render() 131 | 132 | state = next_state 133 | if done: 134 | break 135 | 136 | rewards.append(episode_reward) 
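# episodes in the second half of training run the greedy policy (select_action without ounoise), so this running average reflects evaluation rather than exploration performance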
137 | print("Episode: {}, noise: {}, reward: {}, average reward: {}".format(i_episode, ounoise.scale, 138 | rewards[-1], np.mean(rewards[-100:]))) 139 | 140 | env.close() 141 | -------------------------------------------------------------------------------- /ounoise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # from https://github.com/songrotek/DDPG/blob/master/ou_noise.py 5 | class OUNoise: 6 | 7 | def __init__(self, action_dimension, scale=0.1, mu=0, theta=0.15, sigma=0.2): 8 | self.action_dimension = action_dimension 9 | self.scale = scale 10 | self.mu = mu 11 | self.theta = theta 12 | self.sigma = sigma 13 | self.state = np.ones(self.action_dimension) * self.mu 14 | self.reset() 15 | 16 | def reset(self): 17 | self.state = np.ones(self.action_dimension) * self.mu 18 | 19 | def noise(self): 20 | x = self.state 21 | dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x)) 22 | self.state = x + dx 23 | return self.state * self.scale 24 | -------------------------------------------------------------------------------- /out_log_0325: -------------------------------------------------------------------------------- 1 | state: 2 | 28 20565 781 1 1 3 | 4 16354 749 1 1 4 | [torch.FloatTensor of size 2x5] 5 | 6 | ounoise: 7 | next state: 8 | 10 12537 694 1 1 9 | 3 9980 404 1 1 10 | [torch.FloatTensor of size 2x5] 11 | 12 | state: 13 | 28 20565 781 1 1 14 | 4 16354 749 1 1 15 | [torch.FloatTensor of size 2x5] 16 | 17 | ounoise: 18 | next state: 19 | 6 14712 711 1 1 20 | 3 8424 571 1 1 21 | [torch.FloatTensor of size 2x5] 22 | 23 | state: 24 | 28 20565 781 1 1 25 | 4 16354 749 1 1 26 | [torch.FloatTensor of size 2x5] 27 | 28 | ounoise: 29 | next state: 30 | 4 26811 676 1 1 31 | 3 15007 573 1 1 32 | [torch.FloatTensor of size 2x5] 33 | 34 | state: 35 | 28 20565 781 1 1 36 | 4 16354 749 1 1 37 | [torch.FloatTensor of size 2x5] 38 | 39 | ounoise: 40 | next state: 41 | 14 30643 573 1 1 42 | 4 5557 583 1 1 43 | [torch.FloatTensor of size 2x5] 44 | 45 | state: 46 | 28 20565 781 1 1 47 | 4 16354 749 1 1 48 | [torch.FloatTensor of size 2x5] 49 | 50 | ounoise: 51 | next state: 52 | 45 40812 115 1 1 53 | 2 14306 951 1 1 54 | [torch.FloatTensor of size 2x5] 55 | 56 | state: 57 | 28 20565 781 1 1 58 | 4 16354 749 1 1 59 | [torch.FloatTensor of size 2x5] 60 | 61 | ounoise: 62 | update -------------------- 63 | update -------------------- 64 | update -------------------- 65 | update -------------------- 66 | update -------------------- 67 | next state: 68 | 45 40812 1 1 1 69 | 2 7986 959 1 1 70 | [torch.FloatTensor of size 2x5] 71 | 72 | state: 73 | 28 20565 781 1 1 74 | 4 16354 749 1 1 75 | [torch.FloatTensor of size 2x5] 76 | 77 | ounoise: 78 | update -------------------- 79 | update -------------------- 80 | update -------------------- 81 | update -------------------- 82 | update -------------------- 83 | next state: 84 | 45 40812 1 1 1 85 | 4 8664 895 1 1 86 | [torch.FloatTensor of size 2x5] 87 | 88 | state: 89 | 28 20565 781 1 1 90 | 4 16354 749 1 1 91 | [torch.FloatTensor of size 2x5] 92 | 93 | ounoise: 94 | update -------------------- 95 | update -------------------- 96 | update -------------------- 97 | update -------------------- 98 | update -------------------- 99 | next state: 100 | 45 40812 0 1 1 101 | 2 5386 1159 1 1 102 | [torch.FloatTensor of size 2x5] 103 | 104 | state: 105 | 28 20565 781 1 1 106 | 4 16354 749 1 1 107 | [torch.FloatTensor of size 2x5] 108 | 109 | ounoise: 110 | update -------------------- 
111 | update -------------------- 112 | update -------------------- 113 | update -------------------- 114 | update -------------------- 115 | next state: 116 | 45 40812 1 1 1 117 | 3 5939 1074 1 1 118 | [torch.FloatTensor of size 2x5] 119 | 120 | state: 121 | 28 20565 781 1 1 122 | 4 16354 749 1 1 123 | [torch.FloatTensor of size 2x5] 124 | 125 | ounoise: 126 | update -------------------- 127 | update -------------------- 128 | update -------------------- 129 | update -------------------- 130 | update -------------------- 131 | next state: 132 | 45 40812 0 1 1 133 | 2 3583 1323 1 1 134 | [torch.FloatTensor of size 2x5] 135 | 136 | state: 137 | 28 20565 781 1 1 138 | 4 16354 749 1 1 139 | [torch.FloatTensor of size 2x5] 140 | 141 | ounoise: 142 | update -------------------- 143 | update -------------------- 144 | update -------------------- 145 | update -------------------- 146 | update -------------------- 147 | next state: 148 | 45 40812 0 1 1 149 | 5 12816 1143 1 1 150 | [torch.FloatTensor of size 2x5] 151 | 152 | state: 153 | 28 20565 781 1 1 154 | 4 16354 749 1 1 155 | [torch.FloatTensor of size 2x5] 156 | 157 | ounoise: 158 | 159 | -------------------------------------------------------------------------------- /replay_memory.py: -------------------------------------------------------------------------------- 1 | import random 2 | from collections import namedtuple 3 | 4 | # Taken from 5 | # https://github.com/pytorch/tutorials/blob/master/Reinforcement%20(Q-)Learning%20with%20PyTorch.ipynb 6 | 7 | Transition = namedtuple( 8 | 'Transition', ('state', 'action', 'mask', 'next_state', 'reward')) 9 | 10 | 11 | class ReplayMemory(object): 12 | 13 | def __init__(self, capacity): 14 | self.capacity = capacity 15 | self.memory = [] 16 | self.position = 0 17 | 18 | def push(self, *args): 19 | """Saves a transition.""" 20 | if len(self.memory) < self.capacity: 21 | self.memory.append(None) 22 | self.memory[self.position] = Transition(*args) 23 | self.position = (self.position + 1) % self.capacity 24 | 25 | def sample(self, batch_size): 26 | return random.sample(self.memory, batch_size) 27 | 28 | def __len__(self): 29 | return len(self.memory) 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | setup(name='mpsched', 4 | ext_modules=[ 5 | Extension('mpsched', 6 | ['mpsched.c'], 7 | include_dirs=['/usr/src/linux-headers-4.4.110-mptcp+/include/uapi', '/usr/src/linux-headers-4.4.110-mptcp+/include', '/usr/include/python3.5m'], 8 | define_macros=[('NUM_SUBFLOWS', '2'), ('SOL_TCP', '6')] 9 | ) 10 | ]) 11 | -------------------------------------------------------------------------------- /tc.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh
2 | ETH0=enp0s31f6   # wired NIC
3 | WIFI=wlx485d605766e1   # wireless NIC
4 | 
5 | ETH_RTT=50ms
6 | ETH_RATE=7040kbit
7 | 
8 | WIFI_RTT=70ms
9 | WIFI_RATE=9185kbit
10 | 
11 | tc qd del dev $ETH0 root
12 | tc qd add dev $ETH0 root handle 1:0 tbf rate $ETH_RATE latency 50ms burst 1540
13 | tc qd add dev $ETH0 parent 1:0 handle 10:0 netem delay $ETH_RTT
14 | 
15 | tc qd del dev $WIFI root
16 | tc qd add dev $WIFI root handle 1:0 tbf rate $WIFI_RATE latency 50ms burst 1540
17 | tc qd add dev $WIFI parent 1:0 handle 10:0 netem delay $WIFI_RTT
18 | 
--------------------------------------------------------------------------------
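A quick sanity check on these shaping parameters: the bandwidth-delay product bounds how much data can be in flight on each emulated path, which is useful context for the buffer-related terms in the reward below. A minimal sketch (the rates and RTTs are copied from tc.sh above; the helper itself is illustrative):

# Bandwidth-delay product of each emulated path, in bytes per RTT.
def bdp_bytes(rate_kbit, rtt_ms):
    return rate_kbit * 1000 / 8 * (rtt_ms / 1000.0)  # bits/s -> bytes over one RTT

print(bdp_bytes(7040, 50))   # ETH:  44000.0 bytes  (~43 KiB in flight)
print(bdp_bytes(9185, 70))   # WIFI: 80368.75 bytes (~78 KiB in flight)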
/train.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | class io_thread(threading.Thread):
9 | 
10 |     def __init__(self, sock, filename, buffer_size):
11 |         threading.Thread.__init__(self)
12 |         self.sock = sock
13 |         self.buffer_size = buffer_size
14 |         self.filename = filename
15 | 
16 |     def run(self):
17 |         fp = open(self.filename, 'rb')
18 |         self.sock.send(bytes(self.filename, encoding='utf8'))
19 |         buff = self.sock.recv(16)
20 |         print(str(buff, encoding='utf8'))
21 | 
22 |         while True:
23 |             buff = fp.read(self.buffer_size)
24 |             if not buff:
25 |                 break
26 |             self.sock.send(buff)
27 |         self.sock.close()
28 |         fp.close()
29 | 
30 | 
31 | class env():
32 |     """RL environment over one MPTCP connection, observed via mpsched."""
33 |     def __init__(self, fd, buff_size, time, k, l, n, p):
34 |         self.fd = fd
35 |         self.buff_size = buff_size
36 |         self.k = k  ## number of past time slices kept in the observation
37 |         self.l = l  ## throughput reward factor
38 |         #self.m = m  ## RTT penalty factor
39 |         self.n = n  ## buffer-bloat penalty factor
40 |         self.p = p  ## retransmission penalty factor
41 |         self.time = time
42 |         self.last = []
43 |         self.tp = [[], []]
44 |         self.rtt = [[], []]
45 |         self.cwnd = [[], []]
46 |         self.rr, self.rr_total = 0, 0  # per-step delta and cumulative retransmissions
47 |         self.count = 1
48 |         self.recv_buff_size = 0
49 | 
50 | 
51 |     """ adjust info to get goodput """
52 |     def adjust(self, state):
53 |         for j in range(len(state)):
54 |             self.tp[j].pop(0)
55 |             self.tp[j].append(state[j][0] - self.last[j][0])
56 |             self.rtt[j].pop(0)
57 |             self.rtt[j].append(state[j][1] - self.last[j][1])
58 |             self.cwnd[j].pop(0)
59 |             self.cwnd[j].append(state[j][2])
60 |         self.last = state
61 |         meta = mpsched.get_meta_info(self.fd)
62 |         self.recv_buff_size = meta[0]
63 |         self.rr, self.rr_total = meta[1] - self.rr_total, meta[1]  # retransmissions during this step
64 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
65 | 
66 |     def reward(self):
67 |         rewards = self.l * (sum(self.tp[0]) + sum(self.tp[1]))
68 |         #rewards = rewards - self.m * (sum(self.rtt[0]) + sum(self.rtt[1]))
69 |         rewards = rewards - self.n * self.recv_buff_size  # penalty: receive-buffer bloat
70 |         rewards = rewards - self.p * self.rr  # penalty: retransmissions
71 |         return rewards
72 | 
73 |     """ reset env, return the initial state """
74 |     def reset(self):
75 |         mpsched.persist_state(self.fd)
76 |         time.sleep(1)
77 |         self.tp, self.rtt, self.cwnd = [[], []], [[], []], [[], []]  # drop stale windows between episodes
78 |         self.last = mpsched.get_sub_info(self.fd)
79 |         for i in range(self.k):
80 |             subs = mpsched.get_sub_info(self.fd)
81 |             for j in range(len(subs)):
82 |                 self.tp[j].append(subs[j][0] - self.last[j][0])
83 |                 self.rtt[j].append(subs[j][1] - self.last[j][1])
84 |                 self.cwnd[j].append(subs[j][2])
85 |             self.last = subs
86 |             time.sleep(self.time)
87 |         meta = mpsched.get_meta_info(self.fd)
88 |         self.recv_buff_size = meta[0]
89 |         self.rr, self.rr_total = 0, meta[1]  # baseline for per-step retransmission deltas
90 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
91 | 
92 |     """ action = [sub1_buff_size, sub2_buff_size] """
93 |     def step(self, action):
94 |         # A = [self.fd, action[0], action[1]]
95 |         # mpsched.set_seg(A)
96 |         time.sleep(self.time)
97 |         state_nxt = mpsched.get_sub_info(self.fd)
98 |         done = False
99 |         if len(state_nxt) == 0:
100 |             done = True
101 |         self.count = self.count + 1
102 |         return self.adjust(state_nxt), self.reward(), self.count, self.recv_buff_size, done
103 | 
104 | 
105 | def main():
106 |     cfg = ConfigParser()
107 |     cfg.read('config.ini')
108 | 
109 |     IP = cfg.get('server', 'ip')
110 |     PORT = cfg.getint('server', 'port')
111 |     FILE = cfg.get('file', 'file')
112 |     SIZE = cfg.getint('env', 'buffer_size')
113 |     TIME = cfg.getfloat('env', 'time')
114 | 
115 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
116 |     sock.connect((IP, PORT))
117 |     fd = sock.fileno()
118 |     io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
119 |     mpsched.persist_state(fd)
120 | 
121 |     io.start()
122 |     my_env = env(fd=fd, buff_size=SIZE, time=TIME, k=4, l=0.01, n=0.03, p=0.05)
123 | 
124 |     state = my_env.reset()
125 |     while True:
126 |         action = []
127 |         state_nxt, reward, count, recv_buff_size, done = my_env.step(action)
128 |         if done:
129 |             break
130 |         print(reward)
131 |         print(recv_buff_size)
132 |         print(count)
133 | 
134 |     io.join()
135 | 
136 | 
137 | if __name__ == '__main__':
138 |     main()
139 | 
--------------------------------------------------------------------------------
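The flattened state that reset()/adjust() return is easy to misread, so here is a minimal sketch of its layout for the k = 4 window used in train.py's main(). The numbers are invented; only the ordering comes from the code above:

# Per subflow: k throughput deltas, then k RTT values, then k cwnd samples,
# then the shared [recv_buff_size, rr] pair -> 3*k + 2 features per row.
k = 4
tp   = [12, 9, 15, 11]          # hypothetical per-interval throughput deltas
rtt  = [48, 51, 47, 50]         # hypothetical per-interval RTT readings
cwnd = [10, 10, 12, 12]         # hypothetical cwnd samples
recv_buff_size, rr = 1024, 2    # hypothetical meta-socket readings
row = tp + rtt + cwnd + [recv_buff_size, rr]
assert len(row) == 3 * k + 2    # 14 features per subflow when k = 4
state = [row, row]              # one row per subflow, as fed to the agent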
/train_2.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | import argparse
9 | import gym
10 | import numpy as np
11 | from gym import wrappers
12 | from gym import spaces
13 | 
14 | import torch
15 | from ddpg_cnn import DDPG_CNN
16 | from naf_cnn import NAF_CNN
17 | from normalized_actions import NormalizedActions
18 | from ounoise import OUNoise
19 | from replay_memory import ReplayMemory, Transition
20 | 
21 | 
22 | class io_thread(threading.Thread):
23 | 
24 |     def __init__(self, sock, filename, buffer_size):
25 |         threading.Thread.__init__(self)
26 |         self.sock = sock
27 |         self.buffer_size = buffer_size
28 |         self.filename = filename
29 | 
30 |     def run(self):
31 |         fp = open(self.filename, 'rb')
32 |         self.sock.send(bytes(self.filename, encoding='utf8'))
33 |         buff = self.sock.recv(16)
34 |         print(str(buff, encoding='utf8'))
35 | 
36 |         while True:
37 |             buff = fp.read(self.buffer_size)
38 |             if not buff:
39 |                 break
40 |             self.sock.send(buff)
41 |         self.sock.close()
42 |         fp.close()
43 | 
44 | 
45 | class env():
46 |     """RL environment over one MPTCP connection, observed via mpsched."""
47 |     def __init__(self, fd, buff_size, time, k, l, n, p):
48 |         self.fd = fd
49 |         self.buff_size = buff_size
50 |         self.k = k  ## number of past time slices kept in the observation
51 |         self.l = l  ## throughput reward factor
52 |         #self.m = m  ## RTT penalty factor
53 |         self.n = n  ## buffer-bloat penalty factor
54 |         self.p = p  ## retransmission penalty factor
55 |         self.time = time
56 |         self.last = []
57 |         self.tp = [[], []]
58 |         self.rtt = [[], []]
59 |         self.cwnd = [[], []]
60 |         self.rr, self.rr_total = 0, 0  # per-step delta and cumulative retransmissions
61 |         self.count = 1
62 |         self.recv_buff_size = 0
63 | 
64 |         # 3*k + 2 = 26 non-negative features per subflow (k = 8 in main() below)
65 |         self.observation_space = spaces.Box(np.zeros(26), np.full(26, np.inf))
66 |         self.action_space = spaces.Box(np.array([1]), np.array([4]))
67 | 
68 | 
69 |     """ adjust info to get goodput """
70 |     def adjust(self, state):
71 |         for j in range(len(state)):
72 |             self.tp[j].pop(0)
73 |             self.tp[j].append(state[j][0] - self.last[j])
74 |             self.rtt[j].pop(0)
75 |             self.rtt[j].append(state[j][1])
76 |             self.cwnd[j].pop(0)
77 |             self.cwnd[j].append(state[j][2])
78 |         self.last = [x[0] for x in state]
79 |         meta = mpsched.get_meta_info(self.fd)
80 |         self.recv_buff_size = meta[0]
81 |         self.rr, self.rr_total = meta[1] - self.rr_total, meta[1]  # retransmissions during this step
82 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
83 | 
84 |     def reward(self):
85 |         rewards = self.l * (sum(self.tp[0]) + sum(self.tp[1]))
86 |         #rewards = rewards - self.m * (sum(self.rtt[0]) + sum(self.rtt[1]))
87 |         rewards = rewards - self.n * self.recv_buff_size  # penalty: receive-buffer bloat
88 |         rewards = rewards - self.p * self.rr  # penalty: retransmissions
89 |         return rewards
90 | 
91 |     """ reset env, return the initial state """
92 |     def reset(self):
93 |         mpsched.persist_state(self.fd)
94 |         time.sleep(1)
95 |         self.tp, self.rtt, self.cwnd = [[], []], [[], []], [[], []]  # drop stale windows between episodes
96 |         self.last = [x[0] for x in mpsched.get_sub_info(self.fd)]
97 |         for i in range(self.k):
98 |             subs = mpsched.get_sub_info(self.fd)
99 |             for j in range(len(subs)):
100 |                 self.tp[j].append(subs[j][0] - self.last[j])
101 |                 self.rtt[j].append(subs[j][1])
102 |                 self.cwnd[j].append(subs[j][2])
103 |             self.last = [x[0] for x in subs]
104 |             time.sleep(self.time)
105 |         meta = mpsched.get_meta_info(self.fd)
106 |         self.recv_buff_size = meta[0]
107 |         self.rr, self.rr_total = 0, meta[1]  # baseline for per-step retransmission deltas
108 |         return [self.tp[0] + self.rtt[0] + self.cwnd[0] + [self.recv_buff_size, self.rr], self.tp[1] + self.rtt[1] + self.cwnd[1] + [self.recv_buff_size, self.rr]]
109 | 
110 |     """ action = [sub1_buff_size, sub2_buff_size] """
111 |     def step(self, action):
112 |         # A = [self.fd, action[0], action[1]]
113 |         # mpsched.set_seg(A)
114 |         time.sleep(self.time)
115 |         state_nxt = mpsched.get_sub_info(self.fd)
116 |         done = False
117 |         if len(state_nxt) == 0:
118 |             done = True
119 |         self.count = self.count + 1
120 |         return self.adjust(state_nxt), self.reward(), self.count, self.recv_buff_size, done
121 | 
122 | 
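The training loop in main() below stores tensors in ReplayMemory and re-batches sampled transitions with the Transition(*zip(*transitions)) idiom; a minimal, self-contained illustration (the dummy shapes follow the 26-feature layout above, everything else is placeholder data):

import torch
from replay_memory import ReplayMemory, Transition

mem = ReplayMemory(capacity=100)
for _ in range(3):  # push dummy (state, action, mask, next_state, reward) tuples
    mem.push(torch.zeros(2, 26), torch.tensor([2.0]), torch.tensor([1.0]),
             torch.zeros(2, 26), torch.tensor([0.5]))
transitions = mem.sample(2)               # a list of Transition namedtuples
batch = Transition(*zip(*transitions))    # one Transition whose fields are tuples
reward_batch = torch.stack(batch.reward)  # -> tensor of shape (2, 1)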
123 | def main():
124 |     cfg = ConfigParser()
125 |     cfg.read('config.ini')
126 | 
127 |     IP = cfg.get('server', 'ip')
128 |     PORT = cfg.getint('server', 'port')
129 |     FILE = cfg.get('file', 'file')
130 |     SIZE = cfg.getint('env', 'buffer_size')
131 |     TIME = cfg.getfloat('env', 'time')
132 |     EPISODE = cfg.getint('env', 'episode')
133 | 
134 |     parser = argparse.ArgumentParser(description='NAF/DDPG training for the MPTCP environment')
135 | 
136 |     parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
137 |                         help='discount factor for reward (default: 0.99)')
138 |     parser.add_argument('--tau', type=float, default=0.001, metavar='G',
139 |                         help='soft target-update rate (default: 0.001)')
140 | 
141 |     parser.add_argument('--noise_scale', type=float, default=0.3, metavar='G',
142 |                         help='initial noise scale (default: 0.3)')
143 |     parser.add_argument('--final_noise_scale', type=float, default=0.3, metavar='G',
144 |                         help='final noise scale (default: 0.3)')
145 |     parser.add_argument('--exploration_end', type=int, default=100, metavar='N',
146 |                         help='number of episodes with noise (default: 100)')
147 | 
148 |     parser.add_argument('--hidden_size', type=int, default=128, metavar='N',
149 |                         help='number of hidden units per layer (default: 128)')
150 |     parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
151 |                         help='size of replay buffer (default: 1000000)')
152 |     parser.add_argument('--updates_per_step', type=int, default=5, metavar='N',
153 |                         help='model updates per environment step (default: 5)')
154 |     parser.add_argument('--batch_size', type=int, default=64, metavar='N',
155 |                         help='batch size (default: 64)')
156 | 
157 | 
158 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
159 |     sock.connect((IP, PORT))
160 |     fd = sock.fileno()
161 |     my_env = env(fd=fd, buff_size=SIZE, time=TIME, k=8, l=0.01, n=0.03, p=0.05)
162 |     mpsched.persist_state(fd)
163 | 
164 |     args = parser.parse_args()
165 |     agent = NAF_CNN(args.gamma, args.tau, args.hidden_size,
166 |                     my_env.observation_space.shape[0], my_env.action_space)
167 |     memory = ReplayMemory(args.replay_size)
168 |     ounoise = OUNoise(my_env.action_space.shape[0])
169 | 
170 |     rewards = []
171 |     times = []
172 |     for i_episode in range(EPISODE):
173 |         if i_episode < 0.9 * EPISODE:  # training episodes
174 |             io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
175 |             io.start()
176 | 
177 |             state = my_env.reset()
178 | 
179 |             ounoise.scale = (args.noise_scale - args.final_noise_scale) * max(0, args.exploration_end - i_episode) / args.exploration_end + args.final_noise_scale
180 |             ounoise.reset()
181 |             print(state)
182 |             episode_reward = 0
183 |             while True:
184 |                 state = torch.FloatTensor(state)
185 |                 #print("state: {}\n ounoise: {}".format(state, ounoise.scale))
186 |                 action = agent.select_action(state, ounoise)
187 |                 #print("action: {}".format(action))
188 |                 next_state, reward, count, recv_buff_size, done = my_env.step(action)
189 |                 #print("buff size: ", recv_buff_size)
190 |                 #print("reward: ", reward)
191 |                 episode_reward += reward
192 | 
193 |                 action = torch.FloatTensor(action)
194 |                 mask = torch.Tensor([not done])
195 |                 next_state = torch.FloatTensor(next_state)
196 |                 reward = torch.FloatTensor([float(reward)])
197 |                 memory.push(state, action, mask, next_state, reward)
198 | 
199 |                 state = next_state
200 | 
201 |                 if len(memory) > args.batch_size * 5:
202 |                     for _ in range(args.updates_per_step):
203 |                         transitions = memory.sample(args.batch_size)
204 |                         batch = Transition(*zip(*transitions))
205 |                         #print("update", 10 * '--')
206 |                         agent.update_parameters(batch)
207 | 
208 |                 if done:
209 |                     break
210 |             rewards.append(episode_reward)
211 |             io.join()
212 |         else:  # testing episodes (last 10%): no exploration noise, time the transfer
213 |             io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
214 |             io.start()
215 |             state = my_env.reset()
216 |             episode_reward = 0
217 |             start_time = time.time()
218 |             while True:
219 |                 state = torch.FloatTensor(state)
220 |                 #print("state: {}\n".format(state))
221 |                 action = agent.select_action(state)
222 |                 #print("action: {}".format(action))
223 |                 next_state, reward, count, recv_buff_size, done = my_env.step(action)
224 |                 episode_reward += reward
225 |                 state = next_state
226 | 
227 |                 if done:
228 |                     break
229 |             rewards.append(episode_reward)
230 |             times.append(str(time.time() - start_time) + "\n")
231 |             io.join()
232 |             #print("Episode: {}, noise: {}, reward: {}, average reward: {}".format(i_episode, ounoise.scale, rewards[-1], np.mean(rewards[-100:])))
233 |     fo = open("times.txt", "w")
234 |     fo.writelines(times)  # one completion time per test episode
235 |     fo.close()
236 | 
237 |     sock.close()
238 | 
239 | 
240 | if __name__ == '__main__':
241 |     main()
242 | 
--------------------------------------------------------------------------------
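A worked example of the noise-annealing schedule on line 179 above. With the script's defaults (noise_scale = final_noise_scale = 0.3) the scale is constant; lowering final_noise_scale, here hypothetically to 0.05, gives a linear decay that flattens after exploration_end episodes:

noise_scale, final_noise_scale, exploration_end = 0.3, 0.05, 100
for i_episode in (0, 50, 100, 150):
    scale = (noise_scale - final_noise_scale) * max(0, exploration_end - i_episode) / exploration_end + final_noise_scale
    print(i_episode, round(scale, 3))  # -> 0.3, 0.175, 0.05, 0.05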
/train_test.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | import socket
4 | from configparser import ConfigParser
5 | import mpsched
6 | 
7 | 
8 | class io_thread(threading.Thread):
9 | 
10 |     def __init__(self, sock, filename, buffer_size):
11 |         threading.Thread.__init__(self)
12 |         self.sock = sock
13 |         self.buffer_size = buffer_size
14 |         self.filename = filename
15 | 
16 |     def run(self):
17 |         fp = open(self.filename, 'rb')
18 |         self.sock.send(bytes(self.filename, encoding='utf8'))
19 |         buff = self.sock.recv(16)
20 |         print(str(buff, encoding='utf8'))
21 | 
22 |         while True:
23 |             buff = fp.read(self.buffer_size)
24 |             if not buff:
25 |                 break
26 |             self.sock.send(buff)
27 |         self.sock.close()
28 |         fp.close()
29 | 
30 | 
31 | class record(object):
32 |     """Collects per-timestep subflow samples and writes them to a data file."""
33 |     def __init__(self, timestep=0.2, datafile="record"):
34 |         self.data = []
35 |         self.timestep = timestep
36 |         self.datafile = datafile
37 | 
38 |     def save(self):
39 |         length = len(self.data)
40 |         with open(self.datafile, 'w') as f:
41 |             f.write(str(self.timestep))
42 |             f.write('\n')
43 |             f.write(str(length))
44 |             f.write('\n')
45 |             for i in range(length):
46 |                 f.write('%d %d\n' % (self.data[i][0][1], self.data[i][1][1]))
47 | 
48 |     def put(self, recd):
49 |         self.data.append(recd)
50 | 
51 |     def draw(self):
52 |         pass
53 | 
54 | 
55 | def main():
56 |     cfg = ConfigParser()
57 |     cfg.read('config.ini')
58 | 
59 |     IP = cfg.get('server', 'ip')
60 |     PORT = cfg.getint('server', 'port')
61 |     FILE = cfg.get('file', 'file')
62 |     SIZE = cfg.getint('env', 'buffer_size')
63 |     timestep = cfg.getfloat('env', 'time')
64 | 
65 |     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
66 |     sock.connect((IP, PORT))
67 |     fd = sock.fileno()
68 | 
69 |     io = io_thread(sock=sock, filename=FILE, buffer_size=SIZE)
70 | 
71 |     start_time = time.time()
72 |     io.start()
73 |     io.join()
74 | 
75 |     end_time = time.time()
76 |     print("completion time: ", end_time - start_time)
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     main()
81 | 
--------------------------------------------------------------------------------
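record is defined but never exercised in main() above; a hedged sketch of how it could be wired in. The sampling loop, the `fd` argument, and the meaning of the sampled fields are assumptions on my part; only the field indices follow save():

# Hypothetical usage of the record class from train_test.py; not in the repo.
# Assumes a connected MPTCP socket whose fileno() is `fd`, and that
# mpsched.get_sub_info(fd) returns one info tuple per subflow, whose
# second field (index 1) is what save() writes out.
rec = record(timestep=0.2, datafile="record")
for _ in range(50):                  # sample for 50 * 0.2 = 10 seconds
    subs = mpsched.get_sub_info(fd)  # [subflow0_info, subflow1_info]
    if len(subs) >= 2:
        rec.put(subs)
    time.sleep(rec.timestep)
rec.save()  # header: timestep and sample count, then one line per sample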