├── README.md ├── api_opt.py ├── api_opt_native.py ├── heuristic_api.py └── optimize_mip_single_peer_split.py /README.md: -------------------------------------------------------------------------------- 1 | # Cascara 2 | This repository has the APIs needed to run Cascara and its variants given traffic demands, link costs and topology information as input. We implemented Cascara using both `CVXPY` with `GUROBI` as the solver and native `gurobipy` API bindings. 3 | 4 | * The native implementation is in `api_opt_native.py` and the `CVXPY` version is in `api_opt.py`. The native implementation gave us more control over setting the parameters of `GUROBI`. 5 | 6 | * The set of solver parameters that gives you the fastest results will most likely differ slightly from ours: it is highly dependent on your traffic demands and link capacities. 7 | * `solve_optimization` is the main function in both versions. It takes demands as a dictionary along with other details about the network, sets up the problem formulation with the relevant constraints and objective and, once solutions are found, dumps them into a CSV file. 8 | 9 | * We have removed all references to our input/output files, so those variables will not be available. Replace them with your own file names. Please get in touch with us for more information. 10 | 11 | ## Notes on Cascara-online 12 | * `heuristic_api.py` has all the functionality required for Cascara-online, Entact, GFA and related algorithms. 13 | 14 | * For confidentiality reasons we have removed the `alpha` and `beta` values from the code. These can be found by a parameter sweep; check the technical report for details. 15 | -------------------------------------------------------------------------------- /api_opt.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import json 4 | from datetime import datetime 5 | import pandas as pd 6 | from consts import * 7 | import pdb 8 | import csv 9 | import numpy as np 10 | from cvxpy import * 11 | from cvxpy import error # SolverError is referenced as error.SolverError in the except clauses below 12 | def initialize_opti_vars_previous_work(flow_values, router_set): 13 | ''' 14 | The difference between this initialization of optimization 15 | variables and the one in 'initialize_opti_vars' is that 16 | this assigns a vector of variables for each router and every 17 | index of the router vector is for a timeslot in the month. 18 | These per-router vectors allow the use of built-in convex 19 | functions like 'sum_largest' which only accept vectors and not 20 | lists of scalars.
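For example, with T timeslots in the billing month, rtr_to_var_vector[rtr]
is a single Variable(T), so the previous-work objective below can be written
as sum_largest(rtr_to_var_vector[rtr], n)/n to average a router's n busiest slots.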
21 | https://www.cvxpy.org/tutorial/functions/index.html 22 | ''' 23 | num_vars = len(flow_values) 24 | rtr_to_var_vector = {} 25 | for rtr in router_set: 26 | rtr_var = Variable(num_vars) 27 | rtr_to_var_vector[rtr] = rtr_var 28 | 29 | optimization_variables = {} 30 | tses = sorted(flow_values.keys()) 31 | for ind, ts in enumerate(tses): 32 | if ts not in optimization_variables: 33 | optimization_variables[ts] = {} 34 | for rtr in router_set: 35 | optimization_variables[ts][rtr] = rtr_to_var_vector[rtr][ind] 36 | 37 | return optimization_variables, rtr_to_var_vector 38 | 39 | def initialize_opti_vars(flow_values, router_set): 40 | optimization_variables = {} 41 | delta_vars = {} 42 | for ts in flow_values: 43 | if ts not in optimization_variables: 44 | optimization_variables[ts] = {} 45 | delta_vars[ts] = {} 46 | for rtr in router_set: 47 | optimization_variables[ts][rtr] = Variable() 48 | delta_vars[ts][rtr] = Bool() 49 | 50 | z_vars = {} 51 | for rtr in router_set: 52 | z_vars[rtr] = Variable() 53 | return optimization_variables, delta_vars, z_vars 54 | 55 | def get_routers_from_assignments(flows_per_ts): 56 | rtr_set = set() 57 | for ts in flows_per_ts: 58 | rtr_set.update(flows_per_ts[ts].keys()) 59 | return list(rtr_set) 60 | 61 | def from_ts_to_rtr_key(ts_key, router_set): 62 | rtr_key = {} 63 | num_tses = len(ts_key) 64 | for ts in ts_key: 65 | for rtr in router_set: 66 | if rtr not in rtr_key: 67 | rtr_key[rtr] = [] 68 | if rtr in ts_key[ts]: 69 | rtr_key[rtr].append(ts_key[ts][rtr]) 70 | else: 71 | rtr_key[rtr].append(0) 72 | return rtr_key 73 | 74 | def get_flow_assignments(optimization_variables): 75 | optimal_assignments = {} 76 | for ts in optimization_variables: 77 | if ts not in optimal_assignments: 78 | optimal_assignments[ts] = {} 79 | for rtr in optimization_variables[ts]: 80 | optimal_assignments[ts][rtr] = optimization_variables[ts][rtr].value 81 | return optimal_assignments 82 | 83 | 84 | def get_positivity_constraints(optimization_variables, z_vars): 85 | constraints = [] 86 | for op_key_ts in optimization_variables: 87 | for op_key_rtr in optimization_variables[op_key_ts]: 88 | op_var = optimization_variables[op_key_ts][op_key_rtr] 89 | constraints.append(op_var >= 0) 90 | for rtr in z_vars: 91 | var = z_vars[rtr] 92 | constraints.append(var >= 0) 93 | return constraints 94 | 95 | def get_peer_capacity_constraints(optimization_variables, ifspeeds): 96 | constraints = [] 97 | for op_key_ts in optimization_variables: 98 | for op_key_rtr in optimization_variables[op_key_ts]: 99 | op_var = optimization_variables[op_key_ts][op_key_rtr] 100 | intf_capacity = ifspeeds[op_key_rtr] 101 | constraints.append(op_var <= intf_capacity) 102 | return constraints 103 | 104 | def get_demand_completion_constraints(optimization_variables, flow_per_ts): 105 | constraints = [] 106 | for op_key_ts in optimization_variables: 107 | all_flows_at_ts = optimization_variables[op_key_ts].values() 108 | constraints.append(sum(all_flows_at_ts) >= flow_per_ts[op_key_ts]) 109 | return constraints 110 | 111 | def get_delta_constraints(delta_vars, router_set, k): 112 | delta_vars_by_rtr = from_ts_to_rtr_key(delta_vars, router_set) 113 | constraints = [] 114 | for rtr in delta_vars_by_rtr: 115 | constraints.append(sum(delta_vars_by_rtr[rtr]) == k - 1) 116 | return constraints 117 | 118 | def get_z_constraints(optimization_variables, delta_vars, z_vars, M): 119 | constraints = [] 120 | optimization_variables_by_rtr = {} 121 | for ts in optimization_variables: 122 | for rtr in optimization_variables[ts]: 
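# Re-keying by router below gives each router's full month of allocations.
# The big-M constraints that follow, z >= x[ts][rtr] - delta[ts][rtr]*M, force
# each z variable (the billable volume) to upper-bound the router's allocation
# in every slot except the k-1 slots where delta == 1: those are the free
# "burst" slots that percentile billing ignores.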
123 | if rtr not in optimization_variables_by_rtr: 124 | optimization_variables_by_rtr[rtr] = {ts: optimization_variables[ts][rtr]} 125 | else: 126 | optimization_variables_by_rtr[rtr][ts] = optimization_variables[ts][rtr] 127 | 128 | for rtr in z_vars: 129 | z = z_vars[rtr] 130 | for ts in optimization_variables_by_rtr[rtr]: 131 | rtr_assignment_in_ts = optimization_variables_by_rtr[rtr][ts] 132 | constraints.append(z >= (rtr_assignment_in_ts - delta_vars[ts][rtr]*M)) 133 | return constraints 134 | 135 | def get_constraints(optimization_variables, flow_per_ts, delta_vars, 136 | z_vars, k, ifspeeds, router_set, M): 137 | all_constraints = [] 138 | print "Get positivity constraints" 139 | positivity_constraints = get_positivity_constraints(optimization_variables, z_vars) 140 | all_constraints.extend(positivity_constraints) 141 | print "Total %d positivity constraints" % len(positivity_constraints) 142 | 143 | print "Get delta constraints" 144 | delta_constraints = get_delta_constraints(delta_vars, router_set, k) 145 | all_constraints.extend(delta_constraints) 146 | print "Total %d delta constraints" % len(delta_constraints) 147 | 148 | print "Get peer capacity constraints" 149 | peer_capacity_constraints = get_peer_capacity_constraints(optimization_variables, 150 | ifspeeds) 151 | all_constraints.extend(peer_capacity_constraints) 152 | print "Total %d peer capacity constraints" % len(peer_capacity_constraints) 153 | 154 | print "Demand completion constraints" 155 | demand_completion_constraints = get_demand_completion_constraints( 156 | optimization_variables, flow_per_ts) 157 | all_constraints.extend(demand_completion_constraints) 158 | print "Total %d demand completion constraints" % len(demand_completion_constraints) 159 | 160 | print "Z constraints" 161 | z_constraints = get_z_constraints(optimization_variables, delta_vars, z_vars, M) 162 | all_constraints.extend(z_constraints) 163 | print "Total %d z constraints" % len(z_constraints) 164 | 165 | print "Total constraints:", len(all_constraints) 166 | return all_constraints 167 | 168 | def solve_optimization_previous_work(flow_per_ts, demand_per_ts, router_set, total_cost, 169 | time_window, cluster_no, num_billing_slots, k_val, 170 | ifspeeds, m_val, combined_flow_file_name, **kwargs): 171 | strategy = kwargs.get('strategy', 'fixed-m') 172 | optimization_variables, rtr_to_var_vector = \ 173 | initialize_opti_vars_previous_work(flow_per_ts, router_set) 174 | all_constraints = [] 175 | 176 | print "Get positivity constraints" 177 | for op_key_ts in optimization_variables: 178 | for op_key_rtr in optimization_variables[op_key_ts]: 179 | op_var = optimization_variables[op_key_ts][op_key_rtr] 180 | all_constraints.append(op_var >= 0) 181 | 182 | print "Get peer capacity constraints" 183 | peer_capacity_constraints = get_peer_capacity_constraints(optimization_variables, 184 | ifspeeds) 185 | all_constraints.extend(peer_capacity_constraints) 186 | 187 | print "Demand completion constraints" 188 | demand_completion_constraints = get_demand_completion_constraints( 189 | optimization_variables, demand_per_ts) 190 | all_constraints.extend(demand_completion_constraints) 191 | 192 | num_slots = round(len(flow_per_ts)*0.1) 193 | print "Top 10% slots:", num_slots 194 | bw_cost_egress = 0 195 | for rtr in rtr_to_var_vector: 196 | bw_cost_egress += sum_largest(rtr_to_var_vector[rtr], num_slots)/num_slots 197 | 198 | obj = Minimize(bw_cost_egress) 199 | prob = Problem(obj, all_constraints) 200 | start_time = time.time() 201 | print "Solving.." 
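# prob.solve hands the LP to Gurobi through CVXPY; prob.solver_stats.solve_time
# read below is the solver's reported runtime, while the wall-clock time is also printed.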
202 | try: 203 | prob.solve(solver=GUROBI, verbose=True) 204 | runtime = prob.solver_stats.solve_time 205 | except error.SolverError as e: 206 | print e 207 | print "Gurobi failed for time window", time_window 208 | return 209 | 210 | end_time = time.time() 211 | print "Finished in %f seconds" % (end_time - start_time) 212 | print "status:", prob.status 213 | print "optimal value", prob.value 214 | optimal_assignments = get_flow_assignments(optimization_variables) 215 | per_router_optimized_assignments = from_ts_to_rtr_key(optimal_assignments, router_set) 216 | cost_per_rtr_optimal = calculate_cost_of_traffic_assignment( 217 | per_router_optimized_assignments, ifspeeds, num_billing_slots, k_val) 218 | total_cost_op = sum(cost_per_rtr_optimal.values()) 219 | percent_saving = (total_cost - total_cost_op)*100/float(total_cost) 220 | print "Optimal cost:", total_cost_op, "Pre-op cost", total_cost 221 | print "Percent saving (%):", percent_saving 222 | print "Per router optimized_cost is", cost_per_rtr_optimal 223 | log_info_solver(combined_flow_file_name, time_window, total_cost, 224 | total_cost_op, percent_saving, 15, 225 | runtime, m_val, strategy) 226 | save_pre_post_opt_assignments_cluster(time_window, flow_per_ts, router_set, 227 | optimal_assignments, cluster_no, 228 | combined_flow_file_name, k_val, m_val, 229 | strategy) 230 | 231 | 232 | def solve_optimization(flows_per_ts, demand_per_ts, router_set, 233 | total_cost, time_window, cluster_no, 234 | num_billing_slots, k_val, ifspeeds, m_val, combined_flow_file_name, 235 | **kwargs): 236 | strategy = kwargs.get('strategy', 'fixed-m') 237 | if 'warm' in strategy: 238 | assert False, "Non-native optimization implementation does not support warm-start" 239 | 240 | print "Getting ready to solve the optimization for time window", time_window 241 | k = Parameter(sign="positive") 242 | k.value = round(k_val*num_billing_slots/100) 243 | M = Parameter(sign="positive") 244 | M.value = m_val 245 | 246 | print "Get optimization variables" 247 | optimization_variables, delta_vars, z_vars = \ 248 | initialize_opti_vars(flows_per_ts, router_set) 249 | print "Get constraints" 250 | all_constraints = get_constraints(optimization_variables, demand_per_ts, 251 | delta_vars, z_vars, k, ifspeeds, router_set, M) 252 | 253 | print "Get bandwidth cost" 254 | bw_cost_egress = calculate_peer_bw_cost(z_vars) 255 | all_constraints.append(bw_cost_egress <= total_cost) 256 | obj = Minimize(bw_cost_egress) 257 | prob = Problem(obj, all_constraints) 258 | start_time = time.time() 259 | mipgap = 0.10 260 | itlimit = 500000 261 | root_method = 3 262 | print "Solving.." 
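# The Gurobi parameters passed to prob.solve below (PrePasses, MIPGap, MIPFocus,
# Heuristics, ImproveStartTime/ImproveStartGap, Method, Cuts) are forwarded by
# CVXPY to the solver. As the README notes, the best-performing values are
# workload-dependent; treat these as a starting point, not a tuned configuration.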
263 | try: 264 | print "Solver properties:" 265 | print "MIPGap:", mipgap 266 | print "root relaxation method:", root_method 267 | prob.solve(solver=GUROBI, verbose=True, PrePasses=3, MIPGap=mipgap, 268 | MIPFocus=1, 269 | #NodeMethod=0, 270 | Heuristics=0.4, 271 | ImproveStartTime=100, 272 | ImproveStartGap=0.3, 273 | TimeLimit=100000, 274 | Method=root_method, Cuts=3) 275 | runtime = prob.solver_stats.solve_time 276 | except error.SolverError as e: 277 | print e 278 | print "Gurobi failed for time window", time_window 279 | return 280 | 281 | end_time = time.time() 282 | print "Finished in %f seconds" % (end_time - start_time) 283 | print "status:", prob.status 284 | print "optimal value", prob.value 285 | 286 | optimal_assignments = get_flow_assignments(optimization_variables) 287 | per_router_optimized_assignments = from_ts_to_rtr_key(optimal_assignments, router_set) 288 | cost_per_rtr_optimal = calculate_cost_of_traffic_assignment( 289 | per_router_optimized_assignments, ifspeeds, num_billing_slots, k_val) 290 | total_cost_op = sum(cost_per_rtr_optimal.values()) 291 | percent_saving = (total_cost - total_cost_op)*100/float(total_cost) 292 | print "Optimal cost:", total_cost_op, "Pre-op cost", total_cost 293 | print "Percent saving:", percent_saving 294 | print "Per router optimized_cost is", cost_per_rtr_optimal 295 | log_info_solver(combined_flow_file_name, time_window, total_cost, 296 | total_cost_op, percent_saving, 15, 297 | runtime, m_val, strategy) 298 | save_pre_post_opt_assignments_cluster(time_window, flows_per_ts, router_set, 299 | optimal_assignments, cluster_no, 300 | combined_flow_file_name, k_val, M.value, strategy) 301 | 302 | def log_info_solver(combined_flow_file_name, time_window, total_cost, 303 | total_cost_op, percent_saving, 304 | mipgap, runtime, m_val, strategy): 305 | with open(SOLVER_LOG_FILE, "a") as fi: 306 | fi.write("%s,%s,%s,%d,%f,%f,%f,%f,%f\n" % (combined_flow_file_name, time_window, strategy, 307 | m_val, total_cost, total_cost_op, percent_saving, 308 | mipgap, runtime)) 309 | 310 | def calculate_peer_bw_cost(z_vars): 311 | cost = 0 312 | for rtr in z_vars: 313 | rtr_cost = cost_by_rtr(rtr) 314 | # print rtr_cost, rtr 315 | cost += rtr_cost * z_vars[rtr] 316 | return cost 317 | def calculate_cost_of_traffic_assignment(rtr_to_ts_assignment, ifspeeds, 318 | num_billing_slots, k): 319 | per_rtr_cost = {} 320 | for rtr in rtr_to_ts_assignment: 321 | traffic_on_rtr_for_billing_period = rtr_to_ts_assignment[rtr] 322 | if len(traffic_on_rtr_for_billing_period) != num_billing_slots: 323 | diff = num_billing_slots - len(traffic_on_rtr_for_billing_period) 324 | assert diff > 0 325 | print "Completing empty slots", diff, rtr 326 | traffic_on_rtr_for_billing_period += [0] * diff 327 | 328 | rtr_capacity = ifspeeds[rtr] 329 | rtr_cost = cost_by_rtr(rtr) 330 | for per_ts_util in traffic_on_rtr_for_billing_period: 331 | try: 332 | assert round(per_ts_util) <= round(rtr_capacity) 333 | except: 334 | print rtr, per_ts_util, ifspeeds[rtr] 335 | 336 | per_rtr_cost[rtr] = rtr_cost * np.percentile(traffic_on_rtr_for_billing_period, 100-k) 337 | 338 | return per_rtr_cost 339 | 340 | def save_pre_post_opt_assignments(month, non_opti_flows, opti_flows): 341 | csv_lines = [["ts", "rtr", "flow", "type"]] 342 | for ts in opti_flows: 343 | for rtr in opti_flows[ts]: 344 | csv_lines.append([ts, rtr, opti_flows[ts][rtr], "post-optimization"]) 345 | if rtr in non_opti_flows: 346 | csv_lines.append([ts, rtr, non_opti_flows[ts][rtr], "pre-optimization"]) 347 | else: 348 | 
csv_lines.append([ts, rtr, 0, "pre-optimization"]) 349 | 350 | with open(TRAFFIC_ALLOCATIONS_GLOBAL_NW + "%s/%s_%d_%d.csv" % 351 | (peer_friendly_name, month, k_val, m_val), "w") as fi: 352 | writer = csv.writer(fi) 353 | writer.writerows(csv_lines) 354 | 355 | def sanity_check(flows_ts_rtr, ifspeeds, peer_billed=None): 356 | for ts in flows_ts_rtr: 357 | for rtr in flows_ts_rtr[ts]: 358 | if peer_billed: 359 | rtr_key = '%s-%s' % (peer_billed, rtr) 360 | else: 361 | rtr_key = rtr 362 | if rtr_key in ifspeeds: 363 | ifsp = ifspeeds[rtr_key] 364 | vol = flows_ts_rtr[ts][rtr] 365 | if not vol <= ifsp: 366 | pdb.set_trace() 367 | else: 368 | print "Didnt find capacity for", rtr 369 | continue 370 | 371 | def save_pre_post_opt_assignments_cluster(time_window, non_opti_flows, router_set, 372 | opti_flows, cluster_no, 373 | combined_flow_file_name, k_val, m_val, strategy): 374 | csv_lines = [["ts", "rtr", "flow", "type"]] 375 | for ts in non_opti_flows: 376 | for rtr in router_set: 377 | if rtr not in non_opti_flows[ts]: 378 | csv_lines.append([ts, rtr, 0, "pre-optimization"]) 379 | print "no flow pre opti", rtr 380 | else: 381 | csv_lines.append([ts, rtr, non_opti_flows[ts][rtr], "pre-optimization"]) 382 | csv_lines.append([ts, rtr, opti_flows[ts][rtr], "post-optimization"]) 383 | 384 | with open(TRAFFIC_ALLOCATIONS_CLUSTER_NW + "%s/%s_%d_%d_cluster%d_%s.csv" % 385 | (combined_flow_file_name, time_window, k_val, m_val, cluster_no, strategy), "w") as fi: 386 | writer = csv.writer(fi) 387 | writer.writerows(csv_lines) 388 | 389 | def write_current_allocations(combined_flow_file_name, 390 | input_clusters=None, num_clusters=None, overall=False): 391 | print "Churning.." 392 | df = pd.read_csv(CURRENT_TRAFFIC_ALLOCATIONS + "%s.csv" % combined_flow_file_name) 393 | clusterwise_billed_peer_flow_volumes_egress = {} 394 | router_set_by_cluster = {} 395 | for index, row in df.iterrows(): 396 | if row[0] == 'ts': continue 397 | ts = row.ts 398 | rtr = row.rtr 399 | if overall: 400 | cluster = 9 401 | elif input_clusters: 402 | if rtr in input_clusters: 403 | cluster = input_clusters[rtr] 404 | else: 405 | print rtr 406 | assert False, "RTR to cluster mapping not found", rtr 407 | else: 408 | cluster = row.cluster 409 | 410 | peer = row.peer 411 | router_name = "%s-%s" % (peer, rtr) 412 | if cluster not in router_set_by_cluster: 413 | router_set_by_cluster[cluster] = [] 414 | if router_name not in router_set_by_cluster[cluster]: 415 | router_set_by_cluster[cluster].append(router_name) 416 | gb = row.flow 417 | ts_dt = datetime.utcfromtimestamp(ts) 418 | month = "%d-%s" % (ts_dt.month, ts_dt.year) 419 | weeknumber = ts_dt.isocalendar()[1] 420 | week_num = "%d-%s" % (weeknumber, ts_dt.year) 421 | if month not in clusterwise_billed_peer_flow_volumes_egress: 422 | clusterwise_billed_peer_flow_volumes_egress[month] = {} 423 | if cluster not in clusterwise_billed_peer_flow_volumes_egress[month]: 424 | clusterwise_billed_peer_flow_volumes_egress[month][cluster] = {} 425 | 426 | if ts not in clusterwise_billed_peer_flow_volumes_egress[month][cluster]: 427 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts] = {} 428 | if router_name not in clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts]: 429 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] = gb 430 | else: 431 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] += gb 432 | 433 | 434 | for month in clusterwise_billed_peer_flow_volumes_egress: 435 | for cluster in 
clusterwise_billed_peer_flow_volumes_egress[month]: 436 | print "month, cluster", month, cluster 437 | 438 | if num_clusters: 439 | fname = MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS + \ 440 | "%s/num_clusters_%d/traffic_allocations_%s_cluster%s.json" % \ 441 | (combined_flow_file_name, num_clusters, month, cluster) 442 | else: 443 | fname = MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS + \ 444 | "%s/traffic_allocations_%s_cluster%s.json" % \ 445 | (combined_flow_file_name, month, cluster) 446 | print fname 447 | with open(fname, "w") as fi: 448 | json.dump(clusterwise_billed_peer_flow_volumes_egress[month][cluster], fi) 449 | 450 | def read_current_allocations(combined_flow_file_name, month, cluster, **kwargs): 451 | num_clusters = kwargs.get('num_clusters', None) 452 | if num_clusters: 453 | fname = MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS + \ 454 | "%s/num_clusters_%d/traffic_allocations_%s_cluster%s.json" % \ 455 | (combined_flow_file_name, num_clusters, month, cluster) 456 | else: 457 | fname = MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS + \ 458 | "%s/traffic_allocations_%s_cluster%s.json" % \ 459 | (combined_flow_file_name, month, cluster) 460 | data = None 461 | if os.path.isfile(fname): 462 | with open(fname) as fi: 463 | data = json.load(fi) 464 | else: 465 | print "File name not found", fname 466 | return data 467 | 468 | def extract_weekly_allocations(monthly_flow_allocations): 469 | weekly_allocations = {} 470 | for ts in monthly_flow_allocations: 471 | ts_dt = datetime.utcfromtimestamp(int(ts)) 472 | weeknumber = ts_dt.isocalendar()[1] 473 | week_num = "%d-%s" % (weeknumber, ts_dt.year) 474 | if week_num not in weekly_allocations: 475 | weekly_allocations[week_num] = {} 476 | 477 | weekly_allocations[week_num][ts] = monthly_flow_allocations[ts] 478 | return weekly_allocations 479 | -------------------------------------------------------------------------------- /api_opt_native.py: -------------------------------------------------------------------------------- 1 | import radix 2 | import calendar 3 | from gurobipy import * 4 | import time 5 | import os 6 | import json 7 | from datetime import datetime 8 | import pandas as pd 9 | from consts import * # This has all the data file paths 10 | import pdb 11 | import csv 12 | import numpy as np 13 | 14 | def minimize_changes_in_allocation(optimization_variables, model, ifspeeds, 15 | num_billing_slots): 16 | tses_sorted = sorted(optimization_variables.keys()) 17 | previous_allocations = {} 18 | sum_abs = {} 19 | for ts in tses_sorted: 20 | current_allocations = {} 21 | for rtr in optimization_variables[ts]: 22 | current_allocations[rtr] = optimization_variables[ts][rtr] 23 | for rtr in current_allocations: 24 | if rtr not in sum_abs: 25 | sum_abs[rtr] = 0 26 | if rtr in previous_allocations: 27 | rtr_cost = cost_by_rtr(rtr) 28 | rtr_capacity = ifspeeds[rtr] 29 | y = model.addVar(lb=-rtr_capacity, 30 | ub=rtr_capacity, name="y_%s_%s" % (str(ts), rtr)) 31 | abs_y = model.addVar(lb=0, 32 | ub=rtr_capacity, name = "abs_y_%s_%s" % (str(ts), rtr)) 33 | model.addConstr(y, GRB.EQUAL, 34 | current_allocations[rtr] - previous_allocations[rtr]) 35 | model.addGenConstrAbs(abs_y, y) 36 | sum_abs[rtr] += abs_y * rtr_cost 37 | 38 | previous_allocations = current_allocations 39 | 40 | for rtr in sum_abs: 41 | sum_abs[rtr] = sum_abs[rtr]/num_billing_slots 42 | 43 | return model, sum(sum_abs.values()) 44 | 45 | def minimize_changes_in_allocation_fixed(optimization_variables, model, 46 | ifspeeds, fraction_cap, 47 | num_billing_slots): 48 | sum_abs = 0 49 | for ts
in optimization_variables: 50 | for rtr in optimization_variables[ts]: 51 | rtr_cost = cost_by_rtr(rtr) 52 | rtr_capacity = ifspeeds[rtr] 53 | y = model.addVar(lb=-rtr_capacity, ub=rtr_capacity, name="y_%s_%s" % (str(ts), rtr)) 54 | abs_y = model.addVar(lb=0, ub=rtr_capacity, name = "abs_y_%s_%s" % (str(ts), rtr)) 55 | model.addConstr(y, GRB.EQUAL, 56 | optimization_variables[ts][rtr] - fraction_cap*rtr_capacity) 57 | model.addGenConstrAbs(abs_y, y) 58 | sum_abs += abs_y * rtr_capacity 59 | 60 | return model, sum_abs 61 | 62 | def initialize_opti_vars(flow_values, router_set, model, ifspeeds, bound_by_cap = False, 63 | scavenger=100): 64 | optimization_variables = {} 65 | delta_vars = {} 66 | for ts in flow_values: 67 | ts_dt = datetime.utcfromtimestamp(int(ts)) 68 | hour = ts_dt.hour 69 | minute = ts_dt.minute 70 | day = ts_dt.day 71 | ts_str = "%d_%d_%d" % (hour, minute, day) 72 | if ts not in optimization_variables: 73 | optimization_variables[ts] = {} 74 | delta_vars[ts] = {} 75 | for rtr in router_set: 76 | if rtr in flow_values[ts]: 77 | lb = (100 - scavenger) * flow_values[ts][rtr]/100.0 78 | else: 79 | lb = 0 80 | optimization_variables[ts][rtr] = model.addVar(lb=lb, name="x_%s_%s" % (ts_str, rtr)) 81 | delta_vars[ts][rtr] = model.addVar(vtype=GRB.BINARY, name="delta_%s_%s" % (ts_str, rtr)) 82 | 83 | z_vars = {} 84 | for rtr in router_set: 85 | if bound_by_cap: 86 | lb_by_capacity_fraction = ifspeeds[rtr] * 0.1 87 | else: 88 | lb_by_capacity_fraction = 0 89 | z_vars[rtr] = model.addVar(lb=lb_by_capacity_fraction, name="z_%s" % rtr) 90 | return optimization_variables, delta_vars, z_vars, model 91 | 92 | def assign_fraction_cap_start_variables(flow_values, router_set, new_model, ifspeeds, 93 | capacity_fraction): 94 | for ts in flow_values: 95 | total_demand = sum(flow_values[ts].values()) 96 | ts_dt = datetime.utcfromtimestamp(int(ts)) 97 | hour = ts_dt.hour 98 | minute = ts_dt.minute 99 | day = ts_dt.day 100 | ts_str = "%d_%d_%d" % (hour, minute, day) 101 | 102 | for rtr in router_set: 103 | rtr_capacity = ifspeeds[rtr] 104 | var_new = new_model.getVarByName("x_%s_%s" % (ts_str, rtr)) 105 | var_new.start = rtr_capacity * capacity_fraction 106 | 107 | return new_model 108 | 109 | def assign_warm_start_variables(flow_values, router_set, new_model, old_model): 110 | for rtr in router_set: 111 | var_old = old_model.getVarByName("z_%s" % rtr) 112 | var_new = new_model.getVarByName("z_%s" % rtr) 113 | if var_new and var_old: 114 | var_new.start = var_old.x 115 | 116 | for ts in flow_values: 117 | ts_dt = datetime.utcfromtimestamp(int(ts)) 118 | hour = ts_dt.hour 119 | minute = ts_dt.minute 120 | day = ts_dt.day 121 | ts_str = "%d_%d_%d" % (hour, minute, day) 122 | for rtr in router_set: 123 | var_old = old_model.getVarByName("x_%s_%s" % (ts_str, rtr)) 124 | var_new = new_model.getVarByName("x_%s_%s" % (ts_str, rtr)) 125 | if var_old and var_new: 126 | var_new.start = var_old.x 127 | 128 | return new_model 129 | 130 | def get_routers_from_assignments(flows_per_ts): 131 | rtr_set = set() 132 | for ts in flows_per_ts: 133 | rtr_set.update(flows_per_ts[ts].keys()) 134 | return list(rtr_set) 135 | 136 | def from_ts_to_rtr_key(ts_key, router_set): 137 | rtr_key = {} 138 | num_tses = len(ts_key) 139 | for ts in ts_key: 140 | for rtr in router_set: 141 | if rtr not in rtr_key: 142 | rtr_key[rtr] = [] 143 | if rtr in ts_key[ts]: 144 | rtr_key[rtr].append(ts_key[ts][rtr]) 145 | else: 146 | rtr_key[rtr].append(0) 147 | return rtr_key 148 | 149 | def get_flow_assignments(optimization_variables): 150 
| optimal_assignments = {} 151 | for ts in optimization_variables: 152 | if ts not in optimal_assignments: 153 | optimal_assignments[ts] = {} 154 | for rtr in optimization_variables[ts]: 155 | optimal_assignments[ts][rtr] = optimization_variables[ts][rtr].x 156 | return optimal_assignments 157 | 158 | 159 | def get_positivity_constraints(optimization_variables, z_vars, model): 160 | constraints = [] 161 | for op_key_ts in optimization_variables: 162 | for op_key_rtr in optimization_variables[op_key_ts]: 163 | op_var = optimization_variables[op_key_ts][op_key_rtr] 164 | constraints.append(op_var >= 0) 165 | for rtr in z_vars: 166 | var = z_vars[rtr] 167 | constraints.append(var >= 0) 168 | return constraints, model 169 | 170 | def get_peer_capacity_constraints(optimization_variables, ifspeeds, model): 171 | for op_key_ts in optimization_variables: 172 | for op_key_rtr in optimization_variables[op_key_ts]: 173 | op_var = optimization_variables[op_key_ts][op_key_rtr] 174 | intf_capacity = ifspeeds[op_key_rtr] 175 | model.addConstr(op_var <= intf_capacity) 176 | return model 177 | 178 | def get_demand_completion_constraints(optimization_variables, flow_per_ts, model): 179 | for op_key_ts in optimization_variables: 180 | all_flows_at_ts = optimization_variables[op_key_ts].values() 181 | model.addConstr(sum(all_flows_at_ts) >= flow_per_ts[op_key_ts]) 182 | return model 183 | 184 | def get_delta_constraints(delta_vars, router_set, k, model): 185 | delta_vars_by_rtr = from_ts_to_rtr_key(delta_vars, router_set) 186 | for rtr in delta_vars_by_rtr: 187 | model.addConstr(sum(delta_vars_by_rtr[rtr]) == k - 1) 188 | return model 189 | 190 | def get_z_constraints(optimization_variables, delta_vars, z_vars, M, model, 191 | ifspeeds, strategy="fixed-m"): 192 | optimization_variables_by_rtr = {} 193 | for ts in optimization_variables: 194 | for rtr in optimization_variables[ts]: 195 | if rtr not in optimization_variables_by_rtr: 196 | optimization_variables_by_rtr[rtr] = {ts: optimization_variables[ts][rtr]} 197 | else: 198 | optimization_variables_by_rtr[rtr][ts] = optimization_variables[ts][rtr] 199 | 200 | for rtr in z_vars: 201 | z = z_vars[rtr] 202 | for ts in optimization_variables_by_rtr[rtr]: 203 | rtr_assignment_in_ts = optimization_variables_by_rtr[rtr][ts] 204 | if strategy == 'link-capacity': 205 | M = ifspeeds[rtr] 206 | else: 207 | assert M 208 | model.addConstr(z >= (rtr_assignment_in_ts - delta_vars[ts][rtr]*M)) 209 | return model 210 | 211 | def log_info_solver(combined_flow_file_name, cluster, time_window, total_cost, 212 | total_cost_op, percent_saving, 213 | mipgap, runtime, m_val, strategy): 214 | with open(SOLVER_LOG_FILE, "a") as fi: 215 | fi.write("%s,%s,%s,%d,%f,%f,%f,%f,%f\n" % (combined_flow_file_name, time_window, 216 | strategy + "-%d" % cluster, 217 | m_val, total_cost, total_cost_op, percent_saving, 218 | mipgap, runtime)) 219 | 220 | def get_constraints(optimization_variables, flow_per_ts, delta_vars, 221 | z_vars, k, ifspeeds, router_set, m_val, model, strategy="fixed-m"): 222 | all_constraints = [] 223 | print "Get delta constraints" 224 | model = get_delta_constraints(delta_vars, router_set, k, model) 225 | 226 | print "Get peer capacity constraints" 227 | model = get_peer_capacity_constraints(optimization_variables, ifspeeds, model) 228 | model = get_demand_completion_constraints(optimization_variables, flow_per_ts, model) 229 | model = get_z_constraints(optimization_variables, delta_vars, z_vars, m_val, 230 | model, ifspeeds, strategy) 231 | 232 | return model 233 | 234 | def solve_optimization(flows_per_ts, demand_per_ts, router_set, 235 | total_cost,
time_window, cluster_no, 236 | num_billing_slots, k_val, ifspeeds, m_val, combined_flow_file_name, 237 | **kwargs): 238 | ''' 239 | flows_per_ts has the format {ts1: {rtr1: flow1, rtr2: flow2 ..}, ts2: ..} 240 | ''' 241 | previous_model = kwargs.get('previous_model', None) 242 | strategy = kwargs.get('strategy', 'fixed-m') 243 | if 'bound' in strategy: 244 | print "Billable volumes lower-bounded by 10% of link capacities" 245 | bound_by_cap = True 246 | else: 247 | bound_by_cap = False 248 | 249 | if 'scavenger' in strategy: 250 | scav_frac = kwargs.get('scav_frac', 40) 251 | print "Scavenger traffic volume:", scav_frac 252 | else: 253 | scav_frac = 100 254 | 255 | timelimit = kwargs.get("timelimit", 18000) 256 | print "Getting ready to solve the optimization for time window", time_window 257 | model = Model("mip") 258 | k = round(k_val*num_billing_slots/100) 259 | 260 | print "Get optimization variables" 261 | optimization_variables, delta_vars, z_vars, model = \ 262 | initialize_opti_vars(flows_per_ts, router_set, model, ifspeeds, 263 | bound_by_cap=bound_by_cap, scavenger = scav_frac) 264 | model.update() 265 | print "Get constraints" 266 | model = get_constraints(optimization_variables, demand_per_ts, 267 | delta_vars, z_vars, k, ifspeeds, 268 | router_set, m_val, model, strategy) 269 | print "Get bandwidth cost" 270 | bw_cost_egress = calculate_peer_bw_cost(z_vars) 271 | # model, change_in_allocs = \ 272 | # minimize_changes_in_allocation(optimization_variables, model, ifspeeds, 273 | # num_billing_slots) 274 | 275 | # model, change_in_allocs = \ 276 | # minimize_changes_in_allocation_fixed(optimization_variables, model, ifspeeds, 0.1, 277 | # num_billing_slots) 278 | 279 | # epsilon = 0.1 280 | # print "Epsilon:", epsilon 281 | # objective = bw_cost_egress + (epsilon * change_in_allocs) 282 | 283 | objective = bw_cost_egress 284 | model.addConstr(bw_cost_egress <= total_cost) 285 | model.setObjective(objective) 286 | 287 | model.write(MODEL_SAVE + "%s_%s.lp" % (time_window, strategy)) 288 | start_time = time.time() 289 | model.setParam("mipgap", 0.15) 290 | # model.setParam("GomoryPasses", 15) 291 | model.setParam("IterationLimit", 5000000) 292 | model.setParam("heuristics", 0.5) 293 | model.setParam("timelimit", timelimit) 294 | model.setParam("improvestarttime", 100) 295 | model.setParam("improvestartgap", 0.3) 296 | #model.setParam("Sifting", 2) 297 | model.setParam("Method", 3) 298 | #model.setParam("Crossover", 4) 299 | 300 | # Warm start from previous allocations 301 | if "warm" in strategy and previous_model: 302 | print "Assigning variable values from previous optimal assignment: warm start" 303 | model = assign_warm_start_variables(flows_per_ts, router_set, model, previous_model) 304 | elif "frac_cap" in strategy: 305 | print "Setting init allocations to be fraction of link capacities" 306 | model = assign_fraction_cap_start_variables(flows_per_ts, router_set, model, ifspeeds, 307 | 0.2) 308 | model.update() 309 | print "Solving.." 310 | try: 311 | print "Solver properties:" 312 | model.optimize() 313 | runtime = model.Runtime 314 | mipgap = model.MIPGap * 100 # Percent further from the LP optimal 315 | except GurobiError as e: 316 | print e 317 | print "Gurobi failed for time window", time_window 318 | return 319 | 320 | end_time = time.time() 321 | 322 | print "Finished in %f seconds" % (end_time - start_time) 323 | 324 | if model.SolCount == 0: 325 | print "Solver failed, no solution."
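# SolCount == 0 means Gurobi found no incumbent within its limits (e.g. the
# time limit) or the model is infeasible; callers treat the None return as failure.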
326 | return None 327 | 328 | optimal_assignments = get_flow_assignments(optimization_variables) 329 | per_router_optimized_assignments = from_ts_to_rtr_key(optimal_assignments, router_set) 330 | cost_per_rtr_optimal = calculate_cost_of_traffic_assignment( 331 | per_router_optimized_assignments, ifspeeds, num_billing_slots, k_val) 332 | total_cost_op = sum(cost_per_rtr_optimal.values()) 333 | percent_saving = (total_cost - total_cost_op)*100/float(total_cost) 334 | print "Optimal cost:", total_cost_op, "Pre-op cost", total_cost 335 | print "Percent saving:", percent_saving 336 | print "Per router optimized_cost is", cost_per_rtr_optimal 337 | log_info_solver(combined_flow_file_name,cluster_no, time_window, total_cost, 338 | total_cost_op, percent_saving, mipgap, 339 | runtime, m_val, strategy) 340 | save_pre_post_opt_assignments_cluster(time_window, flows_per_ts, router_set, 341 | optimal_assignments, cluster_no, 342 | combined_flow_file_name, k_val, m_val, strategy) 343 | return model 344 | 345 | def calculate_peer_bw_cost(z_vars): 346 | cost = 0 347 | for rtr in z_vars: 348 | rtr_cost = cost_by_rtr(rtr) 349 | cost += rtr_cost * z_vars[rtr] 350 | return cost 351 | 352 | def cost_by_rtr(rtr): 353 | print "Enter your peering rates here" 354 | 355 | 356 | def calculate_cost_of_traffic_assignment(rtr_to_ts_assignment, ifspeeds, 357 | num_billing_slots, k): 358 | per_rtr_cost = {} 359 | for rtr in rtr_to_ts_assignment: 360 | traffic_on_rtr_for_billing_period = rtr_to_ts_assignment[rtr] 361 | if len(traffic_on_rtr_for_billing_period) != num_billing_slots: 362 | diff = num_billing_slots - len(traffic_on_rtr_for_billing_period) 363 | assert diff > 0 364 | print "Completing empty slots", diff, rtr 365 | traffic_on_rtr_for_billing_period += [0] * diff 366 | 367 | rtr_capacity = ifspeeds[rtr] 368 | rtr_cost = cost_by_rtr(rtr) 369 | for per_ts_util in traffic_on_rtr_for_billing_period: 370 | try: 371 | assert round(per_ts_util) <= round(rtr_capacity) 372 | except: 373 | pdb.set_trace() 374 | print rtr, per_ts_util, ifspeeds[rtr] 375 | 376 | per_rtr_cost[rtr] = rtr_cost * np.percentile(traffic_on_rtr_for_billing_period, 100-k) 377 | 378 | return per_rtr_cost 379 | 380 | def save_pre_post_opt_assignments(month, non_opti_flows, opti_flows): 381 | csv_lines = [["ts", "rtr", "flow", "type"]] 382 | for ts in opti_flows: 383 | for rtr in opti_flows[ts]: 384 | csv_lines.append([ts, rtr, opti_flows[ts][rtr], "post-optimization"]) 385 | if rtr in non_opti_flows: 386 | csv_lines.append([ts, rtr, non_opti_flows[ts][rtr], "pre-optimization"]) 387 | else: 388 | csv_lines.append([ts, rtr, 0, "pre-optimization"]) 389 | 390 | with open(TRAFFIC_ALLOCATIONS_GLOBAL_NW + "%s/%s_%d_%d.csv" % 391 | (peer_friendly_name, month, k_val, m_val), "w") as fi: 392 | writer = csv.writer(fi) 393 | writer.writerows(csv_lines) 394 | 395 | def sanity_check(flows_ts_rtr, ifspeeds, peer_billed=None): 396 | for ts in flows_ts_rtr: 397 | for rtr in flows_ts_rtr[ts]: 398 | if peer_billed: 399 | rtr_key = '%s-%s' % (peer_billed, rtr) 400 | else: 401 | rtr_key = rtr 402 | if rtr_key in ifspeeds: 403 | ifsp = ifspeeds[rtr_key] 404 | vol = flows_ts_rtr[ts][rtr] 405 | if not vol <= ifsp: 406 | pdb.set_trace() 407 | else: 408 | print "Didnt find capacity for", rtr 409 | continue 410 | 411 | def save_pre_post_opt_assignments_cluster(time_window, non_opti_flows, router_set, 412 | opti_flows, cluster_no, 413 | combined_flow_file_name, k_val, m_val, strategy): 414 | csv_lines = [["ts", "rtr", "flow", "type"]] 415 | for ts in non_opti_flows: 416 | for 
rtr in router_set: 417 | if rtr not in non_opti_flows[ts]: 418 | csv_lines.append([ts, rtr, 0, "pre-optimization"]) 419 | print "no flow pre opti", rtr 420 | else: 421 | csv_lines.append([ts, rtr, non_opti_flows[ts][rtr], "pre-optimization"]) 422 | csv_lines.append([ts, rtr, opti_flows[ts][rtr], "post-optimization"]) 423 | 424 | with open(TRAFFIC_ALLOCATIONS_CLUSTER_NW + "%s/%s_%d_%d_cluster%d_%s.csv" % 425 | (combined_flow_file_name, time_window, k_val, m_val, cluster_no, strategy), "w") as fi: 426 | writer = csv.writer(fi) 427 | writer.writerows(csv_lines) 428 | 429 | def parse_raw_ipfix_to_combined_file(IPFIX_DUMP, combined_file_name, month_number_global=None, 430 | cluster_map=None): 431 | def get_cluster(link): 432 | if not cluster_map: 433 | return 9 434 | if link in cluster_map: 435 | return cluster_map[link] 436 | 437 | print "Enter your peer subset here" 438 | peer_subset = [] 439 | fd = open(CURRENT_TRAFFIC_ALLOCATIONS + combined_file_name + ".csv", "w") 440 | fd.write("ts,rtr,peer,cluster,flow\n") 441 | with open(IPFIX_DUMP) as fi: 442 | reader = csv.reader(fi) 443 | for row in reader: 444 | tsstr = row[0] 445 | try: 446 | dt_ts = datetime.strptime(tsstr, '%m/%d/%Y %I:%M:%S %p') 447 | except: 448 | pdb.set_trace() 449 | if month_number_global and dt_ts.month != month_number_global: 450 | continue 451 | ts_int = calendar.timegm(dt_ts.timetuple()) 452 | rtr = row[1] 453 | peerlink = row[2] 454 | if peerlink not in peer_subset: continue 455 | direction = row[3] 456 | if direction != "OUTBOUND": continue 457 | cluster = get_cluster(peerlink) 458 | bytes = int(row[4]) 459 | mb = bytes * 8/(1024*1024.0) 460 | fd.write("%s,%s,%s,%s,%f\n" % (ts_int, rtr, peerlink, cluster, mb)) 461 | fd.close() 462 | 463 | def parse_metro_pfx_mapping(fname): 464 | rtree = radix.Radix() 465 | pfx_to_metro = {} 466 | with open(fname) as fi: 467 | reader = csv.reader(fi) 468 | for row in reader: 469 | if row[0] == 'prefix': continue 470 | pfx = row[0] 471 | metro = row[1] 472 | try: 473 | rnode = rtree.add(pfx) 474 | except: 475 | pdb.set_trace() 476 | rnode.data["metro"] = metro 477 | if pfx not in pfx_to_metro: 478 | pfx_to_metro[pfx] = metro 479 | else: 480 | assert False, pfx_to_metro[pfx] 481 | return pfx_to_metro, rtree 482 | 483 | def parse_raw_ipfix_to_combined_file_clientmetro(IPFIX_DUMP_CLIENT_PFX, combined_file_name, 484 | month_number_global=None, 485 | cluster_map=None): 486 | pfx_to_metro, rtree = parse_metro_pfx_mapping(METRO_PFX_MAP) 487 | rtr_to_peer = map_rtr_ip_to_peerlink() 488 | def get_metro(pfx): 489 | if pfx in pfx_to_metro: 490 | return [pfx_to_metro[pfx]] 491 | rnodes = rtree.search_covered(pfx) 492 | if not rnodes: return ["Unknown"] 493 | return list(set([x.data["metro"] for x in rnodes])) 494 | 495 | print "Enter your peer subset here" 496 | peer_subset = [] 497 | nometro = set() 498 | peerlink_to_client_metro = {} 499 | with open(CURRENT_TRAFFIC_ALLOCATIONS + IPFIX_DUMP_CLIENT_PFX) as fi: 500 | reader = csv.reader(fi) 501 | for row in reader: 502 | tsstr = row[0] 503 | try: 504 | dt_ts = datetime.strptime(tsstr, '%m/%d/%Y %I:%M:%S %p') 505 | except: 506 | pdb.set_trace() 507 | if month_number_global and dt_ts.month != month_number_global: 508 | continue 509 | ts_int = calendar.timegm(dt_ts.timetuple()) 510 | rtr = row[1] 511 | rtrip = row[2] 512 | ifindex = row[3] 513 | if (rtr, rtrip, ifindex) not in rtr_to_peer: continue 514 | peerasn = rtr_to_peer[(rtr, rtrip, ifindex)] 515 | asn = "AS%d" % peerasn 516 | asname = "ENTER AS MAPPINGS HERE" 517 | peerlink = "%s-%s" % (asn, 
asname) 518 | assert peerlink in peer_subset 519 | peerlink = "%s-%s" % (peerlink, rtr) 520 | 521 | if peerlink not in peerlink_to_client_metro: 522 | peerlink_to_client_metro[peerlink] = {} 523 | 524 | if ts_int not in peerlink_to_client_metro[peerlink]: 525 | peerlink_to_client_metro[peerlink][ts_int] = {} 526 | 527 | dstpfx = row[4] 528 | dstasn = row[5] 529 | metrolist = get_metro(dstpfx) 530 | num_metros = len(metrolist) 531 | for metro in metrolist: 532 | client_metro_asn = "%s-%s" % (metro, dstasn) 533 | mb = float(row[6]) 534 | mb = mb / 1024.0 # Getting it in Gbs 535 | mb = mb/num_metros 536 | if client_metro_asn not in peerlink_to_client_metro[peerlink][ts_int]: 537 | peerlink_to_client_metro[peerlink][ts_int][client_metro_asn] = 0 538 | peerlink_to_client_metro[peerlink][ts_int][client_metro_asn] += mb 539 | 540 | with open(CURRENT_TRAFFIC_ALLOCATIONS + combined_file_name + ".json", "w") as fi: 541 | json.dump(peerlink_to_client_metro, fi) 542 | 543 | def client_to_pop_stickiness(combined_file_name): 544 | ''' 545 | Based on IPFIX data, this function maps which clients (metro + ASN) 546 | were served content from which set of peer links (RTR + PEER). 547 | ''' 548 | with open(CURRENT_TRAFFIC_ALLOCATIONS + combined_file_name + ".json") as fi: 549 | peerlink_to_client_metro = json.load(fi) 550 | 551 | client_to_links = {} 552 | for peerlink in peerlink_to_client_metro: 553 | for ts in peerlink_to_client_metro[peerlink]: 554 | for metroasn in peerlink_to_client_metro[peerlink][ts]: 555 | if metroasn not in client_to_links: 556 | client_to_links[metroasn] = [] 557 | if peerlink not in client_to_links[metroasn]: 558 | client_to_links[metroasn].append(peerlink) 559 | 560 | return client_to_links 561 | 562 | def write_current_allocations_by_cluster(combined_flow_file_name, final_dir, 563 | peer_to_cluster_map, overall=False): 564 | df = pd.read_csv(CURRENT_TRAFFIC_ALLOCATIONS + "%s.csv" % combined_flow_file_name) 565 | clusterwise_billed_peer_flow_volumes_egress = {} 566 | router_set_by_cluster = {} 567 | for index, row in df.iterrows(): 568 | if row[0] == 'ts': continue 569 | ts = row.ts 570 | rtr = row.rtr 571 | peer = row.peer 572 | # peerasn = row.peer.split('-')[0].split('AS')[-1] 573 | peerlink = "%s-%s" % (rtr, peer) 574 | if overall: 575 | cluster = 9 576 | elif peer_to_cluster_map: 577 | if peerlink in peer_to_cluster_map: 578 | cluster = peer_to_cluster_map[peerlink] 579 | else: 580 | continue 581 | else: 582 | cluster = row.cluster 583 | 584 | peer = row.peer 585 | router_name = "%s-%s" % (peer, rtr) 586 | if cluster not in router_set_by_cluster: 587 | router_set_by_cluster[cluster] = [] 588 | if router_name not in router_set_by_cluster[cluster]: 589 | router_set_by_cluster[cluster].append(router_name) 590 | gb = row.flow/1024.0 591 | ts_dt = datetime.utcfromtimestamp(ts) 592 | month = "%d-%s" % (ts_dt.month, ts_dt.year) 593 | weeknumber = ts_dt.isocalendar()[1] 594 | week_num = "%d-%s" % (weeknumber, ts_dt.year) 595 | if month not in clusterwise_billed_peer_flow_volumes_egress: 596 | clusterwise_billed_peer_flow_volumes_egress[month] = {} 597 | if cluster not in clusterwise_billed_peer_flow_volumes_egress[month]: 598 | clusterwise_billed_peer_flow_volumes_egress[month][cluster] = {} 599 | 600 | if ts not in clusterwise_billed_peer_flow_volumes_egress[month][cluster]: 601 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts] = {} 602 | if router_name not in clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts]: 603 | 
clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] = gb 604 | else: 605 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] += gb 606 | 607 | 608 | for month in clusterwise_billed_peer_flow_volumes_egress: 609 | for cluster in clusterwise_billed_peer_flow_volumes_egress[month]: 610 | print "month, cluster", month, cluster 611 | with open(MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS_PERF + \ 612 | "%s/traffic_allocations_%s_cluster%s.json" % 613 | (final_dir, month, cluster), "w") as fi: 614 | json.dump(clusterwise_billed_peer_flow_volumes_egress[month][cluster], fi) 615 | 616 | def write_current_allocations(combined_flow_file_name, 617 | cluster_fname=None, overall=False): 618 | df = pd.read_csv(CURRENT_TRAFFIC_ALLOCATIONS + "%s.csv" % combined_flow_file_name) 619 | peer_to_cluster_map = None 620 | 621 | clusterwise_billed_peer_flow_volumes_egress = {} 622 | router_set_by_cluster = {} 623 | for index, row in df.iterrows(): 624 | if row[0] == 'ts': continue 625 | ts = row.ts 626 | rtr = row.rtr 627 | peerasn = row.peer.split('-')[0].split('AS')[-1] 628 | peerlink = "%s-%s" % (rtr, peerasn) 629 | if overall: 630 | cluster = 9 631 | elif peer_to_cluster_map: 632 | if peerlink in peer_to_cluster_map: 633 | cluster = peer_to_cluster_map[peerlink] 634 | else: 635 | continue 636 | else: 637 | cluster = row.cluster 638 | 639 | peer = row.peer 640 | router_name = "%s-%s" % (peer, rtr) 641 | if cluster not in router_set_by_cluster: 642 | router_set_by_cluster[cluster] = [] 643 | if router_name not in router_set_by_cluster[cluster]: 644 | router_set_by_cluster[cluster].append(router_name) 645 | gb = row.flow 646 | ts_dt = datetime.utcfromtimestamp(ts) 647 | month = "%d-%s" % (ts_dt.month, ts_dt.year) 648 | weeknumber = ts_dt.isocalendar()[1] 649 | week_num = "%d-%s" % (weeknumber, ts_dt.year) 650 | if month not in clusterwise_billed_peer_flow_volumes_egress: 651 | clusterwise_billed_peer_flow_volumes_egress[month] = {} 652 | if cluster not in clusterwise_billed_peer_flow_volumes_egress[month]: 653 | clusterwise_billed_peer_flow_volumes_egress[month][cluster] = {} 654 | 655 | if ts not in clusterwise_billed_peer_flow_volumes_egress[month][cluster]: 656 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts] = {} 657 | if router_name not in clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts]: 658 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] = gb 659 | else: 660 | clusterwise_billed_peer_flow_volumes_egress[month][cluster][ts][router_name] += gb 661 | 662 | 663 | for month in clusterwise_billed_peer_flow_volumes_egress: 664 | for cluster in clusterwise_billed_peer_flow_volumes_egress[month]: 665 | print "month, cluster", month, cluster 666 | with open(MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS_PERF + \ 667 | "%s/traffic_allocations_%s_cluster%s.json" % 668 | (combined_flow_file_name, month, cluster), "w") as fi: 669 | json.dump(clusterwise_billed_peer_flow_volumes_egress[month][cluster], fi) 670 | 671 | def map_rtr_ip_to_peerlink(): 672 | rtr_to_peer = {} 673 | with open(CURRENT_TRAFFIC_ALLOCATIONS + RTR_IP_TO_PEER_MAP) as fi: 674 | reader = csv.reader(fi) 675 | for row in reader: 676 | if row[1] == "DeviceIp": continue 677 | rtr = row[0] 678 | ip = row[1] 679 | ifint = row[2] 680 | try: 681 | asn = int(row[3]) 682 | except: pdb.set_trace() 683 | if (rtr, ip, ifint) in rtr_to_peer: 684 | pdb.set_trace() 685 | rtr_to_peer[(rtr, ip, ifint)] = asn 686 | return rtr_to_peer 687 | 688 | def 
read_current_allocations(combined_flow_file_name, month, cluster, strategy, test=False): 689 | fname = MONTHLY_CURRENT_TRAFFIC_ALLOCATIONS_PERF + \ 690 | "%s/traffic_allocations_%s_cluster%s.json" % \ 691 | (combined_flow_file_name, month, cluster) 692 | data = None 693 | if os.path.isfile(fname): 694 | with open(fname) as fi: 695 | data = json.load(fi) 696 | return data 697 | 698 | def extract_weekly_allocations(monthly_flow_allocations): 699 | weekly_allocations = {} 700 | for ts in monthly_flow_allocations: 701 | ts_dt = datetime.utcfromtimestamp(int(ts)) 702 | weeknumber = ts_dt.isocalendar()[1] 703 | week_num = "%d-%s" % (weeknumber, ts_dt.year) 704 | if week_num not in weekly_allocations: 705 | weekly_allocations[week_num] = {} 706 | 707 | weekly_allocations[week_num][ts] = monthly_flow_allocations[ts] 708 | return weekly_allocations 709 | -------------------------------------------------------------------------------- /heuristic_api.py: -------------------------------------------------------------------------------- 1 | import timeit, functools 2 | from peer_to_links_map import * 3 | import os 4 | import sys 5 | sys.path.append(os.path.abspath('..')) 6 | from api_opt_native import * 7 | import random 8 | import json 9 | from heapq import * 10 | import math 11 | from datetime import datetime 12 | import glob 13 | import numpy as np 14 | import pdb 15 | import csv 16 | import sys 17 | from consts import * 18 | 19 | combined_flow_file_name = None 20 | def read_allocation(month, cluster_no): 21 | rtr_to_cluster = {} 22 | cluster_to_rtr = {} 23 | if cluster_no == 9: 24 | fnames = glob.glob(TRAFFIC_ALLOCATIONS_CLUSTER_NW + 25 | "%s/%s_%d_%d_cluster%s_warm-fixed-m.csv" % 26 | (combined_flow_file_name, month, 5, 10000, cluster_no)) 27 | else: 28 | fnames = glob.glob(TRAFFIC_ALLOCATIONS_CLUSTER_NW + 29 | "%s/%s_%d_%d_cluster%s_warm-fixed-m-perf.csv" % 30 | (combined_flow_file_name, month, 5, 10000, cluster_no)) 31 | if not fnames: return 32 | fname = fnames[0] 33 | pre_op = {} 34 | post_op = {} 35 | pre_op_by_rtr = {} 36 | post_op_by_rtr = {} 37 | set_tses = set() 38 | link_set = set() 39 | with open(fname) as fi: 40 | reader = csv.reader(fi) 41 | for row in reader: 42 | if row[0] == 'ts': continue 43 | ts = int(row[0]) 44 | set_tses.add(ts) 45 | if ts not in pre_op: 46 | pre_op[ts] = {} 47 | if ts not in post_op: 48 | post_op[ts] = {} 49 | 50 | link = row[1] 51 | 52 | if link not in pre_op_by_rtr: 53 | pre_op_by_rtr[link] = {} 54 | 55 | if link not in post_op_by_rtr: 56 | post_op_by_rtr[link] = {} 57 | 58 | link_set.add(link) 59 | rtr_to_cluster[link] = cluster_no 60 | value = float(row[2]) 61 | 62 | try: 63 | if row[-1] == 'pre-optimization': 64 | assert ts not in pre_op_by_rtr[link] 65 | assert link not in pre_op[ts] 66 | pre_op_by_rtr[link][ts] = value 67 | pre_op[ts][link] = value 68 | elif row[-1] == 'post-optimization': 69 | assert link not in post_op[ts] 70 | assert ts not in post_op_by_rtr[link] 71 | post_op[ts][link] = value 72 | post_op_by_rtr[link][ts] = value 73 | except AssertionError: 74 | pdb.set_trace() 75 | 76 | cluster_to_rtr[cluster_no] = list(link_set) 77 | print "Cluster number:", cluster_no, "has %d links" % len(link_set) 78 | 79 | post_op_by_rtr_list = {} 80 | pre_op_by_rtr_list = {} 81 | for rtr in post_op_by_rtr: 82 | post_op_by_rtr_list[rtr] = post_op_by_rtr[rtr].values() 83 | for rtr in pre_op_by_rtr: 84 | pre_op_by_rtr_list[rtr] = pre_op_by_rtr[rtr].values() 85 | 86 | return pre_op, post_op, pre_op_by_rtr_list, post_op_by_rtr_list, link_set 87 | 88 | def 
calculate_cost_of_traffic_assignment(rtr_to_ts_assignment, ifspeeds, 89 | num_billing_slots, k): 90 | per_rtr_cost = {} 91 | for rtr in rtr_to_ts_assignment: 92 | traffic_on_rtr_for_billing_period = rtr_to_ts_assignment[rtr] 93 | if len(traffic_on_rtr_for_billing_period) != num_billing_slots: 94 | diff = num_billing_slots - len(traffic_on_rtr_for_billing_period) 95 | assert diff > 0 96 | print "Completing empty slots", diff, rtr 97 | traffic_on_rtr_for_billing_period += [0] * diff 98 | 99 | rtr_capacity = ifspeeds[rtr] 100 | rtr_cost = cost_by_rtr(rtr) 101 | for per_ts_util in traffic_on_rtr_for_billing_period: 102 | try: 103 | assert round(per_ts_util) <= round(rtr_capacity) 104 | except: 105 | pdb.set_trace() 106 | print rtr, per_ts_util, ifspeeds[rtr] 107 | 108 | per_rtr_cost[rtr] = rtr_cost * np.percentile(traffic_on_rtr_for_billing_period, 100-k) 109 | 110 | return per_rtr_cost 111 | 112 | class PiorityQueue: 113 | def __init__(self, ifspeeds): 114 | self.entry_finder = {} 115 | self.linkheap = [] 116 | self.ifspeeds = ifspeeds 117 | self.REMOVED = '' 118 | 119 | def get_bca(self, used_links): 120 | # BCA is burstable capacity at a given point of time. 121 | # It captures the value by which we can burst if all links 122 | # with a free slot burst 123 | bca = 0 124 | for link in self.entry_finder: 125 | if self.entry_finder[link][1] > 0: # entry = [priority, slots, linkname] 126 | if link in used_links: 127 | # If this link already has some traffic assigned to it 128 | # in the present time slot, its capacity for bursting 129 | # is reduced by the traffic it is already carrying. 130 | bca += self.ifspeeds[link] - used_links[link] 131 | else: 132 | bca += self.ifspeeds[link] 133 | return bca 134 | 135 | def add_link(self, linkname, slots, priority=0): 136 | if linkname in self.entry_finder: 137 | self.remove_link(linkname) 138 | capacity = self.ifspeeds[linkname] 139 | # entry = [priority, capacity, slots, linkname] 140 | entry = [priority, slots, linkname] 141 | self.entry_finder[linkname] = entry 142 | heappush(self.linkheap, entry) 143 | 144 | def remove_link(self,linkname): 145 | 'Mark an existing link as REMOVED. Raise KeyError if not found.' 146 | entry = self.entry_finder.pop(linkname) 147 | entry[-1] = self.REMOVED 148 | 149 | def pop_link(self): 150 | 'Remove and return the lowest priority link. Raise KeyError if empty.'
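# Standard heapq lazy-deletion pattern (as in the Python heapq docs): stale
# entries stay on the heap, are marked REMOVED by remove_link, and are skipped here.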
151 | while self.linkheap: 152 | # priority, capacity, slots, link = heappop(self.linkheap) 153 | priority, slots, link = heappop(self.linkheap) 154 | if link is not self.REMOVED and slots > 0: 155 | del self.entry_finder[link] 156 | return link, slots, priority 157 | raise KeyError('pop from an empty priority queue') 158 | 159 | def sigcomm04_heuristic_offline(demand, links, ifspeeds): 160 | FREE_SLOTS = round(len(demand)*5/100.0) - 1 161 | def select_random_links_with_slots(links, slots, p_k, k): 162 | link_subset = [link for link in links if slots[link] > 0] 163 | if len(link_subset) == 0: 164 | return [] 165 | elif len(link_subset) >= k: 166 | return random.sample(link_subset, k) 167 | else: 168 | return link_subset 169 | 170 | def is_assignable(demands, V_0, p_k): 171 | slots = {} 172 | for link in links: 173 | slots[link] = FREE_SLOTS 174 | 175 | for ts in demands: 176 | demand = demands[ts] 177 | if demand <= V_0: continue 178 | demand_met = False 179 | for j in range(1, len(links)): 180 | peak_links = select_random_links_with_slots(links, slots, p_k, j) 181 | burst_cap = sum([ifspeeds[x] for x in peak_links]) 182 | burst_cap_deduct = sum([p_k[x] for x in peak_links]) 183 | burst_cap = V_0 + burst_cap - burst_cap_deduct 184 | if burst_cap >= demand: 185 | demand_met = True 186 | break 187 | 188 | if demand_met: 189 | for link in peak_links: 190 | slots[link] -= 1 191 | else: 192 | return False 193 | 194 | for link in slots: 195 | assert slots[link] >= 0 196 | 197 | return True 198 | 199 | sorted_tses = sorted(demand.keys()) 200 | I = len(sorted_tses) 201 | delta = 0.01 202 | f = min([len(links) * 0.05, 1]) 203 | sat = False 204 | while not sat: 205 | V_0 = np.percentile(demand.values(), (1-f)*100) 206 | # print V_0, (1-f)* 100 207 | link_costs = [(link, cost_by_rtr(link)) for link in links] 208 | link_costs = sorted(link_costs, key = lambda x: x[1]) 209 | remaining = V_0 210 | p_k = {} 211 | for link, cost in link_costs: 212 | if remaining <= 0: 213 | p_k[link] = 0 214 | continue 215 | cap = ifspeeds[link] 216 | if remaining < cap: 217 | p_k[link] = remaining 218 | remaining = 0 219 | else: 220 | p_k[link] = cap 221 | remaining = remaining - cap 222 | 223 | sat = is_assignable(demand, V_0, p_k) 224 | f = f - delta 225 | if f < 0: return None, None, None 226 | 227 | if sat: 228 | cost = sum([p_k[link]*cost_by_rtr(link) for link in p_k]) 229 | return p_k, cost, V_0 230 | else: 231 | return None, None, None 232 | 233 | def bin_pack_links(links, ifspeeds, traffic_volume): 234 | ''' Given links with different costs and capacities, 235 | this method assigns billable bandwidth to the links such that 236 | the cost is minimized and capacities are respected.
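Links are grouped by identical unit cost and filled cheapest-first; within the
cheapest group that can absorb the remainder, volume is spread max-min fairly
in fixed-size rounds (rate_of_allocation) until every link hits capacity or
the volume is exhausted.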
237 | ''' 238 | links_by_cost = {} 239 | for link in links: 240 | link_cost = cost_by_rtr(link) 241 | if link_cost not in links_by_cost: 242 | links_by_cost[link_cost] = [] 243 | links_by_cost[link_cost].append(link) 244 | 245 | remaining_volume = traffic_volume 246 | link_assignments = {} 247 | 248 | while remaining_volume > 0: 249 | # No links remain with available capacity 250 | if not links_by_cost: 251 | print "Cannot satisfy %f with this link set" % traffic_volume 252 | return None 253 | 254 | # Cheapest set of links 255 | link_set = sorted(links_by_cost.iteritems(), key=lambda x:x[0])[0][1] 256 | link_cost = sorted(links_by_cost.iteritems(), key=lambda x:x[0])[0][0] 257 | link_set_capacity = sum([ifspeeds[x] for x in link_set]) 258 | 259 | if link_set_capacity >= remaining_volume: 260 | # Can assign the remaining volume in one shot 261 | # Max-min fairness in allocation 262 | # Increase the assignments to all links in the link set, equally 263 | # in every round and then stop when a link reaches capacity. 264 | rate_of_allocation = 3000 265 | remaining_volume_ = remaining_volume 266 | while remaining_volume_ > 0: 267 | for link in link_set: 268 | if remaining_volume_ == 0 : break 269 | assignment = None 270 | if link not in link_assignments: 271 | assignment = min([rate_of_allocation, remaining_volume_]) 272 | link_assignments[link] = assignment 273 | elif ifspeeds[link] - link_assignments[link] > 0: 274 | assignment = min([ifspeeds[link] - link_assignments[link], 275 | rate_of_allocation, remaining_volume_]) 276 | link_assignments[link] += assignment 277 | else: 278 | continue 279 | remaining_volume_ -= assignment 280 | 281 | assert remaining_volume_ == 0 282 | remaining_volume = 0 283 | else: 284 | remaining_volume = remaining_volume - link_set_capacity 285 | for link in link_set: 286 | link_assignments[link] = ifspeeds[link] 287 | 288 | links_by_cost.pop(link_cost) 289 | 290 | for link in link_assignments: 291 | assert link_assignments[link] <= ifspeeds[link] 292 | assert sum(link_assignments.values()) == traffic_volume 293 | 294 | return link_assignments 295 | 296 | def bin_pack_links_balanced(links, ifspeeds, traffic_volume, min_frac=0.1): 297 | ''' Given links with different costs and capacities, 298 | this method assigns billable bandwidth to the links such that 299 | the cost is minimized and capacities are respected. This flavor of the 300 | bin packing is not optimal because it first reserves min_frac of every 301 | link's capacity before the cheapest-first fill. The goal of this is 302 | to have the expensive links also be used since some clients 303 | are only connected via these links.
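Concretely, every link is pre-assigned min_frac of its own capacity, and only
the remaining volume goes through the same cheapest-first fill as bin_pack_links.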
304 |     '''
305 |     assert min_frac <= 0.5
306 |     link_assignments = {}
307 |     pre_assigned = 0
308 |     for link in links:
309 |         link_assignments[link] = min_frac * ifspeeds[link]
310 |         pre_assigned += min_frac * ifspeeds[link]
311 | 
312 |     remaining_volume = traffic_volume - pre_assigned
313 | 
314 |     links_by_cost = {}
315 |     for link in links:
316 |         link_cost = cost_by_rtr(link)
317 |         if link_cost not in links_by_cost:
318 |             links_by_cost[link_cost] = []
319 |         links_by_cost[link_cost].append(link)
320 | 
321 |     while remaining_volume > 0:
322 |         # No links remain with available capacity
323 |         if not links_by_cost:
324 |             print "Cannot satisfy %f with this link set" % traffic_volume
325 |             return None
326 | 
327 |         # Cheapest set of links
328 |         link_cost, link_set = min(links_by_cost.iteritems(), key=lambda x: x[0])
329 | 
330 | 
331 |         link_set_capacity = sum([ifspeeds[x]-link_assignments[x] for x in link_set])
332 | 
333 |         if link_set_capacity >= remaining_volume:
334 |             # Can assign the remaining volume in one shot.
335 |             # Max-min fairness in allocation:
336 |             # increase the assignments to all links in the link set equally
337 |             # in every round and stop when a link reaches capacity.
338 |             rate_of_allocation = 3000
339 |             remaining_volume_ = remaining_volume
340 |             while remaining_volume_ > 0:
341 |                 # print remaining_volume_, link_assignments
342 |                 for link in link_set:
343 |                     if remaining_volume_ == 0: break
344 |                     assignment = 0
345 |                     if ifspeeds[link] - link_assignments[link] > 0:
346 |                         assignment = min([ifspeeds[link] - link_assignments[link],
347 |                                           rate_of_allocation, remaining_volume_])
348 |                         link_assignments[link] += assignment
349 |                     try:
350 |                         assert math.floor(link_assignments[link]) <= ifspeeds[link]
351 |                     except AssertionError: pdb.set_trace()
352 |                     remaining_volume_ -= assignment
353 | 
354 |             assert remaining_volume_ == 0
355 |             remaining_volume = 0
356 |         else:
357 |             remaining_volume = remaining_volume - link_set_capacity
358 |             for link in link_set:
359 |                 link_assignments[link] = ifspeeds[link]
360 | 
361 |         links_by_cost.pop(link_cost)
362 | 
363 |     for link in link_assignments:
364 |         try:
365 |             assert math.floor(link_assignments[link]) <= ifspeeds[link]
366 |         except AssertionError:
367 |             pdb.set_trace()
368 | 
369 |     try:
370 |         assert math.ceil(sum(link_assignments.values())) >= math.floor(traffic_volume)
371 |     except AssertionError:
372 |         pdb.set_trace()
373 | 
374 |     return link_assignments
375 | 
376 | def cascara_traffic_allocation(demand, init_fraction, links, ifspeeds, online=False,
377 |                                alpha=None, beta=None):
378 |     ''' init_fraction is the fraction of the total network capacity
379 |     that we are willing to use for allocating traffic. It might be infeasible
380 |     to meet the traffic demand using this fraction, in which case we will update
381 |     the fraction of the network capacity in use -- outside of the burst intervals.'''
382 | 
383 |     online_allocations = {}
384 |     links_by_cost = {}
385 |     linkq = PiorityQueue(ifspeeds)
386 |     START_PRIO = len(demand)
387 |     FREE_SLOTS = round(len(demand)*5/100.0) - 1
388 |     num_tses = len(demand)
389 |     for link in links:
390 |         linkq.add_link(link, FREE_SLOTS, START_PRIO)
391 | 
392 |     # Assigning the usable capacity fraction should be done effectively:
393 |     # use links in an increasing order of their cost.
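    # Illustration (hypothetical numbers, not from the paper): with two links
    # of capacity 600 and 400 units per 5-minute slot and init_fraction = 0.3,
    # the lines below compute capacity_fraction = 0.3 * 1000 = 300 and
    # bin_pack_links then places all 300 units on the cheaper link first.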
394 | total_capacity = float(sum([ifspeeds[x] for x in links])) 395 | capacity_fraction = init_fraction * total_capacity 396 | link_assignments = bin_pack_links(links, ifspeeds, capacity_fraction) 397 | if not link_assignments: 398 | print "Infeasible initial capacity fraction:", init_fraction 399 | return False, False 400 | 401 | sorted_tses = sorted(demand.keys()) 402 | 403 | def allocate_timestep(ts, demand_in_ts, fraction): 404 | C_frac = fraction * total_capacity 405 | link_assignments = bin_pack_links(links, ifspeeds, C_frac) 406 | if demand_in_ts <= C_frac: 407 | for link in links: 408 | if link in link_assignments: 409 | online_allocations[ts][link] = link_assignments[link] 410 | else: 411 | online_allocations[ts][link] = 0 412 | return True 413 | else: 414 | over_demand = demand_in_ts - C_frac 415 | links_maxed_in_round = [] 416 | while over_demand > 0: 417 | try: 418 | linkname, slots, prio = linkq.pop_link() 419 | links_maxed_in_round.append((linkname, slots, prio)) 420 | except KeyError: 421 | # print "%f Demand of timestamp %d not met yet and no links to max out" % \ 422 | # (over_demand, ts) 423 | for link, slots, prio in links_maxed_in_round: 424 | linkq.add_link(link, slots, prio) 425 | return False 426 | 427 | if linkname in link_assignments: 428 | link_contrib = ifspeeds[linkname] - link_assignments[linkname] 429 | else: 430 | link_contrib = ifspeeds[linkname] 431 | link_contrib = min([over_demand, link_contrib]) 432 | over_demand = over_demand - link_contrib 433 | 434 | for link in links: 435 | if link in link_assignments: 436 | online_allocations[ts][link] = link_assignments[link] 437 | else: 438 | online_allocations[ts][link] = 0 439 | 440 | for link, slots, prio in links_maxed_in_round: 441 | linkq.add_link(link, slots - 1, prio - 1) 442 | online_allocations[ts][link] = ifspeeds[link] 443 | 444 | assert sum(online_allocations[ts].values()) >= demand_in_ts 445 | 446 | for link in online_allocations[ts]: 447 | assert online_allocations[ts][link] <= ifspeeds[link] 448 | return True 449 | 450 | assert False 451 | 452 | def inc_frac(frac, value): 453 | frac = frac + value 454 | if frac > 1: 455 | frac = 1 456 | return frac 457 | 458 | fraction = init_fraction 459 | count = 0 460 | week_count = 1 461 | init_bca = linkq.get_bca(link_assignments) 462 | for ts in sorted_tses: 463 | demand_in_ts = demand[ts] 464 | C_frac = fraction * total_capacity 465 | new_link_assignments = bin_pack_links(links, ifspeeds, C_frac) 466 | count += 1 467 | if count % 2016 == 0 and online: 468 | # one week is complete 469 | if linkq.get_bca(new_link_assignments) < init_bca * (4-week_count)/4.0: 470 | fraction = inc_frac(fraction, alpha) 471 | week_count += 1 472 | 473 | online_allocations[ts] = {} 474 | while not allocate_timestep(ts, demand_in_ts, fraction): 475 | # print "Allocation attempt in ts %d failed with capacity fraction %f" % (ts, fraction) 476 | # print "Demand was %f and burstable capacity was %f" % \ 477 | # (demand_in_ts, linkq.get_bca(new_link_assignments)) 478 | if not online: 479 | return None, None 480 | fraction = inc_frac(fraction, beta) 481 | print week_count, fraction 482 | 483 | online_allocations_by_rtr = {} 484 | for ts in online_allocations: 485 | for rtr in online_allocations[ts]: 486 | if rtr not in online_allocations_by_rtr: 487 | online_allocations_by_rtr[rtr] = [] 488 | online_allocations_by_rtr[rtr].append(online_allocations[ts][rtr]) 489 | 490 | return online_allocations_by_rtr, fraction 491 | 492 | def 
exponential_weighted_moving_average_demand_prediction(demands, beta=1): 493 | ''' 494 | Exponential weighted moving average prediction of traffic 495 | on peering links or overall demand as suggested by Goldenberg et. al 496 | in "Optimizing Cost and Performance for Multihoming" in SIGCOMM 2004. 497 | ''' 498 | sorted_tses = sorted(demands.keys()) 499 | prediction = 0 500 | predictions = {} 501 | for ts in sorted_tses: 502 | index = sorted_tses.index(ts) 503 | if index + 1 >= len(sorted_tses): 504 | continue 505 | next_ts = sorted_tses[index + 1] 506 | current_demand = demands[ts] 507 | next_prediction = beta * current_demand + (1-beta) * prediction 508 | predictions[next_ts] = next_prediction 509 | prediction = next_prediction 510 | 511 | return predictions 512 | 513 | def get_demand_func_time(post_op, pre_op): 514 | demand = {} 515 | for ts in pre_op: 516 | if ts not in demand: 517 | demand[ts] = 0 518 | demand[ts] = sum(pre_op[ts].values()) 519 | try: 520 | assert round(sum(post_op[ts].values())) >= round(demand[ts]) 521 | except: 522 | pdb.set_trace() 523 | return demand 524 | 525 | 526 | def read_contiguous_traffic_demand(cluster_no): 527 | all_demand = {} 528 | for month in [ 529 | '6-2018', 530 | '7-2018', 531 | '8-2018', 532 | '9-2018', 533 | '10-2018', 534 | '11-2018', 535 | '12-2018', 536 | '1-2019', 537 | '2-2019', 538 | '3-2019', 539 | '4-2019', 540 | '5-2019', 541 | '6-2019' 542 | ]: 543 | pre_op, post_op, pre_op_by_rtr_list, post_op_by_rtr_list, link_set =\ 544 | read_allocation(month, cluster_no) 545 | demand = get_demand_func_time(post_op, pre_op) 546 | all_demand.update(demand) 547 | return all_demand 548 | 549 | def offline_V_0(all_demand, cluster, end_ts, I, links, ifspeeds): 550 | sorted_tses = sorted(all_demand.keys()) 551 | end_ts_index = sorted_tses.index(end_ts) 552 | start_ts_index = end_ts_index - I 553 | assert end_ts_index > 0 554 | start_ts_index = max([0, start_ts_index]) 555 | assert start_ts_index >= 0 556 | 557 | start_ts = sorted_tses[start_ts_index] 558 | 559 | relevant_demand = {} 560 | for ts in sorted_tses: 561 | if ts >= start_ts and ts <= end_ts: 562 | relevant_demand[ts] = all_demand[ts] 563 | 564 | _, _, V_0 = sigcomm04_heuristic_offline(relevant_demand, links, ifspeeds) 565 | return V_0 566 | 567 | def gia_online(demand, all_demand, links, ifspeeds, V_0, cluster): 568 | ''' This is the GIA-online algorithm from "Optimizing 569 | Cost and Performance for Multihoming" paper from SIGCOMM 2004 by 570 | Goldenberg et.al.''' 571 | 572 | predicted_total_demand = exponential_weighted_moving_average_demand_prediction(demand) 573 | def select_random_links_with_slots(links, slots, p_k, k): 574 | link_subset = [link for link in links if slots[link] > 0] 575 | if len(link_subset) == 0: 576 | return [] 577 | elif len(link_subset) >= k: 578 | return random.sample(link_subset, k) 579 | else: 580 | return link_subset 581 | 582 | sorted_tses = sorted(demand.keys()) 583 | I = len(sorted_tses) 584 | f = min([len(links) * 0.05, 1]) 585 | V_0 = np.percentile(demand.values(), (1-f)*100) 586 | delta = 0.01 587 | slots = {} 588 | FREE_SLOTS = round(len(demand)*5/100.0) - 1 589 | 590 | for link in links: 591 | slots[link] = FREE_SLOTS 592 | 593 | def assign_predicted_flow(flow, V_0, p_k, links, slots): 594 | if flow <= V_0: return slots, True 595 | demand_met = False 596 | for j in range(1, len(links) + 1): 597 | peak_links = select_random_links_with_slots(links, slots, p_k, j) 598 | burst_cap = sum([ifspeeds[x] for x in peak_links]) 599 | burst_cap_deduct = sum([p_k[x] 
for x in peak_links]) 600 | burst_cap = V_0 + burst_cap - burst_cap_deduct 601 | if burst_cap >= flow: 602 | demand_met = True 603 | break 604 | 605 | if demand_met: 606 | for link in peak_links: 607 | slots[link] -= 1 608 | else: 609 | return slots, False 610 | return slots, True 611 | 612 | previous_demand = None 613 | previous_p_k = {} 614 | for link in links: 615 | previous_p_k[link] = 0 616 | 617 | count = 0 618 | for ts in sorted_tses: 619 | count += 1 620 | if count % 2016 == 0: 621 | V_0_prime = offline_V_0(all_demand, cluster, ts, I, links, ifspeeds) 622 | if V_0_prime > V_0: 623 | #print "increasing v_0" 624 | V_0 = 1.05* V_0 625 | 626 | margin = 0.05 * V_0 627 | demand_in_ts = demand[ts] 628 | if ts in predicted_total_demand: 629 | predicted_demand_in_ts = predicted_total_demand[ts] 630 | else: 631 | predicted_demand_in_ts = demand_in_ts 632 | link_costs = [(link, cost_by_rtr(link)) for link in links] 633 | link_costs = sorted(link_costs, key = lambda x: x[1]) 634 | remaining = V_0 + margin * len(links) 635 | p_k = previous_p_k.copy() 636 | for link, cost in link_costs: 637 | if remaining <= 0: 638 | continue 639 | cap = ifspeeds[link] 640 | if remaining < cap: 641 | p_k[link] = max([remaining, previous_p_k[link]]) 642 | remaining = 0 643 | else: 644 | p_k[link] = cap 645 | remaining = remaining - cap 646 | 647 | updated_p_k = {} 648 | for link in p_k: 649 | update = max([0, p_k[link] - margin]) 650 | updated_p_k[link] = max([previous_p_k[link], update]) 651 | assert updated_p_k[link] >= previous_p_k[link] 652 | 653 | assert sum(updated_p_k.values()) >= V_0 654 | # assign the predicted flow 655 | slots, feasible = assign_predicted_flow(demand_in_ts, sum(updated_p_k.values()), 656 | updated_p_k, links, slots) 657 | if not feasible: 658 | return None, None 659 | 660 | previous_p_k = updated_p_k 661 | 662 | cost = sum([updated_p_k[link]*cost_by_rtr(link) for link in updated_p_k]) 663 | return updated_p_k, cost 664 | 665 | def entact(demand, all_demand, links, ifspeeds): 666 | ''' 667 | Entact algorithm from https://www.usenix.org/legacy/event/nsdi10/tech/full_papers/zhang.pdf 668 | The core objective function minimizes the cost in every 5-minute time window. 669 | Cost is calculated as a product of link price and traffic assigned to it. 670 | This is essentially the greedy objective which can be minimized by assigning 671 | as much flow as is possible to the cheapest link, then the next cheapest and so on. 672 | ''' 673 | sorted_tses = sorted(demand.keys()) 674 | online_allocations = {} 675 | for ts in sorted_tses: 676 | online_allocations[ts] = {} 677 | demand_in_ts = demand[ts] 678 | link_assignments = bin_pack_links(links, ifspeeds, demand_in_ts) 679 | if not link_assignments: 680 | pdb.set_trace() 681 | for link in links: 682 | if link in link_assignments: 683 | online_allocations[ts][link] = link_assignments[link] 684 | else: 685 | online_allocations[ts][link] = 0 686 | 687 | online_allocations_by_rtr = {} 688 | for ts in online_allocations: 689 | for rtr in online_allocations[ts]: 690 | if rtr not in online_allocations_by_rtr: 691 | online_allocations_by_rtr[rtr] = [] 692 | online_allocations_by_rtr[rtr].append(online_allocations[ts][rtr]) 693 | 694 | return online_allocations_by_rtr 695 | 696 | def cascara_traffic_allocation_stable(demand, init_fraction, links, ifspeeds, online=False, 697 | alpha=None, beta=None, max_rate=50): 698 | ''' init_fraction is the fraction of the total network capacity 699 | that we are willing to use for allocating traffic. 
It might be infeasible
700 | to meet the traffic demand using this fraction, in which case we will update
701 | the fraction of the network capacity in use -- outside of the burst intervals.'''
702 |     allocation_step = max_rate * 5 * 60  # max_rate per second, sustained over one 5-minute (300 s) slot
703 |     online_allocations = {}
704 |     links_by_cost = {}
705 |     linkq = PiorityQueue(ifspeeds)
706 |     START_PRIO = len(demand)
707 |     FREE_SLOTS = round(len(demand)*5/100.0) - 1
708 |     num_tses = len(demand)
709 |     for link in links:
710 |         linkq.add_link(link, FREE_SLOTS, START_PRIO)
711 | 
712 |     # Assigning the usable capacity fraction should be done effectively:
713 |     # use links in an increasing order of their cost.
714 |     total_capacity = float(sum([ifspeeds[x] for x in links]))
715 |     capacity_fraction = init_fraction * total_capacity
716 |     link_assignments = bin_pack_links(links, ifspeeds, capacity_fraction)
717 |     if not link_assignments:
718 |         # print "Infeasible initial capacity fraction:", init_fraction
719 |         return False, False
720 | 
721 |     sorted_tses = sorted(demand.keys())
722 | 
723 |     def allocate_timestep(ts, demand_in_ts, fraction, previous_ts):
724 |         C_frac = fraction * total_capacity
725 |         # print ts, C_frac
726 |         link_assignments = bin_pack_links(links, ifspeeds, C_frac)
727 |         if demand_in_ts <= C_frac:
728 |             for link in links:
729 |                 if link in link_assignments:
730 |                     online_allocations[ts][link] = link_assignments[link]
731 |                 else:
732 |                     online_allocations[ts][link] = 0
733 |             return True
734 |         else:
735 |             over_demand = demand_in_ts - C_frac
736 |             allocations_previous_timestep = {}
737 |             if previous_ts in online_allocations:
738 |                 allocations_previous_timestep = online_allocations[previous_ts]
739 |             links_maxed_in_round = []
740 |             link_to_contrib = {}
741 |             while over_demand > 0:
742 |                 try:
743 |                     linkname, slots, prio = linkq.pop_link()
744 |                     links_maxed_in_round.append((linkname, slots, prio))
745 |                 except KeyError:
746 |                     # print "%f Demand of timestamp %d not met yet and no links to max out" % \
747 |                     #     (over_demand, ts)
748 |                     for link, slots, prio in links_maxed_in_round:
749 |                         linkq.add_link(link, slots, prio)
750 |                     return False
751 |                 if linkname in allocations_previous_timestep:
752 |                     previous_alloc = allocations_previous_timestep[linkname]
753 |                 elif linkname in link_assignments:
754 |                     previous_alloc = link_assignments[linkname]
755 |                 else:
756 |                     previous_alloc = 0
757 | 
758 |                 # link_contrib <= ifspeed
759 |                 # link_contrib <= previous_alloc + allocation_step
760 |                 link_contrib = min([previous_alloc + allocation_step, ifspeeds[linkname]])
761 |                 # If this link already has a bin-packed assignment, the
762 |                 # allocation step may have raised its total contribution
763 |                 # above that base. Only the excess over the bin-packed
764 |                 # base should be subtracted from over_demand, because
765 |                 # over_demand was computed net of C_frac and C_frac
766 |                 # already covers every bin-packed base allocation
767 |                 # made for this timestep.
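                # Worked example (hypothetical numbers): ifspeed = 1000,
                # bin-packed base link_assignments[link] = 200,
                # previous_alloc = 300, allocation_step = 150. Then
                # link_contrib = min(300 + 150, 1000) = 450 in total on the
                # link, of which 450 - 200 = 250 is the excess counted
                # against over_demand below.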
768 | if linkname in link_assignments: 769 | if link_contrib > link_assignments[linkname]: 770 | link_contrib = link_contrib - link_assignments[linkname] 771 | else: 772 | link_contrib = 0 773 | 774 | # link_contrib <= over_demand 775 | link_contrib = min([over_demand, link_contrib]) 776 | assert link_contrib >= 0 777 | over_demand = over_demand - link_contrib 778 | link_to_contrib[linkname] = link_contrib 779 | 780 | for link in links: 781 | if link in link_assignments: 782 | online_allocations[ts][link] = link_assignments[link] 783 | else: 784 | online_allocations[ts][link] = 0 785 | 786 | for link, slots, prio in links_maxed_in_round: 787 | linkq.add_link(link, slots - 1, prio - 1) 788 | if link not in online_allocations[ts]: 789 | online_allocations[ts][link] = 0 790 | online_allocations[ts][link] += link_to_contrib[link] 791 | 792 | assert sum(online_allocations[ts].values()) >= demand_in_ts 793 | 794 | for link in online_allocations[ts]: 795 | assert online_allocations[ts][link] <= ifspeeds[link] 796 | return True 797 | 798 | assert False 799 | 800 | def inc_frac(frac, value): 801 | frac = frac + value 802 | if frac > 1: 803 | frac = 1 804 | return frac 805 | 806 | fraction = init_fraction 807 | count = 0 808 | week_count = 1 809 | init_bca = linkq.get_bca(link_assignments) 810 | previous_ts = None 811 | for ts in sorted_tses: 812 | demand_in_ts = demand[ts] 813 | C_frac = fraction * total_capacity 814 | new_link_assignments = bin_pack_links(links, ifspeeds, C_frac) 815 | count += 1 816 | if count % 2016 == 0 and online: 817 | # one week is complete 818 | if linkq.get_bca(new_link_assignments) < init_bca * (4-week_count)/4.0: 819 | fraction = inc_frac(fraction, alpha) 820 | week_count += 1 821 | 822 | online_allocations[ts] = {} 823 | while not allocate_timestep(ts, demand_in_ts, fraction, previous_ts): 824 | if not online: 825 | return None, None 826 | fraction = inc_frac(fraction, beta) 827 | previous_ts = ts 828 | 829 | online_allocations_by_rtr = {} 830 | for ts in online_allocations: 831 | for rtr in online_allocations[ts]: 832 | if rtr not in online_allocations_by_rtr: 833 | online_allocations_by_rtr[rtr] = [] 834 | online_allocations_by_rtr[rtr].append(online_allocations[ts][rtr]) 835 | 836 | return online_allocations_by_rtr, fraction 837 | 838 | def get_links(peer, all_links, pop=None): 839 | ''' get links at a pop and belonging to 840 | a BGP peer.''' 841 | relevant_links = [] 842 | for link in all_links: 843 | if peer in link: 844 | if pop: 845 | if "-%s-" % pop in link: 846 | relevant_links.append(link) 847 | else: 848 | relevant_links.append(link) 849 | 850 | return relevant_links 851 | 852 | def get_client_to_link_maps(ts, client_maps, client_to_primary_peer, 853 | client_to_primary_pop, client_to_pop_latency, 854 | all_links): 855 | client_to_pop_latency_tses = sorted(client_to_pop_latency.keys()) 856 | 857 | def get_lowest_latency_links(client, ts, primary_pop=None,enable_transit=False): 858 | ts_prev = None 859 | for ts_iter in client_to_pop_latency_tses: 860 | if ts_iter >= int(ts): 861 | ts_end = ts_iter 862 | break 863 | ts_prev = ts_iter 864 | 865 | best_pop = None 866 | all_measured_pops = {} 867 | lookback = 0 868 | for ts_iter in reversed(client_to_pop_latency_tses): 869 | # Python weirdness: if ts_prev is None, the following condition retursn true 870 | if ts_iter > ts_prev: continue 871 | lookback += 1 872 | if lookback > 10: break 873 | if client in client_to_pop_latency[ts_iter]: 874 | measured_pops = client_to_pop_latency[ts_iter][client] 875 | for pop, 
lat in measured_pops: 876 | if pop not in all_measured_pops: 877 | # keeping the most recent measurement 878 | all_measured_pops[pop] = lat 879 | better_pops = [] 880 | for pop1 in all_measured_pops: 881 | for pop2 in all_measured_pops: 882 | if pop1 == pop2: continue 883 | if pop1 in primary_pop or pop2 in primary_pop: 884 | if abs(all_measured_pops[pop1] - all_measured_pops[pop2]) <= 10: 885 | better_pops.append(pop1) 886 | better_pops.append(pop2) 887 | 888 | if better_pops: 889 | better_pops = list(set(better_pops)) 890 | better_pops = list(set(better_pops).union(primary_pop)) 891 | else: 892 | better_pops = primary_pop 893 | client_primary_peers = client_to_primary_peer[client] 894 | relevant_links = [] 895 | for bpop in better_pops: 896 | for peer in client_primary_peers: 897 | relevant_links.extend(get_links(peer, all_links, pop=bpop)) 898 | 899 | return relevant_links 900 | 901 | with open(CLIENT_TO_LINK_MAPS) as fi: 902 | client_to_link_maps_ = json.load(fi) 903 | 904 | if client_maps == "current": 905 | return client_to_link_maps_ 906 | 907 | elif client_maps == "all": 908 | client_to_link_maps = {} 909 | for client in client_to_link_maps_: 910 | client_to_link_maps[client] = [] 911 | for peer in client_to_primary_peer[client]: 912 | client_to_link_maps[client].extend(peer_to_links[peer]) 913 | return client_to_link_maps 914 | 915 | elif client_maps == "all-transit": 916 | # fun things here 917 | client_to_link_maps = {} 918 | for client in client_to_link_maps_: 919 | relevant_links = [] 920 | for peer in client_to_primary_peer[client]: 921 | relevant_links.extend(peer_to_links[peer]) 922 | # Address space overlap is such that these links are also 923 | # feasible choices 924 | client_to_link_maps[client] = relevant_links 925 | 926 | return client_to_link_maps 927 | 928 | elif client_maps == "pop": 929 | client_to_link_maps = {} 930 | for client in client_to_link_maps_: 931 | relevant_links = [] 932 | for peer in client_to_primary_peer[client]: 933 | for ppop in client_to_primary_pop[client]: 934 | relevant_links.extend(get_links(peer, all_links, pop=ppop)) 935 | client_to_link_maps[client] = relevant_links 936 | return client_to_link_maps 937 | 938 | elif client_maps == "pop-transit": 939 | client_to_link_maps = {} 940 | for client in client_to_link_maps_: 941 | relevant_links = [] 942 | for peer in client_to_primary_peer[client]: 943 | for ppop in client_to_primary_pop[client]: 944 | relevant_links.extend(get_links(peer, all_links, pop=ppop)) 945 | client_to_link_maps[client] = relevant_links 946 | 947 | return client_to_link_maps 948 | 949 | elif client_maps == "pop2": 950 | client_to_link_maps = {} 951 | for client in client_to_link_maps_: 952 | client_to_link_maps[client] =\ 953 | get_lowest_latency_links(client, ts, 954 | primary_pop=client_to_primary_pop[client]) 955 | return client_to_link_maps 956 | 957 | elif client_maps == "pop2-transit": 958 | client_to_link_maps = {} 959 | for client in client_to_link_maps_: 960 | client_to_link_maps[client] = get_lowest_latency_links(client, ts, 961 | primary_pop=client_to_primary_pop[client], 962 | enable_transit=True) 963 | return client_to_link_maps 964 | 965 | 966 | def cascara_traffic_allocation_latency_sensitive(demand, init_fraction, links, 967 | client_to_primary_peer, 968 | client_to_primary_pop, 969 | client_to_pop_latency, 970 | ifspeeds, online=False, 971 | alpha=None, beta=None, 972 | client_maps="current"): 973 | print "Client map strategy:", client_maps 974 | max_rate = 40 975 | limit_per_link = 0.2 976 | print "Limit 
per link", limit_per_link 977 | allocation_step = max_rate * 5 * 60 # 10G for 5 minutes 978 | online_allocations = {} 979 | links_by_cost = {} 980 | linkq = PiorityQueue(ifspeeds) 981 | START_PRIO = len(demand) 982 | FREE_SLOTS = round(len(demand)*5/100.0) - 1 983 | num_tses = len(demand) 984 | 985 | 986 | # Assigning the usable capacity fraction should be done effectively: 987 | # use links in an increasing order of their cost. 988 | total_capacity = float(sum([ifspeeds[x] for x in links])) 989 | capacity_fraction = init_fraction * total_capacity 990 | 991 | sorted_tses = sorted(demand.keys()) 992 | 993 | env = Env(empty=True) 994 | env.setParam('OutputFlag', 0) 995 | env.start() 996 | 997 | def allocate_timestep_latency_sensitive(ts, previous_ts, demand_in_ts, 998 | fraction, min_frac=0.2): 999 | 1000 | client_to_link_maps = get_client_to_link_maps(ts, client_maps, 1001 | client_to_primary_peer, 1002 | client_to_primary_pop, 1003 | client_to_pop_latency, 1004 | links) 1005 | # demand_in_ts: {client: demand} 1006 | # client_to_link_maps: {client: peers} 1007 | # link_allocations: {link: allocation} 1008 | C_frac = fraction * total_capacity 1009 | over_demand = 0 1010 | burst_link_allocs = {} 1011 | links_maxed_in_round = [] 1012 | if sum(demand_in_ts.values()) >= C_frac: 1013 | # the allocation will fail because C_frac isn't enough 1014 | # We have to augment some links 1015 | print "Demand is higher than C_frac", C_frac, sum(demand_in_ts.values()) 1016 | over_demand = sum(demand_in_ts.values()) - C_frac 1017 | while over_demand > 0: 1018 | try: 1019 | linkname, slots, prio = linkq.pop_link() 1020 | links_maxed_in_round.append((linkname, slots, prio)) 1021 | except KeyError: 1022 | # Ran out of links to max out, add the ones popped 1023 | # off the priority queue and return 1024 | for link, slots, prio in links_maxed_in_round: 1025 | linkq.add_link(link, slots, prio) 1026 | print "burst_allocations_timestep: Ran out of links to max out", ts 1027 | return False 1028 | link_contrib = min(ifspeeds[linkname], over_demand) 1029 | burst_link_allocs[linkname] = link_contrib 1030 | over_demand = over_demand - link_contrib 1031 | 1032 | assert over_demand == 0 1033 | C_frac = sum(demand_in_ts.values()) 1034 | 1035 | model = Model("lp", env=env) 1036 | variables = {} 1037 | objective = 0 1038 | total_allocation = 0 1039 | per_link_vars = {} 1040 | # i is the client + metro 1041 | for i in demand_in_ts: 1042 | variables[i] = {} 1043 | if len(client_to_link_maps[i]) == 0: 1044 | # This is a stop gap because there are no feasible links 1045 | # to this client and that needs to be fixed 1046 | # print "No links for clients", i, "mapping to all links by the same peer" 1047 | client_to_link_maps[i] = get_links(client_to_primary_peer[i][0], links, pop=None) 1048 | try: 1049 | assert len(client_to_link_maps[i]) >= 1, "too few links" 1050 | except: 1051 | pdb.set_trace() 1052 | # j is the link (peer + rtr) 1053 | for j in links: 1054 | variables[i][j] = model.addVar(lb=0, name="x_%s_%s" % (i,j)) 1055 | if j not in client_to_link_maps[i]: 1056 | # this link is there in the global graph but the link 1057 | # selection done by get_client_to_link_maps 1058 | # has not considered it "relevant" for the client_map 1059 | # strategy. To ensure the link doesn't have 1060 | # a non-zero allocation on it, adding a special constraint. 
1061 |                     model.addConstr(variables[i][j] <= 0, name="%s-%s-zero" % (i, j))
1062 | 
1063 |                 objective += cost_by_rtr(j) * variables[i][j]
1064 |                 total_allocation += variables[i][j]
1065 |                 if j not in per_link_vars:
1066 |                     per_link_vars[j] = 0
1067 |                 per_link_vars[j] += variables[i][j]
1068 |             # allocations from client i to any links should
1069 |             # be at least as high as the demand to the client
1070 |             if i in demand_in_ts:
1071 |                 model.addConstr(sum(variables[i].values()) >= demand_in_ts[i],
1072 |                                 name="demand_to_%s" % i)
1073 |             else:
1074 |                 model.addConstr(sum(variables[i].values()) >= 0)
1075 | 
1076 |         model.update()
1077 | 
1078 |         # traffic towards all clients from a link must not
1079 |         # exceed the allocation amount predetermined
1080 |         # for this link
1081 |         for link in per_link_vars:
1082 |             if link in burst_link_allocs:
1083 |                 model.addConstr(per_link_vars[link] <= burst_link_allocs[link],
1084 |                                 name="burst_link_%s" % link)
1085 |             else:
1086 |                 model.addConstr(per_link_vars[link] <= limit_per_link * ifspeeds[link],
1087 |                                 name="link_capacity_%s" % link)
1088 | 
1089 |         model.setObjective(objective)
1090 |         model.addConstr(total_allocation <= C_frac, name="C_frac")
1091 | 
1092 |         try:
1093 |             model.optimize()
1094 |         except error.SolverError as e:
1095 |             print e
1096 |             print "Gurobi failed for timestamp", ts, fraction
1097 |             return 0
1098 | 
1099 |         if model.Status == GRB.INFEASIBLE:
1100 |             print "Gurobi model is infeasible", ts, fraction
1101 | 
1102 |             for link, slots, prio in links_maxed_in_round:
1103 |                 linkq.add_link(link, slots, prio)
1104 |             model_c = model.copy()
1105 |             model_c.computeIIS()
1106 |             for c in model_c.getConstrs():
1107 |                 if c.IISConstr: print('%s' % c.constrName)
1108 |             pdb.set_trace()
1109 |             return 0
1110 | 
1111 |         for link, slots, prio in links_maxed_in_round:
1112 |             linkq.add_link(link, slots, prio)
1113 | 
1114 |         for i in variables:
1115 |             for j in variables[i]:
1116 |                 if j not in online_allocations[ts]:
1117 |                     online_allocations[ts][j] = 0
1118 |                 online_allocations[ts][j] += variables[i][j].x
1119 | 
1120 |         return 1
1121 | 
1122 |     def inc_frac(frac, value):
1123 |         print "Inc frac:", frac, frac+value
1124 |         frac = frac + value
1125 |         if frac >= 1:
1126 |             frac = 1
1127 |         return frac
1128 | 
1129 |     fraction = init_fraction
1130 |     count = 0
1131 |     week_count = 1
1132 | 
1133 |     previous_ts = None
1134 | 
1135 |     link_to_metros = {}
1136 |     print "Fraction initial", fraction
1137 |     for ts in sorted_tses:
1138 |         print "TIMESTAMP FOR LP:", ts
1139 |         demand_in_ts = demand[ts]
1140 |         C_frac = fraction * total_capacity
1141 |         count += 1
1142 |         online_allocations[ts] = {}
1143 |         alloc_found = False
1144 |         while not alloc_found:
1145 |             return_status = allocate_timestep_latency_sensitive(ts, previous_ts, demand_in_ts,
1146 |                                                                 fraction)
1147 |             if return_status != 1 and not online:
1148 |                 print "Allocation failed at ts", ts, C_frac
1149 |                 return None, None
1150 |             elif return_status != 1:
1151 |                 fraction = inc_frac(fraction, beta)
1152 |                 print week_count, fraction
1153 |                 continue
1154 |             # allocation successful
1155 |             alloc_found = True
1156 | 
1157 |         previous_ts = ts
1158 | 
1159 |     online_allocations_by_rtr = {}
1160 |     for ts in online_allocations:
1161 |         for rtr in online_allocations[ts]:
1162 |             if rtr not in online_allocations_by_rtr:
1163 |                 online_allocations_by_rtr[rtr] = []
1164 |             online_allocations_by_rtr[rtr].append(online_allocations[ts][rtr])
1165 | 
1166 |     return online_allocations_by_rtr, fraction
1167 | 
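To make the EWMA predictor defined earlier in this file concrete, here is a minimal usage sketch. It is not part of the original repository: it assumes `heuristic_api.py` imports cleanly in a Python 2 environment once its removed input references are supplied, and the timestamps and demand values below are made up.

```python
from heuristic_api import exponential_weighted_moving_average_demand_prediction

# Toy demand per 5-minute timeslot, keyed by timestamp (seconds).
demands = {0: 100.0, 300: 120.0, 600: 90.0, 900: 150.0}

# beta = 0.5 weighs the newest sample and the running prediction equally;
# the default beta = 1 reduces to "predict the current demand".
predictions = exponential_weighted_moving_average_demand_prediction(demands, beta=0.5)
for ts in sorted(predictions):
    print "ts=%d predicted=%.1f" % (ts, predictions[ts])
# ts=300 predicted=50.0
# ts=600 predicted=85.0
# ts=900 predicted=87.5
```

Note that the running prediction starts at zero, so the earliest predictions are biased low until the moving average warms up.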
--------------------------------------------------------------------------------
/optimize_mip_single_peer_split.py:
--------------------------------------------------------------------------------
1 | ## Example command to run this program: see the usage note at the end of this listing.
2 | from sklearn.cluster import KMeans
3 | from vincenty import vincenty
4 | import time
5 | import numpy as np
6 | from datetime import datetime
7 | import csv
8 | import pdb
9 | from cvxpy import *
10 | import sys
11 | import json
12 | import pandas as pd
13 | from consts import *
14 | 
15 | if sys.argv[4] == 'native':
16 |     print "Using native Gurobi bindings for the solver"
17 |     from api_opt_native import *
18 | elif sys.argv[4] == 'non_native':
19 |     print "Using CVX wrapper for Gurobi"
20 |     from api_opt import *
21 | 
22 | k_val = int(sys.argv[1])  # percentile (1-99)
23 | m_val = int(sys.argv[2])
24 | combined_flow_file_name = sys.argv[3]
25 | 
26 | if len(sys.argv) > 5:
27 |     model_strategy = sys.argv[5]
28 | else:
29 |     model_strategy = "fixed-m"
30 | 
31 | if 'cluster' in model_strategy:
32 |     num_link_clusters = int(model_strategy.split('cluster-')[-1])
33 | 
34 | print "K value:", k_val
35 | print "M value:", m_val
36 | print "Model Strategy:", model_strategy
37 | 
38 | with open(EDGE_INTF_CAPACITY_TIME_FNAME) as fi:
39 |     ifspeeds_pre = json.load(fi)
40 | 
41 | ifspeeds = {}
42 | for rtr in ifspeeds_pre:
43 |     for peer in ifspeeds_pre[rtr]:
44 |         ifspeeds["%s-%s" % (peer.upper(), rtr)] = max(ifspeeds_pre[rtr][peer].values()) * 5 * 60
45 | 
46 | def get_subset_of_flows(cluster_flows):
47 |     cluster_sub = {}
48 |     for ts in cluster_flows:
49 |         cluster_sub[ts] = {}
50 |         for rtr in cluster_flows[ts]:
51 |             if rtr not in links_to_include:
52 |                 continue
53 |             cluster_sub[ts][rtr] = cluster_flows[ts][rtr]
54 | 
55 |     return cluster_sub
56 | 
57 | active_links_per_month = {}
58 | previous_model = None
59 | for month in [
60 |     '6-2018',
61 |     '7-2018',
62 |     '8-2018',
63 |     '9-2018',
64 |     '10-2018',
65 |     '11-2018',
66 |     '12-2018',
67 |     '1-2019',
68 |     '2-2019',
69 |     '3-2019',
70 |     '4-2019',
71 |     '5-2019',
72 |     '6-2019',
73 |     '7-2019',
74 |     '8-2019',
75 |     '9-2019',
76 |     '10-2019',
77 |     '11-2019',
78 |     '12-2019',
79 |     '1-2020',
80 |     '2-2020',
81 |     '3-2020',
82 |     '4-2020',
83 |     '5-2020'
84 | ]:
85 |     print "Month:", month
86 |     #for cluster_id in [0, 1, 2, 3, 4]:
87 |     for cluster_id in [9]:
88 |         cluster_sub_flows = read_current_allocations(combined_flow_file_name,
89 |                                                      month, cluster_id, strategy=model_strategy)
90 |         if not cluster_sub_flows: continue
91 | 
92 |         if model_strategy == 'subset':
93 |             cluster_sub_flows = get_subset_of_flows(cluster_sub_flows)
94 | 
95 |         router_set = get_routers_from_assignments(cluster_sub_flows)
96 |         if not router_set:
97 |             print "No router links included in analysis, skipping"
98 |             continue
99 | 
100 |         print router_set
101 |         num_billing_slots = len(cluster_sub_flows)
102 |         print 'Calculating optimum traffic assignments for month %s with %d billing slots' %\
103 |             (month, num_billing_slots)
104 |         sanity_check(cluster_sub_flows, ifspeeds)
105 |         # Calculate the present-day traffic distribution per peering link
106 |         per_router_present_dist = from_ts_to_rtr_key(cluster_sub_flows, router_set)
107 |         cost_per_rtr = calculate_cost_of_traffic_assignment(per_router_present_dist,
108 |                                                             ifspeeds, num_billing_slots, k_val)
109 | 
110 |         print "Minimizing the %d-ile element" % k_val
111 | 
112 |         # Calculate demand per 5 minute interval
113 |         flows_per_ts_egress = {}
114 |         for ts in cluster_sub_flows:
115 | 
116 |             flows_per_ts_egress[ts] = sum(cluster_sub_flows[ts].values())
117 | 
118 |         total_cost = sum(cost_per_rtr.values())
119 |         print "Pre-optimization cost is:", total_cost
120 |         print "Per router pre-op cost is", cost_per_rtr
121 | 
122 |         if model_strategy == 'prev_work':
123 |             model = solve_optimization_previous_work(cluster_sub_flows, flows_per_ts_egress,
124 |                                                      router_set,
125 |                                                      total_cost, month, cluster_id, num_billing_slots,
126 |                                                      k_val, ifspeeds, m_val, combined_flow_file_name,
127 |                                                      previous_model=previous_model, strategy=model_strategy)
128 | 
129 |         else:
130 |             model = solve_optimization(cluster_sub_flows, flows_per_ts_egress,
131 |                                        router_set,
132 |                                        total_cost, month, cluster_id, num_billing_slots,
133 |                                        k_val, ifspeeds, m_val, combined_flow_file_name,
134 |                                        previous_model=previous_model, strategy=model_strategy,
135 |                                        scav_frac=scav_frac)  # NOTE: scav_frac is not defined in this script; set it before running
136 | 
137 |         print "Finished saving to disk", cluster_id, month
138 | 
--------------------------------------------------------------------------------
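The `## Example command` comment at the top of this file was left empty. The following is a hypothetical invocation reconstructed from the `sys.argv` parsing above; the flow file name is a placeholder you must replace with your own:

```python
# python optimize_mip_single_peer_split.py 95 5 <combined_flow_file> native fixed-m
#
# argv[1]: k_val  -> percentile of the billing slots to minimize (1-99)
# argv[2]: m_val  -> M value forwarded to solve_optimization
# argv[3]: path to the combined flow file (placeholder; supply your own)
# argv[4]: 'native' for gurobipy bindings, 'non_native' for the CVXPY wrapper
# argv[5]: optional model strategy: 'fixed-m' (default), 'subset',
#          'prev_work', or 'cluster-<N>'
```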