├── Experiments.py
├── HelperFunctions.py
├── README.md
├── ResultsAnalysis.ipynb
├── SPOresultsAllVars.pkl
├── plots
│   ├── AbsoluteLoss.png
│   ├── RelativeLoss.png
│   └── Runtime.png
└── requirements.txt
--------------------------------------------------------------------------------
/Experiments.py:
--------------------------------------------------------------------------------
from HelperFunctions import *
import time
import numpy as np
import pandas as pd
import pickle

def problem_size_experiment(params, noise, degree, sigma, iterations=30):
    '''
    Runs the direct and SGD solvers for every combination of the given input parameters.

    Parameters:
        dict{str: list} params: dictionary of parameter values to experiment with. Must specify 'n', 'p', and 'grid_dim'
        float noise: multiplicative noise term applied to the cost vector, sampled from a uniform distribution on [1 - noise, 1 + noise]
        int degree: polynomial degree of the generated cost vector. When degree = 1, the expected value of c is linear in x; degree > 1 controls the amount of model misspecification
        float sigma: standard deviation of each feature dimension, broadcast to an array of length p
        int iterations: number of independent repetitions per parameter combination

    Returns:
        pd.DataFrame of experimental results, including runtime, SPO loss, and SPO+ loss for both the direct and SGD solvers
    '''
    experimental_results = {}

    # For each parameter combination, solve the problem instance and record results
    for grid_dim in params['grid_dim']:
        for p in params['p']:
            for n in params['n']:
                # create sigma array of length p
                sigma_arr = np.full(p, sigma)

                direct_runtimeparams = []
                SGD_runtimeparams = []

                SPO_loss_directparams = []
                SPO_loss_SGDparams = []

                SPO_plus_loss_directparams = []
                SPO_plus_loss_SGDparams = []

                # Create shortest path constraints
                A, b = CreateShortestPathConstraints(grid_dim)
                for i in range(iterations):
                    print(n, p, grid_dim, i)
                    # Generate the dataset
                    X, C = generate_data(n, p, grid_dim, sigma_arr, noise, degree)

                    # Run the direct solution and record the time
                    start_direct = time.time()
                    B_direct = DirectSolution(A, b, X, C)
                    end_direct = time.time() - start_direct
                    direct_runtimeparams.append(end_direct)

                    # Run the SGD solution and record the time
                    start_sgd = time.time()
                    B_SGD = GradientDescentSolution(A, b, X, C, batch_size=10, epsilon=0.001)
                    end_sgd = time.time() - start_sgd
                    SGD_runtimeparams.append(end_sgd)

                    # Record losses
                    solver = ShortestPathSolver(A, b)
                    SPO_loss_directparams.append(SPOLoss(solver, X, C, B_direct))
                    SPO_loss_SGDparams.append(SPOLoss(solver, X, C, B_SGD))
                    SPO_plus_loss_directparams.append(SPOplusLoss(solver, X, C, B_direct))
                    SPO_plus_loss_SGDparams.append(SPOplusLoss(solver, X, C, B_SGD))

                # store results from all iterations in the results dict
                experimental_results[(n, p, grid_dim, 'direct_runtime')] = direct_runtimeparams
                experimental_results[(n, p, grid_dim, 'SGD_runtime')] = SGD_runtimeparams

                experimental_results[(n, p, grid_dim, 'SPO_loss_direct')] = SPO_loss_directparams
                experimental_results[(n, p, grid_dim, 'SPO_loss_SGD')] = SPO_loss_SGDparams

                experimental_results[(n, p, grid_dim, 'SPO_plus_loss_direct')] = SPO_plus_loss_directparams
                experimental_results[(n, p, grid_dim, 'SPO_plus_loss_SGD')] = SPO_plus_loss_SGDparams

                # checkpoint intermediate results to disk after each parameter combination
                checkpoint = pd.DataFrame(experimental_results).transpose()
                checkpoint.index.names = ['n', 'p', 'grid_dim', 'metric']
                pickle.dump(checkpoint, open('SPOresultsCheckpoint.pkl', 'wb'))

    experimental_results = pd.DataFrame(experimental_results).transpose()
    experimental_results.index.names = ['n', 'p', 'grid_dim', 'metric']
    return experimental_results
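# Illustration of the returned structure (values are hypothetical): for
# params = {"n": [100], "p": [5], "grid_dim": [5]}, the DataFrame is indexed by
# (n, p, grid_dim, metric) tuples such as (100, 5, 5, 'direct_runtime'), and
# each row holds one recorded value per iteration. A single metric can be
# pulled out with, e.g., results.xs('direct_runtime', level='metric').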

params = {"n": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000], "p": [5, 10, 15, 20], "grid_dim": [5]}
noise = 0.25
degree = 3
sigma = 0.2

experiment1 = problem_size_experiment(params, noise, degree, sigma, iterations=30)
pickle.dump(experiment1, open('SPOresultsAllVars.pkl', 'wb'))
--------------------------------------------------------------------------------
/HelperFunctions.py:
--------------------------------------------------------------------------------
import numpy as np
import cvxpy as cp


def generate_data(n, p, grid_dim, sigma, noise, degree):
    '''
    Generate synthetic data for the grid_dim x grid_dim grid shortest path problem.

    Parameters:
        int n: number of data points to generate
        int p: number of features
        int grid_dim: dimension of the square grid, which determines the size of the cost vector
        np.array sigma: array of length p giving the standard deviation of each feature dimension, i.e. x_j ~ N(0, sigma_j^2)
        float noise: multiplicative noise term applied to the cost vector, sampled from a uniform distribution on [1 - noise, 1 + noise]
        int degree: polynomial degree of the generated cost vector. When degree = 1, the expected value of c is linear in x; degree > 1 controls the amount of model misspecification

    Returns:
        np.array X: feature data of dimension [n, p]
        np.array C: cost data of dimension [n, d]
    '''
    # Number of edges in the grid, i.e. the size of the cost vector
    d = grid_dim * (grid_dim - 1) * 2

    # Parameters of the true model: each entry of B_star is a Bernoulli RV that equals 1 with probability 0.5
    B_star = np.random.binomial(size=[d, p], n=1, p=0.5)

    # Generate feature data: each row is a training point drawn from a multivariate Gaussian with independent entries, x_j ~ N(0, sigma_j^2)
    X = np.random.normal(loc=0, scale=sigma, size=[n, p])

    # Generate cost data: a polynomial of the linear predictions, perturbed by i.i.d. multiplicative noise
    noise_vector = np.random.uniform(low=1 - noise, high=1 + noise, size=[n, d])
    C = np.multiply((((1 / np.sqrt(p) * B_star @ X.T) + 3) ** degree + 1).T, noise_vector)

    return X, C
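
# Quick sanity check (illustrative only; _generate_data_example is a
# hypothetical helper, not part of the experiments): for a 5x5 grid the cost
# vector has d = 5 * 4 * 2 = 40 entries, so X and C should come back with
# shapes (100, 5) and (100, 40) respectively.
def _generate_data_example():
    X, C = generate_data(n=100, p=5, grid_dim=5, sigma=np.full(5, 0.2), noise=0.25, degree=3)
    assert X.shape == (100, 5) and C.shape == (100, 40)
    return X, C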

def CreateShortestPathConstraints(gridsize):
    '''
    Generate flow-conservation constraints for the gridsize x gridsize shortest path problem.
    Each node in the grid has one constraint whose LHS is its inflows minus its outflows and whose RHS is the desired net flow.
    The desired net flow is 0 for all nodes except the start node, where it is -1, and the end node, where it is 1.

    Parameters:
        int gridsize: size of each dimension of the grid

    Returns:
        np.array A: flow matrix of shape [num_nodes, num_edges]. A[i, j] is -1 if edge j is an outflow of node i and 1 if edge j is an inflow of node i
        np.array b: RHS of the constraints [num_nodes]
    '''
    # define node and edge counts
    num_nodes = gridsize**2
    num_directional_edges = num_nodes - gridsize  # number of vertical edges, which equals the number of horizontal edges
    num_edges = num_directional_edges * 2  # vertical and horizontal edges combined

    # initialize empty A and b arrays
    A = np.zeros((num_nodes, num_edges), np.int8)
    b = np.zeros(num_nodes, np.int8)

    # fill in the flow matrix
    # nodes are ordered by rows, e.g. in a 3x3 grid the first row's nodes are indices 0, 1, 2 and the second row's are 3, 4, 5
    # horizontal edges are enumerated first, then vertical edges
    horizontaledgepointer = 0
    verticaledgepointer = 0
    for i in range(num_directional_edges):
        # update flow matrix for horizontal edges
        outnode = horizontaledgepointer
        innode = horizontaledgepointer + 1

        A[outnode, i] = -1
        A[innode, i] = 1
        horizontaledgepointer += 1
        if (horizontaledgepointer + 1) % gridsize == 0:  # node is at the right edge of the grid, so go to the next row
            horizontaledgepointer += 1

        # update flow matrix for vertical edges
        outnode = verticaledgepointer
        innode = verticaledgepointer + gridsize
        A[outnode, num_directional_edges + i] = -1
        A[innode, num_directional_edges + i] = 1
        verticaledgepointer += gridsize
        if verticaledgepointer + gridsize >= num_nodes:  # node is at the bottom edge of the grid, so go to the next column
            verticaledgepointer = (verticaledgepointer % gridsize) + 1

    # update RHS for the start and end nodes
    b[0] = -1
    b[-1] = 1
    return A, b


class ShortestPathSolver:
    def __init__(self, A, b):
        '''
        Defines a linear program that solves the shortest path problem with constraint matrix A and RHS b.
        The LP relaxation suffices because network flow constraint matrices are totally unimodular, so an optimal vertex solution is integral.

        Parameters:
            np.array A: constraint matrix
            np.array b: RHS of the constraints
        '''
        if A.shape[0] != b.size:
            raise ValueError('A and b have incompatible shapes')
        numedges = A.shape[1]
        self.c = cp.Parameter(numedges)
        self.w = cp.Variable(numedges, nonneg=True)
        self.prob = cp.Problem(cp.Minimize(self.c @ self.w),
                               [A @ self.w == b, self.w <= 1])  # the trivial upper bound is an inequality constraint required by the GLPK_MI solver

    def solve(self, c):
        '''
        Solves the predefined optimization problem with cost vector c and returns the decision variable array.

        Parameters:
            np.array c: cost vector

        Returns:
            np.array solution to the shortest path problem
        '''
        self.c.project_and_assign(c)
        self.prob.solve()
        return self.w.value


def SPOLoss(solver, X, C, B):
    '''
    Computes the SPO (decision) loss.

    Parameters:
        ShortestPathSolver solver: a shortest path solver object
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        np.array B: weights of the linear model

    Returns:
        SPO loss as a float
    '''
    W = np.apply_along_axis(solver.solve, 1, X @ B.T)
    W_star = np.apply_along_axis(solver.solve, 1, C)
    return (np.multiply(C, W).sum() - np.multiply(C, W_star).sum()) / W.shape[0]
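
# In the notation of Elmachtoub and Grigas (2021), SPOLoss computes the sample
# average of the decision regret
#     l_SPO(Bx, c) = c^T w*(Bx) - c^T w*(c),
# i.e. the excess cost incurred by acting on the predicted cost vector Bx
# instead of the true cost vector c, where w*(.) is the shortest path oracle.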

def SPOplusLoss(solver, X, C, B):
    '''
    Computes the SPO+ loss, a convex upper bound on the SPO loss.

    Parameters:
        ShortestPathSolver solver: a shortest path solver object
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        np.array B: weights of the linear model

    Returns:
        SPO+ loss as a float
    '''
    pred2 = 2 * (X @ B.T)
    W_support = np.apply_along_axis(solver.solve, 1, pred2 - C)
    support = np.multiply(C - pred2, W_support).sum(axis=1)
    W_star = np.apply_along_axis(solver.solve, 1, C)
    z_star = np.multiply(C, W_star).sum(axis=1)
    return (support + np.multiply(pred2, W_star).sum(axis=1) - z_star).mean()


def DirectSolution(A, b, X, C, reg_weight=0.0):
    '''
    Computes the direct solution that minimizes the SPO+ loss over the hypothesis class of linear models B.

    Parameters:
        np.array A: constraint matrix [num_nodes, num_edges]
        np.array b: RHS of the constraints [num_nodes]
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        float reg_weight: the regularization weight

    Returns:
        np.array B: coefficient matrix of the fitted linear models [num_edges, num_features]
    '''
    num_samples = X.shape[0]

    # solve every shortest path problem
    solver = ShortestPathSolver(A, b)
    W = np.apply_along_axis(solver.solve, 1, C)  # W has shape [num_samples, num_edges]

    # define linear program variables
    B = cp.Variable((A.shape[1], X.shape[1]))  # B has shape [num_edges, num_features]
    P = cp.Variable((num_samples, A.shape[0]), nonneg=True)  # P has shape [num_samples, num_nodes]

    # define linear program objective and constraints
    objective = (cp.sum(-P @ b) + 2 * cp.sum(cp.multiply(X @ B.T, W)) - cp.sum(cp.multiply(W, C))) / num_samples
    if reg_weight > 0:
        objective += reg_weight * cp.atoms.norm(B, 'fro')
    prob = cp.Problem(cp.Minimize(objective),
                      [(P @ A) <= ((2 * (X @ B.T)) - C)])
    # solve
    prob.solve()
    return B.value
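
# A note on the formulation above (a sketch of the reasoning, following the
# reformulation in Elmachtoub and Grigas, 2021): the inner maximization in the
# SPO+ loss, max_w {(c_i - 2 B x_i)^T w} over the flow polytope, is replaced
# by its LP dual, whose variables are the rows of P. This turns the min-max
# problem into a single linear program over B and P jointly.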

def GradientDescentSolution(A, b, X, C, batch_size=5, epsilon=0.001, epsilonsample=5):
    '''
    Computes a solution that minimizes the SPO+ loss over the hypothesis class of linear models B via stochastic gradient descent.

    Parameters:
        np.array A: constraint matrix [num_nodes, num_edges]
        np.array b: RHS of the constraints [num_nodes]
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        int batch_size: batch size
        float epsilon: the threshold used for the algorithm's stopping condition
        int epsilonsample: the number of steps over which the change in predictions is averaged before being compared to epsilon in the stopping condition

    Returns:
        np.array B: coefficient matrix of the fitted linear models [num_edges, num_features]
    '''
    loop = True
    step = 0  # iteration counter for gradient descent
    epsilons = []  # holds the last `epsilonsample` changes in predictions

    # solve every shortest path problem
    solver = ShortestPathSolver(A, b)
    W_c = np.apply_along_axis(solver.solve, 1, C)  # W_c has shape [num_samples, num_edges]
    B = np.zeros((A.shape[1], X.shape[1]))  # B has shape [num_edges, num_features]

    while loop:
        # get a random sample of indices of size batch_size (with replacement)
        batch_indices = np.random.randint(0, len(X), batch_size)
        X_sample = X[batch_indices]
        C_sample = C[batch_indices]
        W_c_sample = W_c[batch_indices]

        # compute a subgradient of the unregularized SPO+ objective on the batch (up to a constant factor, which is absorbed by the learning rate)
        objectives = (2 * (X_sample @ B.T)) - C_sample
        W_batch = np.apply_along_axis(solver.solve, 1, objectives)
        G_batch = (W_c_sample - W_batch).T @ X_sample

        # calculate the gradient step with a decaying learning rate
        grad = G_batch / batch_size
        learning_rate = 1 / (step + 1) ** (1 / 2)
        grad_step = learning_rate * grad

        # calculate new weights
        B_new = B - grad_step

        # stopping condition: stop once the average change in predictions over the last epsilonsample steps falls below epsilon
        if len(epsilons) == epsilonsample:
            epsilons.pop(0)
        epsilons.append(np.mean(np.abs(B @ X.T - B_new @ X.T)))
        if np.mean(epsilons) < epsilon:
            loop = False
            print(f'Converged after {step} steps')

        # update weights
        B = B_new
        step += 1

    return B
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# smart_predict_optimize

smart_predict_optimize is a Python implementation of the "Smart, Predict then Optimize" (Elmachtoub and Grigas, 2021) framework that compares the computational efficiency and performance of fitting a linear SPO model with a linear program versus stochastic gradient descent.

## Package installation

All packages that are used can be found in requirements.txt. To install them, run the following:
```bash
pip install -r requirements.txt
```

## Code Overview

### HelperFunctions.py
Contains the majority of the code, including functions and classes to:
- Generate synthetic data and formulate it into a shortest path problem.
- Compute the SPO and SPO+ losses.
- Solve a shortest path problem.
- Fit a linear model that predicts the parameters of a shortest path problem under the SPO+ loss via a linear program.
- Fit a linear model that predicts the parameters of a shortest path problem under the SPO+ loss via stochastic gradient descent.

### Experiments.py
Generates data and runs experiments using functions and classes from HelperFunctions.py. Produces a .pkl file (SPOresultsAllVars.pkl) of the experiment output.

### ResultsAnalysis.ipynb
Visualizes the results of the experiments generated by Experiments.py. Reads SPOresultsAllVars.pkl and writes the output plots to the "plots" directory.

## Usage

To reproduce the plots, follow the steps below:
1. Generate the experiment data. Note that running this code takes a significant amount of time (20+ hours) on standard consumer hardware.

```bash
python Experiments.py
```
2. Generate the plots by running each cell in ResultsAnalysis.ipynb.

Alternatively, the plots can be recreated from the SPOresultsAllVars.pkl file included in this repository.
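For example, the saved results can be loaded directly with pandas (a minimal sketch; the metric names follow the keys written by Experiments.py):

```python
import pandas as pd

# The pickle holds a DataFrame indexed by (n, p, grid_dim, metric)
results = pd.read_pickle('SPOresultsAllVars.pkl')

# Pull out one metric across all parameter combinations
sgd_runtimes = results.xs('SGD_runtime', level='metric')
print(sgd_runtimes.head())
```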
## Project Status

This project is completed and not actively being worked on.
--------------------------------------------------------------------------------
/SPOresultsAllVars.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/SPOresultsAllVars.pkl
--------------------------------------------------------------------------------
/plots/AbsoluteLoss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/AbsoluteLoss.png
--------------------------------------------------------------------------------
/plots/RelativeLoss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/RelativeLoss.png
--------------------------------------------------------------------------------
/plots/Runtime.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/Runtime.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.20.2
cvxopt==1.2.6
cvxpy==1.1.11
matplotlib==3.4.1
pandas==1.1.4
--------------------------------------------------------------------------------