├── Experiments.py
├── HelperFunctions.py
├── README.md
├── ResultsAnalysis.ipynb
├── SPOresultsAllVars.pkl
├── plots
│   ├── AbsoluteLoss.png
│   ├── RelativeLoss.png
│   └── Runtime.png
└── requirements.txt
--------------------------------------------------------------------------------
/Experiments.py:
--------------------------------------------------------------------------------
from HelperFunctions import *
import time
import numpy as np
import pandas as pd
import pickle

def problem_size_experiment(params, noise, degree, sigma, iterations=30):
    '''
    Runs the direct and SGD solvers for every combination of the given input parameters.

    Parameters:
        dict{str: list} params: dictionary of parameter values to experiment with. Must specify 'n', 'p', and 'grid_dim'
        float noise: multiplicative noise term applied to the cost vector, sampled from a uniform distribution on [1 - noise, 1 + noise]
        int degree: polynomial degree of the generated cost vector. When degree = 1, the expected value of c is linear in x; degree > 1 controls the amount of model misspecification
        float sigma: standard deviation of each feature dimension, broadcast to an array of length p
        int iterations: number of independent repetitions per parameter combination

    Returns:
        pd.DataFrame of experimental results, including runtime, SPO loss, and SPO+ loss for both the direct and SGD solvers
    '''
    experimental_results = {}

    # For each parameter combination, solve the problem instance and record results
    for grid_dim in params['grid_dim']:
        for p in params['p']:
            for n in params['n']:
                # create sigma array of length p
                sigma_arr = np.full(p, sigma)

                direct_runtimeparams = []
                SGD_runtimeparams = []

                SPO_loss_directparams = []
                SPO_loss_SGDparams = []

                SPO_plus_loss_directparams = []
                SPO_plus_loss_SGDparams = []

                # Create shortest path constraints
                A, b = CreateShortestPathConstraints(grid_dim)
                for i in range(iterations):
                    print(n, p, grid_dim, i)
                    # Generate the dataset
                    X, C = generate_data(n, p, grid_dim, sigma_arr, noise, degree)

                    # Run the direct solution and record the time
                    start_direct = time.time()
                    B_direct = DirectSolution(A, b, X, C)
                    end_direct = time.time() - start_direct
                    direct_runtimeparams.append(end_direct)

                    # Run the SGD solution and record the time
                    start_sgd = time.time()
                    B_SGD = GradientDescentSolution(A, b, X, C, batch_size=10, epsilon=0.001)
                    end_sgd = time.time() - start_sgd
                    SGD_runtimeparams.append(end_sgd)

                    # Record losses
                    solver = ShortestPathSolver(A, b)
                    SPO_loss_directparams.append(SPOLoss(solver, X, C, B_direct))
                    SPO_loss_SGDparams.append(SPOLoss(solver, X, C, B_SGD))
                    SPO_plus_loss_directparams.append(SPOplusLoss(solver, X, C, B_direct))
                    SPO_plus_loss_SGDparams.append(SPOplusLoss(solver, X, C, B_SGD))

                # store results from all iterations in the results dict
                experimental_results[(n, p, grid_dim, 'direct_runtime')] = direct_runtimeparams
                experimental_results[(n, p, grid_dim, 'SGD_runtime')] = SGD_runtimeparams

                experimental_results[(n, p, grid_dim, 'SPO_loss_direct')] = SPO_loss_directparams
                experimental_results[(n, p, grid_dim, 'SPO_loss_SGD')] = SPO_loss_SGDparams

                experimental_results[(n, p, grid_dim, 'SPO_plus_loss_direct')] = SPO_plus_loss_directparams
                experimental_results[(n, p, grid_dim, 'SPO_plus_loss_SGD')] = SPO_plus_loss_SGDparams

                # checkpoint intermediate results to disk after each parameter combination
                checkpoint = pd.DataFrame(experimental_results).transpose()
                checkpoint.index.names = ['n', 'p', 'grid_dim', 'metric']
                pickle.dump(checkpoint, open('SPOresultsCheckpoint.pkl', 'wb'))

    experimental_results = pd.DataFrame(experimental_results).transpose()
    experimental_results.index.names = ['n', 'p', 'grid_dim', 'metric']
    return experimental_results
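# Illustration of the returned structure (values are hypothetical): for
# params = {"n": [100], "p": [5], "grid_dim": [5]}, the DataFrame is indexed by
# (n, p, grid_dim, metric) tuples such as (100, 5, 5, 'direct_runtime'), and
# each row holds one recorded value per iteration. A single metric can be
# pulled out with, e.g., results.xs('direct_runtime', level='metric').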

params = {"n": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000], "p": [5, 10, 15, 20], "grid_dim": [5]}
noise = 0.25
degree = 3
sigma = 0.2

experiment1 = problem_size_experiment(params, noise, degree, sigma, iterations=30)
pickle.dump(experiment1, open('SPOresultsAllVars.pkl', 'wb'))
--------------------------------------------------------------------------------
/HelperFunctions.py:
--------------------------------------------------------------------------------
import numpy as np
import cvxpy as cp


def generate_data(n, p, grid_dim, sigma, noise, degree):
    '''
    Generate synthetic data for the grid_dim x grid_dim grid shortest path problem.

    Parameters:
        int n: number of data points to generate
        int p: number of features
        int grid_dim: dimension of the square grid, which determines the size of the cost vector
        np.array sigma: array of length p giving the standard deviation of each feature dimension, i.e. x_j ~ N(0, sigma_j^2)
        float noise: multiplicative noise term applied to the cost vector, sampled from a uniform distribution on [1 - noise, 1 + noise]
        int degree: polynomial degree of the generated cost vector. When degree = 1, the expected value of c is linear in x; degree > 1 controls the amount of model misspecification

    Returns:
        np.array X: feature data of dimension [n, p]
        np.array C: cost data of dimension [n, d]
    '''
    # Number of edges in the grid, i.e. the size of the cost vector
    d = grid_dim * (grid_dim - 1) * 2

    # Parameters of the true model: each entry of B_star is a Bernoulli RV that equals 1 with probability 0.5
    B_star = np.random.binomial(size=[d, p], n=1, p=0.5)

    # Generate feature data: each row is a training point drawn from a multivariate Gaussian with independent entries, x_j ~ N(0, sigma_j^2)
    X = np.random.normal(loc=0, scale=sigma, size=[n, p])

    # Generate cost data: a polynomial of the linear predictions, perturbed by i.i.d. multiplicative noise
    noise_vector = np.random.uniform(low=1 - noise, high=1 + noise, size=[n, d])
    C = np.multiply((((1 / np.sqrt(p) * B_star @ X.T) + 3) ** degree + 1).T, noise_vector)

    return X, C
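
# Quick sanity check (illustrative only; _generate_data_example is a
# hypothetical helper, not part of the experiments): for a 5x5 grid the cost
# vector has d = 5 * 4 * 2 = 40 entries, so X and C should come back with
# shapes (100, 5) and (100, 40) respectively.
def _generate_data_example():
    X, C = generate_data(n=100, p=5, grid_dim=5, sigma=np.full(5, 0.2), noise=0.25, degree=3)
    assert X.shape == (100, 5) and C.shape == (100, 40)
    return X, C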

def CreateShortestPathConstraints(gridsize):
    '''
    Generate flow-conservation constraints for the gridsize x gridsize shortest path problem.
    Each node in the grid has one constraint whose LHS is its inflows minus its outflows and whose RHS is the desired net flow.
    The desired net flow is 0 for all nodes except the start node, where it is -1, and the end node, where it is 1.

    Parameters:
        int gridsize: size of each dimension of the grid

    Returns:
        np.array A: flow matrix of shape [num_nodes, num_edges]. A[i, j] is -1 if edge j is an outflow of node i and 1 if edge j is an inflow of node i
        np.array b: RHS of the constraints [num_nodes]
    '''
    # define node and edge counts
    num_nodes = gridsize**2
    num_directional_edges = num_nodes - gridsize  # number of vertical edges, which equals the number of horizontal edges
    num_edges = num_directional_edges * 2  # vertical and horizontal edges combined

    # initialize empty A and b arrays
    A = np.zeros((num_nodes, num_edges), np.int8)
    b = np.zeros(num_nodes, np.int8)

    # fill in the flow matrix
    # nodes are ordered by rows, e.g. in a 3x3 grid the first row's nodes are indices 0, 1, 2 and the second row's are 3, 4, 5
    # horizontal edges are enumerated first, then vertical edges
    horizontaledgepointer = 0
    verticaledgepointer = 0
    for i in range(num_directional_edges):
        # update flow matrix for horizontal edges
        outnode = horizontaledgepointer
        innode = horizontaledgepointer + 1

        A[outnode, i] = -1
        A[innode, i] = 1
        horizontaledgepointer += 1
        if (horizontaledgepointer + 1) % gridsize == 0:  # node is at the right edge of the grid, so go to the next row
            horizontaledgepointer += 1

        # update flow matrix for vertical edges
        outnode = verticaledgepointer
        innode = verticaledgepointer + gridsize
        A[outnode, num_directional_edges + i] = -1
        A[innode, num_directional_edges + i] = 1
        verticaledgepointer += gridsize
        if verticaledgepointer + gridsize >= num_nodes:  # node is at the bottom edge of the grid, so go to the next column
            verticaledgepointer = (verticaledgepointer % gridsize) + 1

    # update RHS for the start and end nodes
    b[0] = -1
    b[-1] = 1
    return A, b


class ShortestPathSolver:
    def __init__(self, A, b):
        '''
        Defines a linear program that solves the shortest path problem with constraint matrix A and RHS b.
        The LP relaxation suffices because network flow constraint matrices are totally unimodular, so an optimal vertex solution is integral.

        Parameters:
            np.array A: constraint matrix
            np.array b: RHS of the constraints
        '''
        if A.shape[0] != b.size:
            raise ValueError('A and b have incompatible shapes')
        numedges = A.shape[1]
        self.c = cp.Parameter(numedges)
        self.w = cp.Variable(numedges, nonneg=True)
        self.prob = cp.Problem(cp.Minimize(self.c @ self.w),
                               [A @ self.w == b, self.w <= 1])  # the trivial upper bound is an inequality constraint required by the GLPK_MI solver

    def solve(self, c):
        '''
        Solves the predefined optimization problem with cost vector c and returns the decision variable array.

        Parameters:
            np.array c: cost vector

        Returns:
            np.array solution to the shortest path problem
        '''
        self.c.project_and_assign(c)
        self.prob.solve()
        return self.w.value


def SPOLoss(solver, X, C, B):
    '''
    Computes the SPO (decision) loss.

    Parameters:
        ShortestPathSolver solver: a shortest path solver object
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        np.array B: weights of the linear model

    Returns:
        SPO loss as a float
    '''
    W = np.apply_along_axis(solver.solve, 1, X @ B.T)
    W_star = np.apply_along_axis(solver.solve, 1, C)
    return (np.multiply(C, W).sum() - np.multiply(C, W_star).sum()) / W.shape[0]
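
# In the notation of Elmachtoub and Grigas (2021), SPOLoss computes the sample
# average of the decision regret
#     l_SPO(Bx, c) = c^T w*(Bx) - c^T w*(c),
# i.e. the excess cost incurred by acting on the predicted cost vector Bx
# instead of the true cost vector c, where w*(.) is the shortest path oracle.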

def SPOplusLoss(solver, X, C, B):
    '''
    Computes the SPO+ loss, a convex upper bound on the SPO loss.

    Parameters:
        ShortestPathSolver solver: a shortest path solver object
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        np.array B: weights of the linear model

    Returns:
        SPO+ loss as a float
    '''
    pred2 = 2 * (X @ B.T)
    W_support = np.apply_along_axis(solver.solve, 1, pred2 - C)
    support = np.multiply(C - pred2, W_support).sum(axis=1)
    W_star = np.apply_along_axis(solver.solve, 1, C)
    z_star = np.multiply(C, W_star).sum(axis=1)
    return (support + np.multiply(pred2, W_star).sum(axis=1) - z_star).mean()


def DirectSolution(A, b, X, C, reg_weight=0.0):
    '''
    Computes the direct solution that minimizes the SPO+ loss over the hypothesis class of linear models B.

    Parameters:
        np.array A: constraint matrix [num_nodes, num_edges]
        np.array b: RHS of the constraints [num_nodes]
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        float reg_weight: the regularization weight

    Returns:
        np.array B: coefficient matrix of the fitted linear models [num_edges, num_features]
    '''
    num_samples = X.shape[0]

    # solve every shortest path problem
    solver = ShortestPathSolver(A, b)
    W = np.apply_along_axis(solver.solve, 1, C)  # W has shape [num_samples, num_edges]

    # define linear program variables
    B = cp.Variable((A.shape[1], X.shape[1]))  # B has shape [num_edges, num_features]
    P = cp.Variable((num_samples, A.shape[0]), nonneg=True)  # P has shape [num_samples, num_nodes]

    # define linear program objective and constraints
    objective = (cp.sum(-P @ b) + 2 * cp.sum(cp.multiply(X @ B.T, W)) - cp.sum(cp.multiply(W, C))) / num_samples
    if reg_weight > 0:
        objective += reg_weight * cp.atoms.norm(B, 'fro')
    prob = cp.Problem(cp.Minimize(objective),
                      [(P @ A) <= ((2 * (X @ B.T)) - C)])
    # solve
    prob.solve()
    return B.value
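
# A note on the formulation above (a sketch of the reasoning, following the
# reformulation in Elmachtoub and Grigas, 2021): the inner maximization in the
# SPO+ loss, max_w {(c_i - 2 B x_i)^T w} over the flow polytope, is replaced
# by its LP dual, whose variables are the rows of P. This turns the min-max
# problem into a single linear program over B and P jointly.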

def GradientDescentSolution(A, b, X, C, batch_size=5, epsilon=0.001, epsilonsample=5):
    '''
    Computes a solution that minimizes the SPO+ loss over the hypothesis class of linear models B via stochastic gradient descent.

    Parameters:
        np.array A: constraint matrix [num_nodes, num_edges]
        np.array b: RHS of the constraints [num_nodes]
        np.array X: feature matrix [num_samples, num_features]
        np.array C: cost matrix [num_samples, num_edges]
        int batch_size: batch size
        float epsilon: the threshold used for the algorithm's stopping condition
        int epsilonsample: the number of steps over which the change in predictions is averaged before being compared to epsilon in the stopping condition

    Returns:
        np.array B: coefficient matrix of the fitted linear models [num_edges, num_features]
    '''
    loop = True
    step = 0  # iteration counter for gradient descent
    epsilons = []  # holds the last `epsilonsample` changes in predictions

    # solve every shortest path problem
    solver = ShortestPathSolver(A, b)
    W_c = np.apply_along_axis(solver.solve, 1, C)  # W_c has shape [num_samples, num_edges]
    B = np.zeros((A.shape[1], X.shape[1]))  # B has shape [num_edges, num_features]

    while loop:
        # get a random sample of indices of size batch_size (with replacement)
        batch_indices = np.random.randint(0, len(X), batch_size)
        X_sample = X[batch_indices]
        C_sample = C[batch_indices]
        W_c_sample = W_c[batch_indices]

        # compute a subgradient of the unregularized SPO+ objective on the batch (up to a constant factor, which is absorbed by the learning rate)
        objectives = (2 * (X_sample @ B.T)) - C_sample
        W_batch = np.apply_along_axis(solver.solve, 1, objectives)
        G_batch = (W_c_sample - W_batch).T @ X_sample

        # calculate the gradient step with a decaying learning rate
        grad = G_batch / batch_size
        learning_rate = 1 / (step + 1) ** (1 / 2)
        grad_step = learning_rate * grad

        # calculate new weights
        B_new = B - grad_step

        # stopping condition: stop once the average change in predictions over the last epsilonsample steps falls below epsilon
        if len(epsilons) == epsilonsample:
            epsilons.pop(0)
        epsilons.append(np.mean(np.abs(B @ X.T - B_new @ X.T)))
        if np.mean(epsilons) < epsilon:
            loop = False
            print(f'Converged after {step} steps')

        # update weights
        B = B_new
        step += 1

    return B
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# smart_predict_optimize

smart_predict_optimize is a Python implementation of the "Smart, Predict then Optimize" (Elmachtoub and Grigas, 2021) framework that compares the computational efficiency and performance of fitting a linear SPO model with a linear program versus stochastic gradient descent.

## Package installation

All packages that are used can be found in requirements.txt. To install them, run the following:
```bash
pip install -r requirements.txt
```

## Code Overview

### HelperFunctions.py
Contains the majority of the code, including functions and classes to:
- Generate synthetic data and formulate it into a shortest path problem.
- Compute the SPO and SPO+ losses.
- Solve a shortest path problem.
- Fit a linear model that predicts the parameters of a shortest path problem under the SPO+ loss via a linear program.
- Fit a linear model that predicts the parameters of a shortest path problem under the SPO+ loss via stochastic gradient descent.

### Experiments.py
Generates data and runs experiments using functions and classes from HelperFunctions.py. Produces a .pkl file (SPOresultsAllVars.pkl) of the experiment output.

### ResultsAnalysis.ipynb
Visualizes the results of the experiments generated by Experiments.py. Reads SPOresultsAllVars.pkl and writes the output plots to the "plots" directory.

## Usage

To reproduce the plots, follow the steps below:
1. Generate the experiment data. Note that running this code takes a significant amount of time (20+ hours) on standard consumer hardware.

```bash
python Experiments.py
```
2. Generate the plots by running each cell in ResultsAnalysis.ipynb.

Alternatively, the plots can be recreated from the SPOresultsAllVars.pkl file included in this repository.
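For example, the saved results can be loaded directly with pandas (a minimal sketch; the metric names follow the keys written by Experiments.py):

```python
import pandas as pd

# The pickle holds a DataFrame indexed by (n, p, grid_dim, metric)
results = pd.read_pickle('SPOresultsAllVars.pkl')

# Pull out one metric across all parameter combinations
sgd_runtimes = results.xs('SGD_runtime', level='metric')
print(sgd_runtimes.head())
```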
## Project Status

This project is completed and not actively being worked on.
--------------------------------------------------------------------------------
/SPOresultsAllVars.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/SPOresultsAllVars.pkl
--------------------------------------------------------------------------------
/plots/AbsoluteLoss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/AbsoluteLoss.png
--------------------------------------------------------------------------------
/plots/RelativeLoss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/RelativeLoss.png
--------------------------------------------------------------------------------
/plots/Runtime.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelzcjia/smart_predict_optimize/6f38759c3162efb36412d8f9a616976c8f27c613/plots/Runtime.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy==1.20.2
cvxopt==1.2.6
cvxpy==1.1.11
matplotlib==3.4.1
pandas==1.1.4
--------------------------------------------------------------------------------