├── .gitignore
├── Energy
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── SchedulingInstances
│   │   ├── load1
│   │   │   └── day01.txt
│   │   ├── load2
│   │   │   └── day01.txt
│   │   └── load3
│   │       └── day01.txt
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── ICON_solving.py
│   │   ├── PO_models.py
│   │   ├── comb_solver.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── get_energy.py
│   │   ├── prices2013.dat
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── requirements.txt
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── testenergy.py
├── HyperparamConfiguration.pdf
├── Knapsack
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── Data.npz
│   │   ├── PO_models.py
│   │   ├── comb_solver.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── testknapsack.py
├── LICENSE
├── Matching
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── NNModels.py
│   │   ├── PO_models.py
│   │   ├── bipartite.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   └── utils.py
│   ├── config.json
│   ├── get_data.sh
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── test_matching.py
├── Portfolio
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── PO_modelsSP.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── optimizer_module.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── readme.md
│   └── test_sp.py
├── README.md
├── ShortestPath
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── PO_modelsSP.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── optimizer_module.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── test_sp.py
├── environment.yml
├── requirements.txt
└── warcraft
    ├── DPO
    │   ├── fenchel_young.py
    │   └── perturbations.py
    ├── TestWarcraft.py
    ├── Trainer
    │   ├── Trainer.py
    │   ├── computervisionmodels.py
    │   ├── data_utils.py
    │   ├── diff_layer.py
    │   ├── metric.py
    │   └── utils.py
    ├── comb_modules
    │   ├── dijkstra.py
    │   ├── gurobi_dijkstra.py
    │   ├── losses.py
    │   └── utils.py
    ├── config.json
    ├── data
    │   └── data_prep.sh
    ├── imle
    │   ├── __init__.py
    │   ├── noise.py
    │   ├── target.py
    │   └── wrapper.py
    ├── intopt
    │   ├── intopt.py
    │   ├── presolve.py
    │   ├── solveLP.py
    │   └── util.py
    └── readme.md

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Energy/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 
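            # Note on gradients: forward() reports the squared norm of `diff` as the
            # loss value, while backward() below returns dy * diff directly -- the
            # defining Fenchel-Young property that the loss gradient w.r.t. the
            # input is E[perturbed argmax] - y_true.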
40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load1/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 3 4 | 0 190 0.1 0.0 5 | 5481 6 | 1 170 0.1 0.1 7 | 5583 8 | 2 130 0.0 0.1 9 | 5547 10 | 10 11 | 0 10 1 25 471.40 12 | 2610 13 | 1 23 8 47 426.43 14 | 2361 15 | 2 25 20 47 389.04 16 | 2154 17 | 3 7 4 42 200.66 18 | 1111 19 | 4 19 19 47 104.39 20 | 578 21 | 5 19 19 48 744.67 22 | 4123 23 | 6 11 1 14 468.51 24 | 2594 25 | 7 30 5 48 292.05 26 | 1617 27 | 8 13 4 22 505.36 28 | 2798 29 | 9 10 2 37 227.03 30 | 1257 31 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load2/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 3 4 | 0 120 0.1 0.1 5 | 5433 6 | 1 130 0.0 0.1 7 | 5639 8 | 2 130 0.0 0.1 9 | 5595 10 | 15 11 | 0 8 4 48 64.10 12 | 356 13 | 1 6 7 48 207.59 14 | 1153 15 | 2 13 14 47 475.85 16 | 2643 17 | 3 10 16 45 191.57 18 | 1064 19 | 4 25 14 47 18.36 20 | 102 21 | 5 3 3 26 34.21 22 | 190 23 | 6 26 0 33 3.78 24 | 21 25 | 7 13 2 21 390.33 26 | 2168 27 | 8 9 12 48 8.64 28 | 48 29 | 9 7 7 16 185.26 30 | 1029 31 | 10 26 7 47 393.75 32 | 2187 33 | 11 3 0 31 240.36 34 | 1335 35 | 12 4 2 44 606.56 36 | 3369 37 | 13 26 14 47 737.09 38 | 4094 39 | 14 26 7 47 200.03 40 | 1111 41 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load3/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 5 4 | 0 150 0.1 0.1 5 | 5644 6 | 1 190 0.1 0.0 7 | 5433 8 | 2 170 0.1 0.1 
9 | 5533 10 | 3 120 0.1 0.1 11 | 5429 12 | 4 130 0.0 0.1 13 | 5530 14 | 20 15 | 0 22 22 48 507.56 16 | 2798 17 | 1 2 0 17 495.95 18 | 2734 19 | 2 17 2 30 336.32 20 | 1854 21 | 3 25 10 47 523.70 22 | 2887 23 | 4 23 10 46 474.54 24 | 2616 25 | 5 19 27 47 85.08 26 | 469 27 | 6 1 0 5 303.12 28 | 1671 29 | 7 19 0 33 381.48 30 | 2103 31 | 8 9 38 48 377.86 32 | 2083 33 | 9 22 12 47 392.73 34 | 2165 35 | 10 1 1 47 109.93 36 | 606 37 | 11 20 0 34 208.79 38 | 1151 39 | 12 25 2 43 74.74 40 | 412 41 | 13 1 0 23 323.98 42 | 1786 43 | 14 11 6 21 230.92 44 | 1273 45 | 15 11 4 47 211.69 46 | 1167 47 | 16 45 0 47 401.80 48 | 2215 49 | 17 8 15 48 78.36 50 | 432 51 | 18 5 30 47 388.38 52 | 2141 53 | 19 18 1 48 224.03 54 | 1235 55 | -------------------------------------------------------------------------------- /Energy/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | 6 | 7 | 8 | ###################################### NCE Loss Functions ######################################### 9 | class NCE(torch.nn.Module): 10 | def __init__(self, minimize= True): 11 | super().__init__() 12 | self.mm = 1 if minimize else -1 13 | def forward(self, y_hat,y_true, sol_true,cache): 14 | 15 | loss = 0 16 | mm = self.mm 17 | 18 | 19 | for ii in range(len( y_hat )): 20 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 21 | loss /= len(y_hat) 22 | return loss 23 | 24 | class NCE_c(torch.nn.Module): 25 | def __init__(self, minimize= True): 26 | super().__init__() 27 | self.mm = 1 if minimize else -1 28 | def forward(self, y_hat,y_true, sol_true,cache): 29 | 30 | loss = 0 31 | mm = self.mm 32 | for ii in range(len( y_hat )): 33 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 34 | loss /= len(y_hat) 35 | return loss 36 | 37 | 38 | class MAP(torch.nn.Module): 39 | def __init__(self, minimize= True): 40 | super().__init__() 41 | self.mm = 1 if minimize else -1 42 | def forward(self, y_hat,y_true,sol_true,cache): 43 | 44 | loss = 0 45 | mm = self.mm 46 | 47 | for ii in range(len( y_hat )): 48 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 49 | loss /= len(y_hat) 50 | return loss 51 | 52 | 53 | class MAP_c(torch.nn.Module): 54 | def __init__(self, minimize= True): 55 | super().__init__() 56 | self.mm = 1 if minimize else -1 57 | def forward(self, y_hat,y_true,sol_true,cache): 58 | ''' 59 | pred_weights: predicted cost vector [batch_size, img,img] 60 | true_weights: actua cost vector [batch_size, img,img] 61 | target: true shortest path [batch_size, img,img] 62 | cache: cache is torch array [cache_size, img,img] 63 | ''' 64 | loss = 0 65 | mm = self.mm 66 | 67 | for ii in range(len( y_hat )): 68 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 69 | loss /= len(y_hat) 70 | return loss 71 | 72 | 73 | class MAP_c_actual(torch.nn.Module): 74 | def __init__(self, minimize= True): 75 | super().__init__() 76 | self.mm = 1 if minimize else -1 77 | def forward(self, y_hat,y_true,sol_true,cache): 78 | ''' 79 | pred_weights: predicted cost vector [batch_size, img,img] 80 | true_weights: actua cost vector [batch_size, img,img] 81 | target: true shortest path [batch_size, img,img] 82 | cache: cache is torch array [cache_size, img,img] 83 | ''' 84 | loss = 0 85 | mm = self.mm 86 | 87 | for ii in range(len( y_hat )): 88 | 89 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] 
- y_true[ii]) ).sum(dim=(1)) ).max() 90 | loss /= len(y_hat) 91 | return loss 92 | 93 | 94 | ###################################### Ranking Loss Functions ######################################### 95 | class PointwiseLoss(torch.nn.Module): 96 | def __init__(self): 97 | super().__init__() 98 | def forward(self, y_hat,y_true,sol_true,cache): 99 | ''' 100 | pred_weights: predicted cost vector [batch_size, img,img] 101 | true_weights: actua cost vector [batch_size, img,img] 102 | target: true shortest path [batch_size, img,img] 103 | cache: cache is torch array [cache_size, img,img] 104 | ''' 105 | loss = 0 106 | 107 | for ii in range(len( y_hat )): 108 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 109 | loss /= len(y_hat) 110 | 111 | return loss 112 | class ListwiseLoss(torch.nn.Module): 113 | def __init__(self, temperature=0., minimize= True): 114 | super().__init__() 115 | self.temperature = temperature 116 | self.mm = 1 if minimize else -1 117 | def forward(self, y_hat,y_true,sol_true,cache): 118 | 119 | loss = 0 120 | mm, temperature = self.mm, self.temperature 121 | 122 | for ii in range(len( y_hat )): 123 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 124 | loss /= len(y_hat) 125 | 126 | return loss 127 | 128 | 129 | class PairwisediffLoss(torch.nn.Module): 130 | def __init__(self, minimize=True): 131 | super().__init__() 132 | self.mm = 1 if minimize else -1 133 | 134 | def forward(self, y_hat,y_true,sol_true,cache): 135 | ''' 136 | pred_weights: predicted cost vector [batch_size, img,img] 137 | true_weights: actua cost vector [batch_size, img,img] 138 | target: true shortest path [batch_size, img,img] 139 | cache: cache is torch array [cache_size, img,img] 140 | ''' 141 | 142 | loss = 0 143 | for ii in range(len( y_hat )): 144 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 145 | 146 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 147 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 148 | 149 | 150 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 151 | loss /= len(y_hat) 152 | return loss 153 | 154 | class PairwiseLoss(torch.nn.Module): 155 | def __init__(self, margin=0., minimize=True): 156 | super().__init__() 157 | self.margin = margin 158 | self.mm = 1 if minimize else -1 159 | def forward(self, y_hat,y_true,sol_true,cache): 160 | ''' 161 | pred_weights: predicted cost vector [batch_size, img,img] 162 | true_weights: actua cost vector [batch_size, img,img] 163 | target: true shortest path [batch_size, img,img] 164 | cache: cache is torch array [cache_size, img,img] 165 | ''' 166 | relu = torch.nn.ReLU() 167 | loss = 0 168 | mm, margin = self.mm, self.margin 169 | for ii in range(len( y_hat )): 170 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 171 | 172 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 173 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 174 | 175 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 176 | loss /= len(y_hat) 177 | return loss -------------------------------------------------------------------------------- /Energy/Trainer/ICON_solving.py: 
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | from gurobipy import *
4 | 
5 | def main():
6 |     # Placeholder entry point (the original call referenced an undefined main()):
7 |     # in this repository the ICON scheduling model is built and solved through
8 |     # Trainer.comb_solver.SolveICON, so there is nothing to run from here.
9 |     pass
10 | 
11 | if __name__ == "__main__":
12 |     main()
--------------------------------------------------------------------------------
/Energy/Trainer/data_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from torch.utils.data import Dataset, DataLoader
4 | import pytorch_lightning as pl
5 | import torch
6 | from sklearn.preprocessing import StandardScaler
7 | import sklearn
8 | from Trainer.utils import batch_solve
9 | from Trainer.get_energy import get_energy
10 | from Trainer.comb_solver import SolveICON
11 | 
12 | class EnergyDatasetWrapper():
13 |     def __init__(self, X, y, sol=None, solver=None):
14 |         self.X = X.astype(np.float32)
15 |         self.y = y.astype(np.float32)
16 |         if sol is None:
17 |             sol = batch_solve(solver, y)
18 | 
19 |         self.sol = np.array(sol).astype(np.float32)
20 | 
21 |     def __len__(self):
22 |         return len(self.y)
23 | 
24 |     def __getitem__(self, idx):
25 |         return self.X[idx], self.y[idx], self.sol[idx]
26 | 
27 | 
28 | class EnergyDataModule(pl.LightningDataModule):
29 |     def __init__(self, param, standardize=True, batch_size=16, generator=None, num_workers=4, seed=0, relax=False):
30 |         super().__init__()
31 | 
32 |         x_train, y_train, x_test, y_test = get_energy(fname='Trainer/prices2013.dat')
33 | 
34 | 
35 |         x_train = x_train[:, 1:]
36 |         x_test = x_test[:, 1:]
37 |         if standardize:
38 |             scaler = StandardScaler()
39 |             x_train = scaler.fit_transform(x_train)
40 |             x_test = scaler.transform(x_test)
41 |         x_train = x_train.reshape(-1, 48, x_train.shape[1])
42 |         y_train = y_train.reshape(-1, 48)
43 |         x_test = x_test.reshape(-1, 48, x_test.shape[1])
44 |         y_test = y_test.reshape(-1, 48)
45 |         x = np.concatenate((x_train, x_test), axis=0)
46 |         y = np.concatenate((y_train, y_test), axis=0)
47 |         x, y = sklearn.utils.shuffle(x, y, random_state=seed)
48 |         x_train, y_train = x[:550], y[:550]
49 |         x_valid, y_valid = x[550:650], y[550:650]
50 |         x_test, y_test = x[650:], y[650:]
51 | 
52 |         solver = SolveICON(relax=relax, **param)
53 |         solver.make_model()
54 | 
55 |         self.train_df = EnergyDatasetWrapper(x_train, y_train, solver=solver)
56 |         self.valid_df = EnergyDatasetWrapper(x_valid, y_valid, solver=solver)
57 |         self.test_df = EnergyDatasetWrapper(x_test, y_test, solver=solver)
58 |         self.train_solutions = self.train_df.sol
59 | 
60 |         self.batch_size = batch_size
61 |         self.generator = generator
62 |         self.num_workers = num_workers
63 | 
64 | 
65 |     def train_dataloader(self):
66 |         return DataLoader(self.train_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
67 | 
68 |     def val_dataloader(self):
69 |         return DataLoader(self.valid_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
70 | 
71 |     def test_dataloader(self):
72 |         return DataLoader(self.test_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
--------------------------------------------------------------------------------
/Energy/Trainer/diff_layer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from Trainer.utils import batch_solve
5 | 
6 | def SPOlayer(solver, minimize=True):
7 |     mm = 1 if minimize else -1
8 |     class SPOlayer_cls(torch.autograd.Function):
9 |         @staticmethod
10 |         def forward(ctx, y_hat, y_true, sol_true):
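            # SPO+ loss (Elmachtoub & Grigas, "Smart 'Predict, then Optimize'"):
            # the forward pass scores the solver's decision under the predicted
            # costs y_hat against the true optimum; backward() below returns the
            # SPO+ subgradient mm * (sol_true - sol(2 * y_hat - y_true)).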
11 |             sol_hat = batch_solve(solver, y_hat)
12 | 
13 |             ctx.save_for_backward(y_hat, y_true, sol_true)
14 | 
15 |             return (mm * (sol_hat - sol_true) * y_true).sum()
16 | 
17 |         @staticmethod
18 |         def backward(ctx, grad_output):
19 |             y_hat, y_true, sol_true = ctx.saved_tensors
20 |             y_spo = 2 * y_hat - y_true
21 |             sol_spo = batch_solve(solver, y_spo)
22 |             return (sol_true - sol_spo) * mm, None, None
23 |     return SPOlayer_cls.apply
24 | 
25 | 
26 | def DBBlayer(solver, lambda_val=1., minimize=True):
27 |     mm = 1 if minimize else -1
28 |     class DBBlayer_cls(torch.autograd.Function):
29 |         @staticmethod
30 |         def forward(ctx, y_hat, y_true, sol_true):
31 |             sol_hat = batch_solve(solver, y_hat)
32 | 
33 |             ctx.save_for_backward(y_hat, y_true, sol_true, sol_hat)
34 | 
35 |             return sol_hat
36 | 
37 |         @staticmethod
38 |         def backward(ctx, grad_output):
39 |             """
40 |             In the backward pass we compute the gradient to minimize the regret.
41 |             """
42 |             y_hat, y_true, sol_true, sol_hat = ctx.saved_tensors
43 |             y_perturbed = y_hat + mm * lambda_val * grad_output
44 |             sol_perturbed = batch_solve(solver, y_perturbed)
45 | 
46 |             return -mm * (sol_hat - sol_perturbed) / lambda_val, None, None
47 |     return DBBlayer_cls.apply
48 | 
49 | 
50 | 
51 | 
--------------------------------------------------------------------------------
/Energy/Trainer/get_energy.py:
--------------------------------------------------------------------------------
1 | """
2 | Reading in the data, sklearn style...
3 | 
4 | The data contains the following columns:
5 | 
6 | #DateTime Holiday HolidayFlag DayOfWeek WeekOfYear Day Month Year PeriodOfDay ForecastWindProduction SystemLoadEA SMPEA ORKTemperature ORKWindspeed CO2Intensity ActualWindProduction SystemLoadEP2 SMPEP2
7 | 
8 | #DateTime and Holiday: strings, subsumed by the following features; HolidayFlag: Boolean, identical within each day; DayOfWeek WeekOfYear Day Month Year: discrete, identical within each day; PeriodOfDay: discrete 0..47; ORKTemperature ORKWindspeed: contain NaNs and are questionable (actual values); ActualWindProduction SystemLoadEP2 SMPEP2: actual values, with SMPEP2 the label
9 | """
10 | 
11 | import numpy as np
12 | import pandas as pd
13 | from sklearn.model_selection import train_test_split
14 | 
15 | # prep numpy arrays, Xs will contain groupID as first column
16 | def get_energy(fname=None, trainTestRatio=0.70):
17 |     df = get_energy_pandas(fname)
18 | 
19 |     length = df['groupID'].nunique()
20 |     grouplength = 48
21 | 
22 |     # numpy arrays, X contains groupID as first column
23 |     X1g = df.loc[:, df.columns != 'SMPEP2'].values
24 |     y = df.loc[:, 'SMPEP2'].values
25 | 
26 |     # no negative values allowed...for now I just clamp these values to zero. They occur three times in the training data.
27 |     # for i in range(len(y)):
28 |     #     y[i] = max(y[i], 0)
29 | 
30 | 
31 |     # ordered split per complete group
32 |     train_len = int(trainTestRatio * length)
33 | 
34 |     # the splitting
35 |     X_1gtrain = X1g[:grouplength * train_len]
36 |     y_train = y[:grouplength * train_len]
37 |     X_1gtest = X1g[grouplength * train_len:]
38 |     y_test = y[grouplength * train_len:]
39 | 
40 | 
41 | 
42 |     #print(len(X1g_train),len(X1g_test),len(X),len(X1g_train)+len(X1g_test))
43 |     return (X_1gtrain, y_train, X_1gtest, y_test)
44 | 
45 | 
46 | def get_energy_grouped(fname=None):
47 |     df = get_energy_pandas(fname)
48 | 
49 |     # put the 'y's into columns (I hope this respects the ordering!)
50 |     t = df.groupby('groupID')['SMPEP2'].apply(np.array)
51 |     grpY = np.vstack(t.values)  # stack into a 2D array
52 |     # now something similar but for the features... let's naively just take averages
53 |     grpX = df.loc[:, df.columns != 'SMPEP2'].groupby('groupID').mean().values
54 | 
55 |     # train/test splitting, sklearn is so convenient
56 |     (grpX_train, grpX_test, grpY_train, grpY_test) = \
57 |         train_test_split(grpX, grpY, test_size=0.3, shuffle=False)
58 | 
59 |     return (grpX_train, grpY_train, grpX_test, grpY_test)
60 | 
61 | 
62 | def get_energy_pandas(fname=None):
63 |     if fname is None:
64 |         fname = "prices2013.dat"
65 | 
66 |     df = pd.read_csv(fname, delim_whitespace=True, quotechar='"')
67 |     # remove unnecessary columns
68 |     df.drop(['#DateTime', 'Holiday', 'ActualWindProduction', 'SystemLoadEP2'], axis=1, inplace=True)
69 |     # remove columns with missing values
70 |     df.drop(['ORKTemperature', 'ORKWindspeed'], axis=1, inplace=True)
71 | 
72 |     # missing value treatment
73 |     # df[pd.isnull(df).any(axis=1)]
74 |     # impute missing CO2 intensities linearly
75 |     df.loc[df.loc[:, 'CO2Intensity'] == 0, 'CO2Intensity'] = np.nan  # an oddity
76 |     df.loc[:, 'CO2Intensity'].interpolate(inplace=True)
77 |     # remove remaining 3 days with missing values
78 |     grouplength = 48
79 |     for i in range(0, len(df), grouplength):
80 |         day_has_nan = pd.isnull(df.loc[i:i + (grouplength - 1)]).any(axis=1).any()
81 |         if day_has_nan:
82 |             #print("Dropping",i)
83 |             df.drop(range(i, i + grouplength), inplace=True)
84 |     # data is sorted by year, month, day, periodofday; don't want learning over this
85 |     df.drop(['Day', 'Year', 'PeriodOfDay'], axis=1, inplace=True)
86 | 
87 |     # insert group identifier at beginning
88 |     grouplength = 48
89 |     length = int(len(df) / 48)  # 792
90 |     gids = [gid for gid in range(length) for i in range(grouplength)]
91 |     df.insert(0, 'groupID', gids)
92 | 
93 |     return df
94 | 
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     df = get_energy_pandas()
99 |     print(df.head())
100 | 
101 |     ### Options to try for learning (a sketch of the first one follows below):
102 |     (X_1gtrain, y_train, X_1gtest, y_test) = get_energy()
103 |     print([len(x) for x in (X_1gtrain, y_train, X_1gtest, y_test)])
104 | 
105 |     # split DayOfWeek into Weekday/Weekend, perhaps even split up days
106 |     # split up Month into seasons
107 |     # do use ORK*s but with missing value imputation
108 |     # remove WeekOfYear?
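    # A minimal sketch of the first option above (illustrative only; the
    # `Weekend` flag is hypothetical and not used elsewhere in this repo),
    # assuming DayOfWeek is encoded 0=Monday .. 6=Sunday:
    df['Weekend'] = (df['DayOfWeek'] >= 5).astype(int)
    print(df[['DayOfWeek', 'Weekend']].drop_duplicates().head())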
109 | 110 | -------------------------------------------------------------------------------- /Energy/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | 6 | def batch_solve(solver,y): 7 | ''' 8 | wrapper around te solver to return solution of a vector of cost coefficients 9 | ''' 10 | sol = [] 11 | for i in range(len(y)): 12 | sol.append( solver.solve(y[i])) 13 | return torch.from_numpy( np.array(sol) ).float() 14 | 15 | 16 | def regret_aslist(solver, y_hat,y_true, sol_true, minimize=True): 17 | ''' 18 | computes regret of more than one cost vectors 19 | ''' 20 | mm = 1 if minimize else -1 21 | sol_hat = batch_solve(solver,y_hat.detach().numpy()) 22 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/(sol_true*y_true).sum(1)) 23 | 24 | def regret_fn(solver, y_hat,y_true, sol_true, minimize=True): 25 | ''' 26 | computes average regret given a predicted cost vector and the true solution vector and the true cost vector 27 | y_hat,y, sol_true are torch tensors 28 | ''' 29 | return regret_aslist(solver,y_hat,y_true,sol_true,minimize).mean() 30 | 31 | 32 | def abs_regret_aslist(solver, y_hat,y_true, sol_true, minimize=True): 33 | ''' 34 | computes regret of more than one cost vectors 35 | ''' 36 | mm = 1 if minimize else -1 37 | sol_hat = batch_solve(solver,y_hat.detach().numpy()) 38 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) 39 | 40 | 41 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize=True): 42 | ''' 43 | computes average regret given a predicted cost vector and the true solution vector and the true cost vector 44 | y_hat,y, sol_true are torch tensors 45 | ''' 46 | return abs_regret_aslist(solver,y_hat,y_true,sol_true,minimize).mean() 47 | 48 | 49 | def growpool_fn(solver,cache, y_hat): 50 | ''' 51 | cache is torch array [currentpoolsize,48] 52 | y_hat is torch array [batch_size,48] 53 | ''' 54 | sol = batch_solve(solver,y_hat) 55 | cache_np = cache.detach().numpy() 56 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 57 | # torch has no unique function, so we have to do this 58 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Energy/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "baseline_mse", 4 | "instance": 1, 5 | "lr": 0.5 6 | }, 7 | { 8 | "model": "baseline_mse", 9 | "instance": 2, 10 | "lr": 0.5 11 | }, 12 | { 13 | "model": "baseline_mse", 14 | "instance": 3, 15 | "lr": 0.5 16 | }, 17 | { 18 | "model": "CachingPO", 19 | "loss": "MAP_c", 20 | "instance": 1, 21 | "lr": 0.5 22 | }, 23 | { 24 | "model": "CachingPO", 25 | "loss": "MAP_c", 26 | "instance": 2, 27 | "lr": 0.5 28 | }, 29 | { 30 | "model": "CachingPO", 31 | "loss": "MAP_c", 32 | "instance": 3, 33 | "lr": 0.5 34 | }, 35 | { 36 | "model": "CachingPO", 37 | "loss": "pairwise_diff", 38 | "instance": 1, 39 | "lr": 0.5 40 | }, 41 | { 42 | "model": "CachingPO", 43 | "loss": "pairwise_diff", 44 | "instance": 2, 45 | "lr": 0.5 46 | }, 47 | { 48 | "model": "CachingPO", 49 | "loss": "pairwise_diff", 50 | "instance": 3, 51 | "lr": 0.1 52 | }, 53 | { 54 | "model": "CachingPO", 55 | "loss": "pairwise", 56 | "instance": 1, 57 | "lr": 0.1, 58 | "tau": 1 59 | }, 60 | { 61 | "model": "CachingPO", 62 | "loss": "pairwise", 63 | "instance": 2, 64 | "lr": 0.1, 65 | "tau": 5 66 | }, 67 | { 68 | "model": "CachingPO", 69 | "loss": "pairwise", 70 | "instance": 3, 71 | "lr": 0.1, 72 
| "tau": 50 73 | }, 74 | { 75 | "model": "CachingPO", 76 | "loss": "listwise", 77 | "instance": 1, 78 | "lr": 0.1, 79 | "tau": 5.0 80 | }, 81 | { 82 | "model": "CachingPO", 83 | "loss": "listwise", 84 | "instance": 2, 85 | "lr": 0.1, 86 | "tau": 5.0 87 | }, 88 | { 89 | "model": "CachingPO", 90 | "loss": "listwise", 91 | "instance": 3, 92 | "lr": 0.1, 93 | "tau": 5.0 94 | }, 95 | { 96 | "model": "SPO", 97 | "instance": 1, 98 | "lr": 1.0 99 | }, 100 | { 101 | "model": "SPO", 102 | "instance": 2, 103 | "lr": 0.5 104 | }, 105 | { 106 | "model": "SPO", 107 | "instance": 3, 108 | "lr": 0.5 109 | }, 110 | { 111 | "model": "DBB", 112 | "instance": 1, 113 | "lr": 0.01, 114 | "lambda_val": 0.1 115 | }, 116 | { 117 | "model": "DBB", 118 | "instance": 2, 119 | "lr": 0.5, 120 | "lambda_val": 1.0 121 | }, 122 | { 123 | "model": "DBB", 124 | "instance": 3, 125 | "lr": 0.5, 126 | "lambda_val": 1.0 127 | }, 128 | { 129 | "model": "FenchelYoung", 130 | "instance": 1, 131 | "lr": 0.01, 132 | "sigma": 0.1 133 | }, 134 | { 135 | "model": "FenchelYoung", 136 | "instance": 2, 137 | "lr": 0.5, 138 | "sigma": 5.0 139 | }, 140 | { 141 | "model": "FenchelYoung", 142 | "instance": 3, 143 | "lr": 0.01, 144 | "sigma": 0.1 145 | }, 146 | { 147 | "model": "IMLE", 148 | "instance": 1, 149 | "lr": 0.5, 150 | "beta": 1.0, 151 | "temperature": 2.0, 152 | "k": 5 153 | }, 154 | { 155 | "model": "IMLE", 156 | "instance": 2, 157 | "lr": 0.5, 158 | "beta": 1.0, 159 | "temperature": 1.0, 160 | "k": 5 161 | }, 162 | { 163 | "model": "IMLE", 164 | "instance": 3, 165 | "lr": 0.5, 166 | "beta": 1.0, 167 | "temperature": 1.0, 168 | "k": 5 169 | }, 170 | { 171 | "model": "DCOL", 172 | "instance": 1, 173 | "lr": 0.1, 174 | "mu": 1.0 175 | }, 176 | { 177 | "model": "DCOL", 178 | "instance": 2, 179 | "lr": 0.1, 180 | "mu": 1.0 181 | }, 182 | { 183 | "model": "DCOL", 184 | "instance": 3, 185 | "lr": 0.1, 186 | "mu": 1.0 187 | }, 188 | { 189 | "model": "IntOpt", 190 | "instance": 1, 191 | "lr": 0.1, 192 | "thr": 0.1, 193 | "damping": 1e-06 194 | }, 195 | { 196 | "model": "IntOpt", 197 | "instance": 2, 198 | "lr": 0.1, 199 | "thr": 0.001, 200 | "damping": 1e-06 201 | }, 202 | { 203 | "model": "IntOpt", 204 | "instance": 3, 205 | "lr": 0.1, 206 | "thr": 0.1, 207 | "damping": 0.1 208 | } 209 | ] -------------------------------------------------------------------------------- /Energy/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Energy/imle/__init__.py -------------------------------------------------------------------------------- /Energy/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 
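    Following the construction in [1] (see :meth:`sample` below), each draw is
    (sum_{i=1..nb_iterations} Gamma(1/k, rate=i/k) - log(nb_iterations)) / k,
    designed so that the sum of k independent samples approximates a standard
    Gumbel perturbation as the number of iterations grows.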
32 | 
33 |     [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete
34 |     Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798)
35 | 
36 |     Example::
37 | 
38 |         >>> import torch
39 |         >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100)
40 |         >>> noise_distribution.sample(torch.Size([5]))
41 |         tensor([ 0.2504,  0.0112,  0.5466,  0.0051, -0.1497])
42 | 
43 |     Args:
44 |         k (float): k parameter -- see [1] for more details.
45 |         nb_iterations (int): number of iterations for estimating the sample.
46 |         device (torch.device): device on which to store the samples.
47 |     """
48 |     def __init__(self,
49 |                  k: float,
50 |                  nb_iterations: int = 10,
51 |                  device: Optional[torch.device] = None):
52 |         super().__init__()
53 |         self.k = k
54 |         self.nb_iterations = nb_iterations
55 |         self.device = device
56 | 
57 |     def sample(self,
58 |                shape: Size) -> Tensor:
59 |         samples = torch.zeros(size=shape, device=self.device)
60 |         for i in range(1, self.nb_iterations + 1):
61 |             concentration = torch.tensor(1. / self.k, device=self.device)
62 |             rate = torch.tensor(i / self.k, device=self.device)
63 | 
64 |             gamma = Gamma(concentration=concentration, rate=rate)
65 |             samples = samples + gamma.sample(sample_shape=shape).to(self.device)
66 |         samples = (samples - math.log(self.nb_iterations)) / self.k
67 |         return samples.to(self.device)
68 | 
--------------------------------------------------------------------------------
/Energy/imle/target.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from torch import Tensor
4 | from abc import ABC, abstractmethod
5 | 
6 | import logging
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class BaseTargetDistribution(ABC):
12 |     def __init__(self):
13 |         super().__init__()
14 | 
15 |     @abstractmethod
16 |     def params(self,
17 |                theta: Tensor,
18 |                dy: Tensor) -> Tensor:
19 |         raise NotImplementedError
20 | 
21 | 
22 | class TargetDistribution(BaseTargetDistribution):
23 |     r"""
24 |     Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
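    Given input parameters theta and a downstream gradient dy, the target
    parameters are computed as theta' = alpha * theta - beta * dy (see
    :meth:`params` below).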
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Energy/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver in an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers. 
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise sammples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...] 
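                # I-MLE gradient sketch: map the inputs to target parameters
                # theta' = alpha * theta - beta * dy via target_distribution.params
                # (see imle/target.py), perturb them with the same noise as the
                # forward pass, re-solve, and return the difference of the two
                # MAP states, averaged over samples, as the gradient estimate.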
116 |                 input, noise, perturbed_output_3d = ctx.saved_tensors
117 | 
118 |                 input_shape = input.shape
119 |                 batch_size = input_shape[0]
120 |                 instance_shape = input_shape[1:]
121 | 
122 | 
123 |                 #### New line added
124 |                 dy = dy.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view([batch_size * nb_samples] + list(instance_shape))
125 | 
126 | 
127 |                 # dy is [BATCH_SIZE * N_SAMPLES, ...]
128 |                 dy_shape = dy.shape
129 |                 # noise is [BATCH_SIZE, N_SAMPLES, ...]
130 |                 noise_shape = noise.shape
131 | 
132 |                 # [BATCH_SIZE * NB_SAMPLES, ...]
133 |                 input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape)
134 |                 target_input_2d = target_distribution.params(input_2d, dy)
135 | 
136 |                 # [BATCH_SIZE, NB_SAMPLES, ...]
137 |                 target_input_3d = target_input_2d.view(noise_shape)
138 | 
139 |                 # [BATCH_SIZE, NB_SAMPLES, ...]
140 |                 target_noise = noise * target_noise_temperature
141 | 
142 |                 # [BATCH_SIZE, N_SAMPLES, ...]
143 |                 perturbed_target_input_3d = target_input_3d + target_noise
144 | 
145 |                 # [BATCH_SIZE * N_SAMPLES, ...]
146 |                 perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape)
147 | 
148 |                 # [BATCH_SIZE * N_SAMPLES, ...]
149 |                 target_output_2d = function(perturbed_target_input_2d)
150 | 
151 |                 # [BATCH_SIZE, N_SAMPLES, ...]
152 |                 target_output_3d = target_output_2d.view(noise_shape)
153 | 
154 |                 # [BATCH_SIZE, ...]
155 |                 gradient = (perturbed_output_3d - target_output_3d)
156 |                 gradient = gradient.mean(axis=1)
157 |                 return gradient
158 | 
159 |         return WrappedFunc.apply(input, *args)
160 |     return wrapper
161 | 
--------------------------------------------------------------------------------
/Energy/intopt/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.12.0
2 | numpy==1.21.6
3 | scipy==1.6.3
--------------------------------------------------------------------------------
/Energy/readme.md:
--------------------------------------------------------------------------------
1 | This directory corresponds to the Energy-cost aware scheduling problem.
2 | 
3 | The data is included in `Trainer/prices2013.dat`.
4 | There are three instances of the scheduling problem in the directory `SchedulingInstances`.
5 | The first, second, and third instances contain 10, 15, and 20 tasks, respectively.
6 | 
7 | To run an experiment use `testenergy.py`.
8 | To reproduce the results of the experiments, run
9 | ```
10 | python testenergy.py --scheduler True
11 | ```
--------------------------------------------------------------------------------
/HyperparamConfiguration.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/HyperparamConfiguration.pdf
--------------------------------------------------------------------------------
/Knapsack/DPO/fenchel_young.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | #
3 | # Modifications from original work
4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch
5 | #
6 | # Copyright 2021 The Google Research Authors.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
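        Example (illustrative; assumes `solver_fn` is a batched solver that maps
        predicted cost vectors to argmax solutions, e.g. a small wrapper around
        Trainer.utils.batch_solve)::

            >>> criterion = FenchelYoungLoss(func=solver_fn, num_samples=10,
            ...                              sigma=0.1, maximize=True, batched=True)
            >>> loss = criterion(y_hat, sol_true).mean()  # y_hat: predicted costs
            >>> loss.backward()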
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Knapsack/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | 6 | 7 | 8 | ###################################### NCE Loss Functions ######################################### 9 | class NCE(torch.nn.Module): 10 | def __init__(self, minimize=False): 11 | super().__init__() 12 | self.mm = 1 if minimize else -1 13 | def forward(self, y_hat,y_true, sol_true,cache): 14 | 15 | loss = 0 16 | mm = self.mm 17 | ## print("shape to be preinted: ") 18 | # print(sol_true.shape, cache.shape, y_hat.shape) 19 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 20 | 21 | for ii in range(len( y_hat )): 22 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 23 | loss /= len(y_hat) 24 | return loss 25 | 26 | class NCE_c(torch.nn.Module): 27 | def __init__(self, minimize=False): 28 | super().__init__() 29 | self.mm = 1 if minimize else -1 30 | def forward(self, y_hat,y_true, sol_true,cache): 31 | 32 | loss = 0 33 | mm = self.mm 34 | for ii in range(len( y_hat )): 35 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 36 | loss /= len(y_hat) 37 | return loss 38 | 39 | 40 | class MAP(torch.nn.Module): 41 | def __init__(self, minimize=False): 42 | super().__init__() 43 | self.mm = 1 if minimize else -1 44 | def forward(self, y_hat,y_true,sol_true,cache): 45 | 46 | loss = 0 47 | mm = self.mm 48 | 49 | for ii in range(len( y_hat )): 50 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 51 | loss /= len(y_hat) 52 | return loss 53 | 54 | 55 | class MAP_c(torch.nn.Module): 56 | def __init__(self, minimize=False): 57 | super().__init__() 58 | self.mm = 1 if minimize else -1 59 | def forward(self, y_hat,y_true,sol_true,cache): 60 | ''' 61 | pred_weights: predicted cost vector [batch_size, img,img] 62 | true_weights: actua cost vector [batch_size, img,img] 63 | target: true shortest path [batch_size, img,img] 64 | cache: cache is torch array [cache_size, img,img] 65 | ''' 66 | loss = 0 67 | mm = self.mm 68 | 69 | for ii in range(len( y_hat )): 70 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 71 | loss /= len(y_hat) 72 | return loss 73 | 74 | 75 | class MAP_c_actual(torch.nn.Module): 76 | def __init__(self, minimize=False): 77 | super().__init__() 78 | self.mm = 1 if minimize else -1 79 | def forward(self, y_hat,y_true,sol_true,cache): 80 | ''' 81 | pred_weights: predicted cost vector [batch_size, img,img] 82 | true_weights: actua cost vector [batch_size, img,img] 83 | target: true shortest path [batch_size, img,img] 84 | cache: cache is torch array [cache_size, img,img] 85 | ''' 86 | loss = 0 87 | mm = self.mm 88 | 89 | for ii in range(len( y_hat )): 90 | 91 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 92 | loss /= len(y_hat) 93 | return loss 94 | 95 | 96 | 
###################################### Ranking Loss Functions ######################################### 97 | class PointwiseLoss(torch.nn.Module): 98 | def __init__(self): 99 | super().__init__() 100 | def forward(self, y_hat,y_true,sol_true,cache): 101 | ''' 102 | pred_weights: predicted cost vector [batch_size, img,img] 103 | true_weights: actua cost vector [batch_size, img,img] 104 | target: true shortest path [batch_size, img,img] 105 | cache: cache is torch array [cache_size, img,img] 106 | ''' 107 | loss = 0 108 | 109 | for ii in range(len( y_hat )): 110 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | class ListwiseLoss(torch.nn.Module): 115 | def __init__(self, temperature=0., minimize=False): 116 | super().__init__() 117 | self.temperature = temperature 118 | self.mm = 1 if minimize else -1 119 | def forward(self, y_hat,y_true,sol_true,cache): 120 | 121 | loss = 0 122 | mm, temperature = self.mm, self.temperature 123 | 124 | for ii in range(len( y_hat )): 125 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 126 | loss /= len(y_hat) 127 | 128 | return loss 129 | 130 | 131 | class PairwisediffLoss(torch.nn.Module): 132 | def __init__(self, minimize=False): 133 | super().__init__() 134 | self.mm = 1 if minimize else -1 135 | 136 | def forward(self, y_hat,y_true,sol_true,cache): 137 | ''' 138 | pred_weights: predicted cost vector [batch_size, img,img] 139 | true_weights: actua cost vector [batch_size, img,img] 140 | target: true shortest path [batch_size, img,img] 141 | cache: cache is torch array [cache_size, img,img] 142 | ''' 143 | 144 | loss = 0 145 | for ii in range(len( y_hat )): 146 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 147 | 148 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 149 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 150 | 151 | 152 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 153 | loss /= len(y_hat) 154 | return loss 155 | 156 | class PairwiseLoss(torch.nn.Module): 157 | def __init__(self, margin=0., minimize=False): 158 | super().__init__() 159 | self.margin = margin 160 | self.mm = 1 if minimize else -1 161 | def forward(self, y_hat,y_true,sol_true,cache): 162 | ''' 163 | pred_weights: predicted cost vector [batch_size, img,img] 164 | true_weights: actua cost vector [batch_size, img,img] 165 | target: true shortest path [batch_size, img,img] 166 | cache: cache is torch array [cache_size, img,img] 167 | ''' 168 | relu = torch.nn.ReLU() 169 | loss = 0 170 | mm, margin = self.mm, self.margin 171 | for ii in range(len( y_hat )): 172 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 173 | 174 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 175 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 176 | 177 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 178 | loss /= len(y_hat) 179 | return loss -------------------------------------------------------------------------------- /Knapsack/Trainer/Data.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Knapsack/Trainer/Data.npz -------------------------------------------------------------------------------- /Knapsack/Trainer/comb_solver.py: -------------------------------------------------------------------------------- 1 | from ortools.linear_solver import pywraplp 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import cvxpy as cp 6 | import cvxpylayers 7 | from cvxpylayers.torch import CvxpyLayer 8 | from qpth.qp import QPFunction 9 | 10 | 11 | class knapsack_solver: 12 | def __init__(self, weights,capacity,n_items): 13 | self.weights= weights 14 | self.capacity = capacity 15 | self.n_items = n_items 16 | self.make_model() 17 | def make_model(self): 18 | solver = pywraplp.Solver.CreateSolver('SCIP') 19 | x = {} 20 | for i in range(self.n_items): 21 | x[i] = solver.BoolVar(f'x_{i}') 22 | solver.Add( sum(x[i] * self.weights[i] for i in range(self.n_items)) <= self.capacity) 23 | 24 | 25 | self.x = x 26 | self.solver = solver 27 | def solve(self,y): 28 | y= y.astype(np.float64) 29 | x = self.x 30 | solver = self.solver 31 | 32 | objective = solver.Objective() 33 | for i in range(self.n_items): 34 | objective.SetCoefficient(x[i],y[i]) 35 | objective.SetMaximization() 36 | status = solver.Solve() 37 | 38 | if status == pywraplp.Solver.OPTIMAL: 39 | sol = np.zeros(self.n_items) 40 | for i in range(self.n_items): 41 | sol[i]= x[i].solution_value() 42 | return sol 43 | else: 44 | raise Exception("No soluton found") 45 | 46 | class cvx_knapsack_solver(nn.Module): 47 | def __init__(self, weights,capacity,n_items, mu=1.): 48 | super().__init__() 49 | self.weights= weights 50 | self.capacity = capacity 51 | self.n_items = n_items 52 | A = weights.reshape(1,-1).astype(np.float32) 53 | b = capacity 54 | x = cp.Variable(n_items) 55 | c = cp.Parameter(n_items) 56 | constraints = [x >= 0,x<=1,A @ x <= b] 57 | objective = cp.Maximize(c @ x - mu*cp.pnorm(x, p=2)) #cp.pnorm(A @ x - b, p=1) 58 | problem = cp.Problem(objective, constraints) 59 | self.layer = CvxpyLayer(problem, parameters=[c], variables=[x]) 60 | def forward(self,costs): 61 | sol, = self.layer(costs) 62 | 63 | return sol 64 | 65 | 66 | 67 | from intopt.intopt import intopt 68 | class intopt_knapsack_solver(nn.Module): 69 | def __init__(self, weights,capacity,n_items, thr=0.1,damping=1e-3, diffKKT = False, dopresolve = True,): 70 | super().__init__() 71 | self.weights= weights 72 | self.capacity = capacity 73 | self.n_items = n_items 74 | A = weights.reshape(1,-1).astype(np.float32) 75 | b = np.array([capacity]).astype(np.float32) 76 | A_lb = -np.eye(n_items).astype(np.float32) 77 | b_lb = np.zeros(n_items).astype(np.float32) 78 | A_ub = np.eye(n_items).astype(np.float32) 79 | b_ub = np.ones(n_items).astype(np.float32) 80 | 81 | # G = np.concatenate((A_lb, A_ub ), axis=0).astype(np.float32) 82 | # h = np.concatenate(( b_lb, b_ub )).astype(np.float32) 83 | self.A, self.b,self.G, self.h = torch.from_numpy(A), torch.from_numpy(b), torch.from_numpy(A_ub), torch.from_numpy(b_ub) 84 | self.thr =thr 85 | self.damping = damping 86 | self.layer = intopt(self.A, self.b,self.G, self.h, thr, damping, diffKKT, dopresolve) 87 | 88 | def forward(self,costs): 89 | return self.layer(-costs) 90 | 91 | # sol = [self.layer(-cost) for cost in costs] 92 | 93 | 94 | 95 | 96 | # return torch.stack(sol) 97 | -------------------------------------------------------------------------------- /Knapsack/Trainer/data_utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from torch.utils.data import Dataset, DataLoader 4 | import pytorch_lightning as pl 5 | import torch 6 | from sklearn.preprocessing import StandardScaler 7 | import sklearn 8 | from Trainer.comb_solver import knapsack_solver 9 | 10 | class Datawrapper(): 11 | def __init__(self, X,y, sol=None,solver=None): 12 | assert (sol is not None) or (solver is not None) 13 | self.X = X.astype(np.float32) 14 | self.y = y.astype(np.float32) 15 | if sol is None: 16 | sol = [] 17 | for i in range(len(y)): 18 | sol.append( solver.solve(y[i]) ) 19 | sol = np.array (sol).astype(np.float32) 20 | self.sol = sol 21 | 22 | def __len__(self): 23 | return len(self.y) 24 | 25 | def __getitem__(self, idx): 26 | return self.X[idx],self.y[idx], self.sol[idx] 27 | 28 | 29 | class KnapsackDataModule(pl.LightningDataModule): 30 | def __init__(self,capacity, standardize=True, batch_size=70, generator=None,num_workers=8, seed=0): 31 | super().__init__() 32 | 33 | data = np.load('Trainer/Data.npz') 34 | weights = data['weights'] 35 | weights = np.array(weights) 36 | n_items = len(weights) 37 | x_train, x_test, y_train,y_test = data['X_1gtrain'],data['X_1gtest'],data['y_train'],data['y_test'] 38 | x_train = x_train[:,1:] 39 | x_test = x_test[:,1:] 40 | if standardize: 41 | scaler = StandardScaler() 42 | x_train = scaler.fit_transform(x_train) 43 | x_test = scaler.transform(x_test) 44 | x_train = x_train.reshape(-1,48,x_train.shape[1]) 45 | y_train = y_train.reshape(-1,48) 46 | x_test = x_test.reshape(-1,48,x_test.shape[1]) 47 | y_test = y_test.reshape(-1,48) 48 | x = np.concatenate((x_train, x_test), axis=0) 49 | y = np.concatenate((y_train,y_test), axis=0) 50 | x,y = sklearn.utils.shuffle(x,y,random_state=seed) 51 | x_train, y_train = x[:550], y[:550] 52 | x_valid, y_valid = x[550:650], y[550:650] 53 | x_test, y_test = x[650:], y[650:] 54 | 55 | solver = knapsack_solver(weights,capacity= capacity, n_items= len(weights) ) 56 | 57 | self.train_df = Datawrapper( x_train,y_train,solver=solver) 58 | self.valid_df = Datawrapper( x_valid, y_valid,solver=solver ) 59 | self.test_df = Datawrapper( x_test, y_test,solver=solver ) 60 | self.train_solutions= self.train_df.sol 61 | 62 | self.batch_size = batch_size 63 | self.generator = generator 64 | self.num_workers = num_workers 65 | 66 | self.weights, self.n_items = weights, n_items 67 | 68 | def train_dataloader(self): 69 | return DataLoader(self.train_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 70 | 71 | def val_dataloader(self): 72 | return DataLoader(self.valid_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 73 | 74 | def test_dataloader(self): 75 | return DataLoader(self.test_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) -------------------------------------------------------------------------------- /Knapsack/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from Trainer.utils import batch_solve 5 | 6 | def SPOlayer(solver,minimize=False): 7 | mm = 1 if minimize else -1 8 | class SPOlayer_cls(torch.autograd.Function): 9 | @staticmethod 10 | def forward(ctx, y_hat,y_true,sol_true ): 11 | sol_hat = batch_solve(solver, y_hat) 12 | 13 | ctx.save_for_backward(y_hat,y_true,sol_true) 14 | 15 | return ( mm*(sol_hat 
-sol_true)*y_true).sum() 16 | 17 | @staticmethod 18 | def backward(ctx, grad_output): 19 | y_hat,y_true,sol_true = ctx.saved_tensors 20 | y_spo = 2*y_hat - y_true 21 | sol_spo = batch_solve(solver,y_spo) 22 | return (sol_true - sol_spo)*mm, None, None 23 | return SPOlayer_cls.apply 24 | 25 | 26 | def DBBlayer(solver,lambda_val=1., minimize=False): 27 | mm = 1 if minimize else -1 28 | class DBBlayer_cls(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, y_hat,y_true,sol_true ): 31 | sol_hat = batch_solve(solver, y_hat) 32 | 33 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 34 | 35 | return sol_hat 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | """ 40 | In the backward pass we compute the gradient that minimizes regret 41 | """ 42 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 43 | y_perturbed = y_hat + mm * lambda_val* grad_output 44 | sol_perturbed = batch_solve(solver, y_perturbed) 45 | 46 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 47 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Knapsack/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def batch_solve(solver, y): 6 | 7 | sol = [] 8 | for i in range(len(y)): 9 | sol.append( solver.solve(y[i].detach().numpy()) ) 10 | return torch.tensor(sol).float() 11 | 12 | def regret_list(solver, y_hat,y_true, sol_true, minimize=False): 13 | mm = 1 if minimize else -1 14 | sol_hat = batch_solve(solver,y_hat) 15 | 16 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) /(( sol_true*y_true).sum(1)) 17 | 18 | 19 | def regret_fn(solver, y_hat,y_true, sol_true, minimize=False): 20 | 21 | 22 | return regret_list(solver,y_hat,y_true,sol_true,minimize).mean() 23 | 24 | 25 | def abs_regret_list(solver, y_hat,y_true, sol_true, minimize=False): 26 | mm = 1 if minimize else -1 27 | sol_hat = batch_solve(solver,y_hat) 28 | 29 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) 30 | 31 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize=False): 32 | 33 | 34 | return abs_regret_list(solver,y_hat,y_true,sol_true,minimize).mean() 35 | 36 | def growpool_fn(solver,cache, y_hat): 37 | ''' 38 | cache is torch array [currentpoolsize,48] 39 | y_hat is torch array [batch_size,48] 40 | ''' 41 | sol = batch_solve(solver,y_hat).detach().numpy() 42 | cache_np = cache.detach().numpy() 43 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 44 | # np.unique is used for row-wise deduplication of the pool; convert back to torch afterwards 45 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Knapsack/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "IntOpt", 4 | "capacity": 60, 5 | "lr": 0.5, 6 | "thr": 0.01, 7 | "damping": 10.0 8 | }, 9 | { 10 | "model": "IntOpt", 11 | "capacity": 120, 12 | "lr": 0.5, 13 | "thr": 0.1, 14 | "damping": 10.0 15 | }, 16 | { 17 | "model": "IntOpt", 18 | "capacity": 180, 19 | "lr": 1.0, 20 | "thr": 0.01, 21 | "damping": 0.1 22 | }, 23 | { 24 | "model": "DCOL", 25 | "capacity": 60, 26 | "lr": 0.5, 27 | "mu": 10.0 28 | }, 29 | { 30 | "model": "DCOL", 31 | "capacity": 120, 32 | "lr": 0.5, 33 | "mu": 1.0 34 | }, 35 | { 36 | "model": "DCOL", 37 | "capacity": 180, 38 | "lr": 0.5, 39 | "mu": 0.1 40 | }, 41 | { 42 | "model": "baseline_mse", 43 | "capacity": 60, 44 | "lr": 0.5 45 | }, 46 | { 47 | "model": "baseline_mse", 48 
| "capacity": 120, 49 | "lr": 1.0 50 | }, 51 | { 52 | "model": "baseline_mse", 53 | "capacity": 180, 54 | "lr": 1.0 55 | }, 56 | { 57 | "model": "SPO", 58 | "capacity": 60, 59 | "lr": 0.5 60 | }, 61 | { 62 | "model": "SPO", 63 | "capacity": 120, 64 | "lr": 1.0 65 | }, 66 | { 67 | "model": "SPO", 68 | "capacity": 180, 69 | "lr": 1.0 70 | }, 71 | { 72 | "model": "DBB", 73 | "capacity": 60, 74 | "lr": 0.5, 75 | "lambda_val": 0.1 76 | }, 77 | { 78 | "model": "DBB", 79 | "capacity": 120, 80 | "lr": 1.0, 81 | "lambda_val": 1.0 82 | }, 83 | { 84 | "model": "DBB", 85 | "capacity": 180, 86 | "lr": 0.5, 87 | "lambda_val": 1.0 88 | }, 89 | { 90 | "model": "FenchelYoung", 91 | "capacity": 60, 92 | "lr": 1.0, 93 | "sigma": 0.005 94 | }, 95 | { 96 | "model": "FenchelYoung", 97 | "capacity": 120, 98 | "lr": 1, 99 | "sigma": 0.5 100 | }, 101 | { 102 | "model": "FenchelYoung", 103 | "capacity": 180, 104 | "lr": 0.5, 105 | "sigma": 0.5 106 | }, 107 | { 108 | "model": "IMLE", 109 | "capacity": 60, 110 | "lr": 0.5, 111 | "beta": 0.1, 112 | "temperature": 0.5, 113 | "k": 5 114 | }, 115 | { 116 | "model": "IMLE", 117 | "capacity": 120, 118 | "lr": 0.5, 119 | "beta": 0.1, 120 | "temperature": 0.1, 121 | "k": 5 122 | }, 123 | { 124 | "model": "IMLE", 125 | "capacity": 180, 126 | "lr": 0.5, 127 | "beta": 0.1, 128 | "temperature": 5.0, 129 | "k": 5 130 | }, 131 | { 132 | "model": "CachingPO", 133 | "loss": "MAP_c", 134 | "capacity": 60, 135 | "lr": 1.0 136 | }, 137 | { 138 | "model": "CachingPO", 139 | "loss": "MAP_c", 140 | "capacity": 120, 141 | "lr": 1.0 142 | }, 143 | { 144 | "model": "CachingPO", 145 | "loss": "MAP_c", 146 | "capacity": 180, 147 | "lr": 1.0 148 | }, 149 | { 150 | "model": "CachingPO", 151 | "loss": "pairwise_diff", 152 | "capacity": 60, 153 | "lr": 1.0 154 | }, 155 | { 156 | "model": "CachingPO", 157 | "loss": "pairwise_diff", 158 | "capacity": 120, 159 | "lr": 1.0 160 | }, 161 | { 162 | "model": "CachingPO", 163 | "loss": "pairwise_diff", 164 | "capacity": 180, 165 | "lr": 1.0 166 | }, 167 | { 168 | "model": "CachingPO", 169 | "loss": "pairwise", 170 | "capacity": 60, 171 | "lr": 0.5, 172 | "tau": 10 173 | }, 174 | { 175 | "model": "CachingPO", 176 | "loss": "pairwise", 177 | "capacity": 120, 178 | "lr": 0.5, 179 | "tau": 10 180 | }, 181 | { 182 | "model": "CachingPO", 183 | "loss": "pairwise", 184 | "capacity": 180, 185 | "lr": 0.5, 186 | "tau": 10 187 | }, 188 | { 189 | "model": "CachingPO", 190 | "loss": "listwise", 191 | "capacity": 60, 192 | "lr": 1, 193 | "tau": 0.001 194 | }, 195 | { 196 | "model": "CachingPO", 197 | "loss": "listwise", 198 | "capacity": 120, 199 | "lr": 1, 200 | "tau": 0.001 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "listwise", 205 | "capacity": 180, 206 | "lr": 0.5, 207 | "tau": 0.0001 208 | } 209 | ] -------------------------------------------------------------------------------- /Knapsack/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Knapsack/imle/__init__.py -------------------------------------------------------------------------------- /Knapsack/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 
14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Knapsack/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
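Given parameters theta and a downstream gradient dy, :meth:`params` returns the target parameters theta' = alpha * theta - beta * dy, as implemented below.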
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Knapsack/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver into an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers.
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise samples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...]
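# The backward pass below implements the I-MLE finite-difference gradient:
# (i) broadcast dy across the nb_samples noise samples, (ii) form target
# parameters with target_distribution.params (theta' = alpha*theta - beta*dy
# for the default TargetDistribution), (iii) perturb them with the same noise
# and re-solve, and (iv) return the mean difference between the forward MAP
# solutions and the target MAP solutions.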
116 | input, noise, perturbed_output_3d = ctx.saved_tensors 117 | 118 | input_shape = input.shape 119 | batch_size = input_shape[0] 120 | instance_shape = input_shape[1:] 121 | 122 | 123 | #### New line added 124 | dy = dy.view(batch_size, 1, -1).repeat(1,nb_samples, 1).view([batch_size*nb_samples] +list(instance_shape)) 125 | 126 | 127 | # dy is [BATCH_SIZE * N_SAMPLES, ...] 128 | dy_shape = dy.shape 129 | # noise is [BATCH_SIZE, N_SAMPLES, ...] 130 | noise_shape = noise.shape 131 | 132 | # [BATCH_SIZE * NB_SAMPLES, ...] 133 | input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape) 134 | target_input_2d = target_distribution.params(input_2d, dy) 135 | 136 | # [BATCH_SIZE, NB_SAMPLES, ...] 137 | target_input_3d = target_input_2d.view(noise_shape) 138 | 139 | # [BATCH_SIZE, NB_SAMPLES, ...] 140 | target_noise = noise * target_noise_temperature 141 | 142 | # [BATCH_SIZE, N_SAMPLES, ...] 143 | perturbed_target_input_3d = target_input_3d + target_noise 144 | 145 | # [BATCH_SIZE * N_SAMPLES, ...] 146 | perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape) 147 | 148 | # [BATCH_SIZE * N_SAMPLES, ...] 149 | target_output_2d = function(perturbed_target_input_2d) 150 | 151 | # [BATCH_SIZE, N_SAMPLES, ...] 152 | target_output_3d = target_output_2d.view(noise_shape) 153 | 154 | # [BATCH_SIZE, ...] 155 | gradient = (perturbed_output_3d - target_output_3d) 156 | gradient = gradient.mean(axis=1) 157 | return gradient 158 | 159 | return WrappedFunc.apply(input, *args) 160 | return wrapper 161 | -------------------------------------------------------------------------------- /Knapsack/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the Knapsack problem. 2 | 3 | The data is included in `Trainer/Data.npz`. 4 | To run an experiment use `testknapsack.py`. 5 | To reproduce the results of the experiments, run 6 | ``` 7 | python testknapsack.py --scheduler True 8 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 PredOpt-Benchmarks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /Matching/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Matching/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | ###################################### NCE Loss Functions ######################################### 5 | class NCE(torch.nn.Module): 6 | def __init__(self, minimize=False): 7 | super().__init__() 8 | self.mm = 1 if minimize else -1 9 | def forward(self, y_hat,y_true, sol_true,cache): 10 | 11 | loss = 0 12 | mm = self.mm 13 | ## print("shape to be printed: ") 14 | # print(sol_true.shape, cache.shape, y_hat.shape) 15 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 16 | 17 | for ii in range(len( y_hat )): 18 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 19 | loss /= len(y_hat) 20 | return loss 21 | 22 | class NCE_c(torch.nn.Module): 23 | def __init__(self, minimize=False): 24 | super().__init__() 25 | self.mm = 1 if minimize else -1 26 | def forward(self, y_hat,y_true, sol_true,cache): 27 | 28 | loss = 0 29 | mm = self.mm 30 | for ii in range(len( y_hat )): 31 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 32 | loss /= len(y_hat) 33 | return loss 34 | 35 | 36 | class MAP(torch.nn.Module): 37 | def __init__(self, minimize=False): 38 | super().__init__() 39 | self.mm = 1 if minimize else -1 40 | def forward(self, y_hat,y_true,sol_true,cache): 41 | 42 | loss = 0 43 | mm = self.mm 44 | 45 | for ii in range(len( y_hat )): 46 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 47 | loss /= len(y_hat) 48 | return loss 49 | 50 | 51 | class MAP_c(torch.nn.Module): 52 | def __init__(self, minimize=False): 53 | super().__init__() 54 | self.mm = 1 if minimize else -1 55 | def forward(self, y_hat,y_true,sol_true,cache): 56 | 57 | loss = 0 58 | mm = self.mm 59 | 60 | for ii in range(len( y_hat )): 61 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 62 | loss /= len(y_hat) 63 | return loss 64 | 65 | 66 | class MAP_c_actual(torch.nn.Module): 67 | def __init__(self, minimize=False): 68 | super().__init__() 69 | self.mm = 1 if minimize else -1 70 | def forward(self, y_hat,y_true,sol_true,cache): 71 | 72 | loss = 0 73 | mm = self.mm 74 | 75 | for ii in range(len( y_hat )): 76 | 77 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 78 | loss /= len(y_hat) 79 | return loss 80 | 81 | ###################################### Ranking Loss Functions ######################################### 82 | class PointwiseLoss(torch.nn.Module): 83 | def __init__(self): 84 | super().__init__() 85 | def forward(self, y_hat,y_true,sol_true,cache): 86 | ''' 87 | y_hat: predicted cost vector [batch_size, 2500] 88 | y_true: actual cost vector [batch_size, 2500] 89 | sol_true: true optimal matching [batch_size, 2500] 90 | cache: torch array of cached solutions [cache_size, 2500] 91 | ''' 92 | loss = 0 93 |
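# The loop below computes, per example, the mean squared difference between
# cache*y_hat and cache*y_true, i.e. predicted vs. true costs masked by each
# cached solution -- a pointwise regression surrogate over the solution cache.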
94 | for ii in range(len( y_hat )): 95 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 96 | loss /= len(y_hat) 97 | 98 | return loss 99 | class ListwiseLoss(torch.nn.Module): 100 | def __init__(self, temperature=0., minimize=False): 101 | super().__init__() 102 | self.temperature = temperature 103 | self.mm = 1 if minimize else -1 104 | def forward(self, y_hat,y_true,sol_true,cache): 105 | 106 | loss = 0 107 | mm, temperature = self.mm, self.temperature 108 | 109 | for ii in range(len( y_hat )): 110 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | 115 | 116 | class PairwisediffLoss(torch.nn.Module): 117 | def __init__(self, minimize=False): 118 | super().__init__() 119 | self.mm = 1 if minimize else -1 120 | 121 | def forward(self, y_hat,y_true,sol_true,cache): 122 | ''' 123 | y_hat: predicted cost vector [batch_size, 2500] 124 | y_true: actual cost vector [batch_size, 2500] 125 | sol_true: true optimal matching [batch_size, 2500] 126 | cache: torch array of cached solutions [cache_size, 2500] 127 | ''' 128 | 129 | loss = 0 130 | for ii in range(len( y_hat )): 131 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 132 | 133 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 134 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 135 | 136 | 137 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 138 | loss /= len(y_hat) 139 | return loss 140 | 141 | class PairwiseLoss(torch.nn.Module): 142 | def __init__(self, margin=0., minimize=False): 143 | super().__init__() 144 | self.margin = margin 145 | self.mm = 1 if minimize else -1 146 | def forward(self, y_hat,y_true,sol_true,cache): 147 | ''' 148 | y_hat: predicted cost vector [batch_size, 2500] 149 | y_true: actual cost vector [batch_size, 2500] 150 | sol_true: true optimal matching [batch_size, 2500] 151 | cache: torch array of cached solutions [cache_size, 2500] 152 | ''' 153 | relu = torch.nn.ReLU() 154 | loss = 0 155 | mm, margin = self.mm, self.margin 156 | for ii in range(len( y_hat )): 157 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 158 | 159 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 160 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 161 | 162 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 163 | loss /= len(y_hat) 164 | return loss -------------------------------------------------------------------------------- /Matching/Trainer/NNModels.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | def cora_net(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 8 | if n_layers ==1: 9 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 10 | else: 11 | layers = [] 12 | # input layer 13 | layers.append(nn.Sequential( 14 | nn.Linear(n_features, n_hidden), 15 | nn.ReLU() 16 | )) 17 | # hidden layers 18 | for _ in range(n_layers -2) : 19 | layers.append(nn.Sequential( 20 |
nn.Linear(n_hidden, n_hidden), 21 | nn.ReLU() 22 | )) 23 | # output layer 24 | layers.append(nn.Sequential( 25 | nn.Linear(n_hidden, n_targets), 26 | nn.Sigmoid() 27 | )) 28 | return nn.Sequential(*layers) 29 | 30 | def cora_normednet(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 31 | if n_layers ==1: 32 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 33 | else: 34 | layers = [] 35 | # input layer 36 | layers.append(nn.Sequential( 37 | nn.Linear(n_features, n_hidden), 38 | nn.ReLU(),nn.BatchNorm1d(2500) 39 | )) 40 | # hidden layers 41 | for _ in range(n_layers -2) : 42 | layers.append(nn.Sequential( 43 | nn.Linear(n_hidden, n_hidden), 44 | nn.ReLU(),nn.BatchNorm1d(2500) 45 | )) 46 | # output layer 47 | layers.append(nn.Sequential( 48 | nn.Linear(n_hidden, n_targets) 49 | # nn.Sigmoid() 50 | )) 51 | return nn.Sequential(*layers) 52 | 53 | def cora_nosigmoidnet(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 54 | if n_layers ==1: 55 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 56 | else: 57 | layers = [] 58 | # input layer 59 | layers.append(nn.Sequential( 60 | nn.Linear(n_features, n_hidden), 61 | nn.ReLU() 62 | )) 63 | # hidden layers 64 | for _ in range(n_layers -2) : 65 | layers.append(nn.Sequential( 66 | nn.Linear(n_hidden, n_hidden), 67 | nn.ReLU() 68 | )) 69 | # output layer 70 | layers.append(nn.Sequential( 71 | nn.Linear(n_hidden, n_targets) 72 | )) 73 | return nn.Sequential(*layers) -------------------------------------------------------------------------------- /Matching/Trainer/bipartite.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import pickle 4 | import copy 5 | from tqdm.auto import tqdm 6 | import sys 7 | from ortools.graph import pywrapgraph 8 | from ortools.linear_solver import pywraplp 9 | import torch 10 | 11 | def linearobj(x,v, **params): 12 | return 13 | 14 | def bmatching(preds, mult=1000, **kwargs): 15 | assignment = pywrapgraph.LinearSumAssignment() 16 | cost = -preds.reshape(50,50)*mult 17 | n1 = len(cost) 18 | n2 = len(cost[0]) 19 | for i in range(n1): 20 | for j in range(n2): 21 | assignment.AddArcWithCost(i, j, int(cost[i,j])) 22 | solve_status = assignment.Solve() 23 | solution = np.zeros((50,50)) 24 | for i in range(assignment.NumNodes()): 25 | mate = assignment.RightMate(i) 26 | solution[i,mate] = 1 27 | return solution.reshape(-1) 28 | 29 | solver = pywraplp.Solver.CreateSolver('GLOP') 30 | # solver.SuppressOutput() 31 | 32 | class bmatching_diverse: 33 | def __init__(self,p=0.25, q=0.25, relaxation=False) -> None: 34 | self.p, self.q = p,q 35 | self.relaxation = relaxation 36 | def solve(self, preds, match_subs, **kwargs): 37 | p,q = self.p, self.q 38 | relaxation = self.relaxation 39 | 40 | solver.Clear() 41 | mult=1000 42 | cost = -preds.reshape(50,50)*mult 43 | m = match_subs.reshape(50,50) 44 | n1 = len(cost) 45 | n2 = len(cost[0]) 46 | x = {} 47 | for i in range(n1): 48 | for j in range(n2): 49 | x[i,j] = solver.NumVar(0,1,'') if relaxation else solver.IntVar(0,1,'') 50 | 51 | for i in range(n1): 52 | solver.Add(solver.Sum([x[i, j] for j in range(n2)]) <= 1) 53 | 54 | for j in range(n2): 55 | solver.Add(solver.Sum([x[i, j] for i in range(n1)]) <= 1) 56 | 57 | # pairing in same field 58 | pairing_same = [] 59 | allvars = [] 60 | for i in range(n1): 61 | for j in range(n2): 62 | pairing_same.append(x[i,j] * m[i,j]) 63 | allvars.append(x[i,j]) 64 | solver.Add(solver.Sum(pairing_same) >= 
p*solver.Sum(allvars)) 65 | 66 | # pairing in distinct field 67 | pairing_dis = [] 68 | for i in range(n1): 69 | for j in range(n2): 70 | pairing_dis.append(x[i,j] * (1-m[i,j])) 71 | solver.Add(solver.Sum(pairing_dis) >= q*solver.Sum(allvars)) 72 | 73 | obj = [] 74 | for i in range(n1): 75 | for j in range(n2): 76 | obj.append(cost[i,j] * x[i,j]) 77 | solver.Minimize(solver.Sum(obj)) 78 | 79 | status = solver.Solve() 80 | solution = np.zeros((50,50)) 81 | 82 | if status == pywraplp.Solver.OPTIMAL: 83 | for i in range(n1): 84 | for j in range(n2): 85 | solution[i,j] = x[i,j].solution_value() 86 | #solver.Clear() 87 | return solution.reshape(-1) 88 | 89 | def get_qpt_matrices(self, match_subs): 90 | p,q = self.p, self.q 91 | 92 | # we only have G * x <= h 93 | 94 | # Matching 95 | N1 = np.zeros((50,2500)) 96 | N2 = np.zeros_like(N1) 97 | b1 = np.ones(50) 98 | b2 = np.ones_like(b1) 99 | 100 | for i in range(50): 101 | rowmask = np.zeros((50,50)) 102 | colmask = np.zeros_like(rowmask) 103 | rowmask[i,:] = 1 104 | colmask[:,i] = 1 105 | N1[i] = rowmask.flatten() 106 | N2[i] = colmask.flatten() 107 | 108 | # Similarity constraint 109 | Sim = p - match_subs 110 | bsim = np.zeros(1) 111 | 112 | # Diversity constraint 113 | Div = q - 1 + match_subs 114 | bdiv = np.zeros_like(bsim) 115 | 116 | G = np.vstack((N1, N2, Sim, Div)) 117 | h = np.concatenate((b1, b2, bsim, bdiv)) 118 | A = torch.Tensor().float() 119 | b = torch.Tensor().float() 120 | return A,b, torch.from_numpy(G).float(), torch.from_numpy(h).float() 121 | 122 | 123 | 124 | 125 | # def get_qpt_matrices(match_subs, p=0.25, q=0.25, **kwargs): 126 | # # we only have G * x <= h 127 | 128 | # # Matching 129 | # N1 = np.zeros((50,2500)) 130 | # N2 = np.zeros_like(N1) 131 | # b1 = np.ones(50) 132 | # b2 = np.ones_like(b1) 133 | 134 | # for i in range(50): 135 | # rowmask = np.zeros((50,50)) 136 | # colmask = np.zeros_like(rowmask) 137 | # rowmask[i,:] = 1 138 | # colmask[:,i] = 1 139 | # N1[i] = rowmask.flatten() 140 | # N2[i] = colmask.flatten() 141 | 142 | # # Similarity constraint 143 | # Sim = p - match_subs 144 | # bsim = np.zeros(1) 145 | 146 | # # Diversity constraint 147 | # Div = q - 1 + match_subs 148 | # bdiv = np.zeros_like(bsim) 149 | 150 | # G = np.vstack((N1, N2, Sim, Div)) 151 | # h = np.concatenate((b1, b2, bsim, bdiv)) 152 | # A = None 153 | # b = None 154 | # return A,b, G,h 155 | 156 | 157 | 158 | 159 | def get_cora(): 160 | """ 161 | Get X,y 162 | """ 163 | # 164 | with open('data/cora_data.pickle', 'rb') as f: 165 | gt, ft, M = pickle.load(f) 166 | return ft, gt, M 167 | 168 | if __name__ == '__main__': 169 | x,y,m = get_cora() 170 | params = {'p':0.5,'q':0.5} 171 | idx = 15 172 | p,m = bmatching_diverse(y[idx], m[idx], **params) , m[idx] 173 | objective_fun=lambda x,v,**params: x @ v 174 | print("Objective ",objective_fun(p,y[idx]) ) 175 | 176 | _,_, G,h = get_qpt_matrices(m, **params) 177 | ineq = G @ p 178 | print('G: ', G.shape) 179 | print('h: ', h.shape) 180 | print('G @ x: ', ineq.shape) 181 | csat = (ineq - h) <=0 182 | print('constraints satisfied ?', csat.all()) 183 | print(ineq -h) 184 | a,b = np.unique(G, axis=1, return_counts=True) 185 | print('uniques?', a.shape) 186 | print('any repetition?', (b > 2).any()) -------------------------------------------------------------------------------- /Matching/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | from Trainer.bipartite import get_cora 2 | from torch.utils.data import DataLoader 3 | import numpy as np 4 
| import pytorch_lightning as pl 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from tqdm import tqdm 8 | class CoraDatawrapper(): 9 | def __init__(self, x,y, M,solver, params={'p':0.25, 'q':0.25}, relaxation=False, sols=None, verbose=False): 10 | self.x = x 11 | self.y = y 12 | self.m = M 13 | if sols is not None: 14 | self.sols = sols 15 | else: 16 | y_iter = range(len(self.y)) 17 | it = tqdm(y_iter) if verbose else y_iter 18 | self.sols = np.array([solver.solve(self.y[i], self.m[i], relaxation=relaxation, **params) for i in it]) 19 | self.sols = torch.from_numpy(self.sols).float() 20 | 21 | self.x = torch.from_numpy(self.x).float() 22 | self.y = torch.from_numpy(self.y).float() 23 | self.m = torch.from_numpy(self.m).float() 24 | def __len__(self): 25 | return len(self.y) 26 | 27 | def __getitem__(self, index): 28 | return self.x[index], self.y[index], self.sols[index], self.m[index] 29 | 30 | 31 | 32 | def return_trainlabel(solver,params): 33 | x, y,m = get_cora() 34 | 35 | y_train, y_test = y[:22], y[22:] 36 | m_train, m_test = m[:22], m[22:] 37 | y_iter = range(len(y_train)) 38 | sols = np.array([solver.solve(y[i], m[i], **params) for i in y_iter]) 39 | sols = np.unique(sols,axis=0) 40 | return torch.from_numpy (sols) 41 | 42 | ###################################### Dataloader ######################################### 43 | 44 | class CoraMatchingDataModule(pl.LightningDataModule): 45 | def __init__(self,solver,params, generator=None, normalize=False, batch_size: int = 32, num_workers: int=8): 46 | super().__init__() 47 | x, y,m = get_cora() 48 | 49 | x_train, x_test = x[:22], x[22:] 50 | y_train, y_test = y[:22], y[22:] 51 | m_train, m_test = m[:22], m[22:] 52 | 53 | 54 | self.train_df = CoraDatawrapper( x_train,y_train,m_train,solver,params=params) 55 | self.valid_df = CoraDatawrapper( x_test,y_test,m_test, solver,params=params) 56 | self.test_df = CoraDatawrapper( x_test,y_test,m_test,solver, params=params) 57 | ### As we don't have much data, the validation and test datasets are the same 58 | self.batch_size = batch_size 59 | self.generator = generator 60 | self.num_workers = num_workers 61 | 62 | 63 | def train_dataloader(self): 64 | return DataLoader(self.train_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 65 | 66 | def val_dataloader(self): 67 | return DataLoader(self.valid_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 68 | 69 | def test_dataloader(self): 70 | return DataLoader(self.test_df, batch_size=5, num_workers=self.num_workers) -------------------------------------------------------------------------------- /Matching/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from Trainer.utils import batch_solve 5 | 6 | 7 | 8 | def SPOlayer(solver,minimize=False): 9 | mm = 1 if minimize else -1 10 | class SPOlayer_cls(torch.autograd.Function): 11 | @staticmethod 12 | def forward(ctx, y_hat,y_true,sol_true ,m ): 13 | sol_hat = batch_solve(solver, y_hat, m) 14 | 15 | ctx.save_for_backward(y_hat,y_true,sol_true ,m ) 16 | 17 | return ( mm*(sol_hat -sol_true)*y_true).sum() 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | y_hat,y_true,sol_true ,m = ctx.saved_tensors 22 | y_spo = 2*y_hat - y_true 23 | sol_spo = batch_solve(solver,y_spo ,m) 24 | return (sol_true - sol_spo)*mm, None, None, None 25 | return SPOlayer_cls.apply 26 | 27 | 28 | def DBBlayer(solver,lambda_val=1.,
minimize=False): 29 | mm = 1 if minimize else -1 30 | class DBBlayer_cls(torch.autograd.Function): 31 | @staticmethod 32 | def forward(ctx, y_hat,y_true,sol_true ,m ): 33 | sol_hat = batch_solve(solver, y_hat ,m) 34 | 35 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat ,m) 36 | 37 | return sol_hat 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | """ 42 | In the backward pass we compute the gradient that minimizes regret 43 | """ 44 | y_hat,y_true,sol_true, sol_hat ,m = ctx.saved_tensors 45 | y_perturbed = y_hat + mm* lambda_val* grad_output 46 | sol_perturbed = batch_solve(solver, y_perturbed ,m) 47 | 48 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None, None 49 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Matching/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | # from Trainer.bipartite import bmatching_diverse, get_qpt_matrices 2 | import torch 3 | import numpy as np 4 | 5 | # solver = bmatching_diverse 6 | # objective_fun=lambda x,v,**params: x @ v 7 | 8 | def batch_solve(solver,y,m,relaxation =False,batched= True): 9 | 10 | if batched: 11 | ### y, m both are of dim (*,2500) 12 | sol = [] 13 | 14 | for i in range(len(y)): 15 | sol.append( solver.solve(y[i].detach().numpy(), m[i].numpy(), relaxation=relaxation) ) 16 | return torch.tensor(sol).float() 17 | else: 18 | ### y, m both are of dim (2500) 19 | sol = solver.solve(y.detach().numpy(), m.numpy(), relaxation=relaxation) 20 | return torch.tensor(sol).float() 21 | 22 | 23 | def regret_list(solver,y_hat,y_true,sol_true,m,minimize=False): 24 | mm = 1 if minimize else -1 25 | sol_hat = batch_solve(solver, y_hat,m) 26 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 27 | 28 | def abs_regret_list(solver,y_hat,y_true,sol_true,m,minimize=False): 29 | mm = 1 if minimize else -1 30 | sol_hat = batch_solve(solver, y_hat,m) 31 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 32 | 33 | def regret_fn(solver,y_hat,y_true,sol_true,m,minimize=False): 34 | # mm = 1 if minimize else -1 35 | # sol_hat = batch_solve(y_hat,m) 36 | # sol_ = batch_solve(y,m) 37 | # # return ((mm*(sol_hat - sol_)*y).sum(1)/ (sol_*y).sum(1) ).mean() 38 | return regret_list(solver,y_hat,y_true,sol_true,m,minimize=minimize).mean() 39 | 40 | def abs_regret_fn(solver,y_hat,y_true,sol_true,m,minimize=False): 41 | # mm = 1 if minimize else -1 42 | # sol_hat = batch_solve(y_hat,m) 43 | # sol_ = batch_solve(y,m) 44 | # # return ((mm*(sol_hat - sol_)*y).sum(1)/ (sol_*y).sum(1) ).mean() 45 | return abs_regret_list(solver,y_hat,y_true,sol_true,m,minimize=minimize).mean() 46 | 47 | 48 | def growpool_fn(solver,cache, y_hat, m): 49 | ''' 50 | cache is torch array [currentpoolsize,2500] 51 | y_hat is torch array [batch_size,2500] 52 | ''' 53 | sol = batch_solve(solver,y_hat,m).detach().numpy() 54 | cache_np = cache.detach().numpy() 55 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 56 | # np.unique is used for row-wise deduplication of the pool; convert back to torch afterwards 57 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Matching/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "DBB", 4 | "instance": 1, 5 | "lr": 0.01, 6 | "lambda_val": 10.0 7 | }, 8 | { 9 | "model": "DBB", 10 | "instance": 2, 11 | "lr": 0.01, 12 | "lambda_val": 0.1 13 | }, 14 | { 15 | "model": "DBB", 16 | "instance": 3, 17 |
"lr": 0.01, 18 | "lambda_val": 0.1 19 | }, 20 | { 21 | "model": "FenchelYoung", 22 | "instance": 1, 23 | "lr": 0.001, 24 | "sigma": 0.5 25 | }, 26 | { 27 | "model": "FenchelYoung", 28 | "instance": 2, 29 | "lr": 0.001, 30 | "sigma": 0.01 31 | }, 32 | { 33 | "model": "FenchelYoung", 34 | "instance": 3, 35 | "lr": 0.001, 36 | "sigma": 5.0 37 | }, 38 | { 39 | "model": "IMLE", 40 | "instance": 1, 41 | "lr": 0.001, 42 | "beta": 100.0, 43 | "temperature": 0.5, 44 | "k": 5 45 | }, 46 | { 47 | "model": "IMLE", 48 | "instance": 2, 49 | "lr": 0.001, 50 | "beta": 100.0, 51 | "temperature": 0.5, 52 | "k": 5 53 | }, 54 | { 55 | "model": "IMLE", 56 | "instance": 3, 57 | "lr": 0.001, 58 | "beta": 100.0, 59 | "temperature": 0.5, 60 | "k": 5 61 | }, 62 | { 63 | "model": "DCOL", 64 | "instance": 1, 65 | "lr": 0.01, 66 | "mu": 100.0 67 | }, 68 | { 69 | "model": "DCOL", 70 | "instance": 2, 71 | "lr": 0.001, 72 | "mu": 10.0 73 | }, 74 | { 75 | "model": "DCOL", 76 | "instance": 3, 77 | "lr": 0.001, 78 | "mu": 10.0 79 | }, 80 | { 81 | "model": "IntOpt", 82 | "instance": 1, 83 | "lr": 0.001, 84 | "thr": 1.0, 85 | "damping": 0.1 86 | }, 87 | { 88 | "model": "IntOpt", 89 | "instance": 2, 90 | "lr": 0.05, 91 | "thr": 0.1, 92 | "damping": 10.0 93 | }, 94 | { 95 | "model": "IntOpt", 96 | "instance": 3, 97 | "lr": 0.001, 98 | "thr": 0.1, 99 | "damping": 0.1 100 | }, 101 | { 102 | "model": "baseline_mse", 103 | "instance": 1, 104 | "lr": 0.01 105 | }, 106 | { 107 | "model": "baseline_mse", 108 | "instance": 2, 109 | "lr": 0.01 110 | }, 111 | { 112 | "model": "baseline_mse", 113 | "instance": 3, 114 | "lr": 0.0005 115 | }, 116 | { 117 | "model": "CachingPO", 118 | "loss": "MAP_c", 119 | "instance": 1, 120 | "lr": 0.001 121 | }, 122 | { 123 | "model": "CachingPO", 124 | "loss": "MAP_c", 125 | "instance": 2, 126 | "lr": 0.01 127 | }, 128 | { 129 | "model": "CachingPO", 130 | "loss": "MAP_c", 131 | "instance": 3, 132 | "lr": 0.005 133 | }, 134 | { 135 | "model": "CachingPO", 136 | "loss": "pairwise_diff", 137 | "instance": 1, 138 | "lr": 0.001 139 | }, 140 | { 141 | "model": "CachingPO", 142 | "loss": "pairwise_diff", 143 | "instance": 2, 144 | "lr": 0.01 145 | }, 146 | { 147 | "model": "CachingPO", 148 | "loss": "pairwise_diff", 149 | "instance": 3, 150 | "lr": 0.005 151 | }, 152 | { 153 | "model": "CachingPO", 154 | "loss": "pairwise", 155 | "instance": 1, 156 | "lr": 0.005, 157 | "tau": 5 158 | }, 159 | { 160 | "model": "CachingPO", 161 | "loss": "pairwise", 162 | "instance": 2, 163 | "lr": 0.01, 164 | "tau": 50 165 | }, 166 | { 167 | "model": "CachingPO", 168 | "loss": "pairwise", 169 | "instance": 3, 170 | "lr": 0.01, 171 | "tau": 50 172 | }, 173 | { 174 | "model": "CachingPO", 175 | "loss": "listwise", 176 | "instance": 1, 177 | "lr": 0.001, 178 | "tau": 5.0 179 | }, 180 | { 181 | "model": "CachingPO", 182 | "loss": "listwise", 183 | "instance": 2, 184 | "lr": 0.01, 185 | "tau": 5.0 186 | }, 187 | { 188 | "model": "CachingPO", 189 | "loss": "listwise", 190 | "instance": 3, 191 | "lr": 0.01, 192 | "tau": 50.0 193 | }, 194 | { 195 | "model": "SPO", 196 | "instance": 1, 197 | "lr": 0.001 198 | }, 199 | { 200 | "model": "SPO", 201 | "instance": 2, 202 | "lr": 0.001 203 | }, 204 | { 205 | "model": "SPO", 206 | "instance": 3, 207 | "lr": 0.005 208 | } 209 | ] -------------------------------------------------------------------------------- /Matching/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo 'downloading preprocessed CORA dataset...' 
3 | gdown 1MNy9HCVkJykRbXf6XXI9D7lggF0UF8MP 4 | tar -xvzf data.tar.gz 5 | echo 'cleaning...' 6 | rm data.tar.gz 7 | cd data/ 8 | python make_cora_dataset.py 9 | echo 'done' 10 | -------------------------------------------------------------------------------- /Matching/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Matching/imle/__init__.py -------------------------------------------------------------------------------- /Matching/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1.
/ self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Matching/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`. 25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Matching/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver into an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers.
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise samples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...]
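# What follows mirrors the I-MLE update: dy is broadcast over the noise
# samples, target parameters are produced by target_distribution.params,
# perturbed with the same noise, re-solved, and the mean difference of the
# two MAP solution sets is returned as the gradient estimate.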
116 | input, noise, perturbed_output_3d = ctx.saved_tensors 117 | 118 | input_shape = input.shape 119 | batch_size = input_shape[0] 120 | instance_shape = input_shape[1:] 121 | 122 | 123 | #### New line added 124 | dy = dy.view(batch_size, 1, -1).repeat(1,nb_samples, 1).view([batch_size*nb_samples] +list(instance_shape)) 125 | 126 | 127 | # dy is [BATCH_SIZE * N_SAMPLES, ...] 128 | dy_shape = dy.shape 129 | # noise is [BATCH_SIZE, N_SAMPLES, ...] 130 | noise_shape = noise.shape 131 | 132 | # [BATCH_SIZE * NB_SAMPLES, ...] 133 | input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape) 134 | target_input_2d = target_distribution.params(input_2d, dy) 135 | 136 | # [BATCH_SIZE, NB_SAMPLES, ...] 137 | target_input_3d = target_input_2d.view(noise_shape) 138 | 139 | # [BATCH_SIZE, NB_SAMPLES, ...] 140 | target_noise = noise * target_noise_temperature 141 | 142 | # [BATCH_SIZE, N_SAMPLES, ...] 143 | perturbed_target_input_3d = target_input_3d + target_noise 144 | 145 | # [BATCH_SIZE * N_SAMPLES, ...] 146 | perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape) 147 | 148 | # [BATCH_SIZE * N_SAMPLES, ...] 149 | target_output_2d = function(perturbed_target_input_2d) 150 | 151 | # [BATCH_SIZE, N_SAMPLES, ...] 152 | target_output_3d = target_output_2d.view(noise_shape) 153 | 154 | # [BATCH_SIZE, ...] 155 | gradient = (perturbed_output_3d - target_output_3d) 156 | gradient = gradient.mean(axis=1) 157 | return gradient 158 | 159 | return WrappedFunc.apply(input, *args) 160 | return wrapper 161 | -------------------------------------------------------------------------------- /Matching/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the diverse bipartite matching problem. 2 | 3 | To download the data, run 4 | ``` 5 | ./get_data.sh 6 | ``` 7 | This will create a folder `data/` and save the data files inside that directory. 8 | Alternatively, you can download the bipartite matching dataset from the repository: https://doi.org/10.48804/KT2P3Z and extract the `tar.gz` file. 9 | 10 | 11 | To run experiments use `test_matching.py`. 12 | To reproduce the results of the experiments, run 13 | ``` 14 | python test_matching.py --scheduler True 15 | ``` 16 | 17 | -------------------------------------------------------------------------------- /Portfolio/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License.
19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
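        Example (an illustrative sketch, assuming ``portfolio_solver`` is a hypothetical
        batched solver mapping predicted prices to optimal portfolio weights)::

            >>> fy = FenchelYoungLoss(func=portfolio_solver, num_samples=100, sigma=0.1, maximize=True)
            >>> loss = fy(y_hat, sol_true).mean()  # y_hat: predicted prices; sol_true: true optimal weights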
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Portfolio/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | ###################################### NCE Loss Functions ######################################### 5 | class NCE(torch.nn.Module): 6 | def __init__(self, minimize = False): 7 | super().__init__() 8 | self.mm = 1 if minimize else -1 9 | def forward(self, y_hat,y_true, sol_true,cache): 10 | 11 | loss = 0 12 | mm = self.mm 13 | ## print("shape to be printed: ") 14 | # print(sol_true.shape, cache.shape, y_hat.shape) 15 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 16 | 17 | for ii in range(len( y_hat )): 18 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 19 | loss /= len(y_hat) 20 | return loss 21 | 22 | class NCE_c(torch.nn.Module): 23 | def __init__(self, minimize = False): 24 | super().__init__() 25 | self.mm = 1 if minimize else -1 26 | def forward(self, y_hat,y_true, sol_true,cache): 27 | 28 | loss = 0 29 | mm = self.mm 30 | for ii in range(len( y_hat )): 31 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 32 | loss /= len(y_hat) 33 | return loss 34 | 35 | 36 | class MAP(torch.nn.Module): 37 | def __init__(self, minimize = False): 38 | super().__init__() 39 | self.mm = 1 if minimize else -1 40 | def forward(self, y_hat,y_true,sol_true,cache): 41 | 42 | loss = 0 43 | mm = self.mm 44 | 45 | for ii in range(len( y_hat )): 46 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 47 | loss /= len(y_hat) 48 | return loss 49 | 50 | 51 | class MAP_c(torch.nn.Module): 52 | def __init__(self, minimize = False): 53 | super().__init__() 54 | self.mm = 1 if minimize else -1 55 | def forward(self, y_hat,y_true,sol_true,cache): 56 | 57 | loss = 0 58 | mm = self.mm 59 | 60 | for ii in range(len( y_hat )): 61 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 62 | loss /= len(y_hat) 63 | return loss 64 | 65 | 66 | class MAP_c_actual(torch.nn.Module): 67 | def __init__(self, minimize = False): 68 | super().__init__() 69 | self.mm = 1 if minimize else -1 70 | def forward(self, y_hat,y_true,sol_true,cache): 71 | 72 | loss = 0 73 | mm = self.mm 74 | 75 | for ii in range(len( y_hat )): 76 | 77 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 78 | loss /= len(y_hat) 79 | return loss 80 | 81 | ###################################### Ranking Loss Functions ######################################### 82 | class PointwiseLoss(torch.nn.Module): 83 | def __init__(self): 84 | super().__init__() 85 | def forward(self, y_hat,y_true,sol_true,cache): 86 | ''' 87 | pred_weights: predicted cost vector [batch_size, img,img] 88 | true_weights: actual cost vector [batch_size, img,img] 89 | target: true shortest path [batch_size, img,img] 90 | cache: cache is torch array [cache_size, img,img] 91 | ''' 92 | loss
= 0 93 | 94 | for ii in range(len( y_hat )): 95 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 96 | loss /= len(y_hat) 97 | 98 | return loss 99 | class ListwiseLoss(torch.nn.Module): 100 | def __init__(self, temperature=0., minimize = False): 101 | super().__init__() 102 | self.temperature = temperature 103 | self.mm = 1 if minimize else -1 104 | def forward(self, y_hat,y_true,sol_true,cache): 105 | 106 | loss = 0 107 | mm, temperature = self.mm, self.temperature 108 | 109 | for ii in range(len( y_hat )): 110 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | 115 | 116 | class PairwisediffLoss(torch.nn.Module): 117 | def __init__(self, minimize = False): 118 | super().__init__() 119 | self.mm = 1 if minimize else -1 120 | 121 | def forward(self, y_hat,y_true,sol_true,cache): 122 | ''' 123 | pred_weights: predicted cost vector [batch_size, img,img] 124 | true_weights: actual cost vector [batch_size, img,img] 125 | target: true shortest path [batch_size, img,img] 126 | cache: cache is torch array [cache_size, img,img] 127 | ''' 128 | 129 | loss = 0 130 | for ii in range(len( y_hat )): 131 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 132 | 133 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 134 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 135 | 136 | 137 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 138 | loss /= len(y_hat) 139 | return loss 140 | 141 | class PairwiseLoss(torch.nn.Module): 142 | def __init__(self, margin=0., minimize = False): 143 | super().__init__() 144 | self.margin = margin 145 | self.mm = 1 if minimize else -1 146 | def forward(self, y_hat,y_true,sol_true,cache): 147 | ''' 148 | pred_weights: predicted cost vector [batch_size, img,img] 149 | true_weights: actual cost vector [batch_size, img,img] 150 | target: true shortest path [batch_size, img,img] 151 | cache: cache is torch array [cache_size, img,img] 152 | ''' 153 | relu = torch.nn.ReLU() 154 | loss = 0 155 | mm, margin = self.mm, self.margin 156 | for ii in range(len( y_hat )): 157 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 158 | 159 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 160 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 161 | 162 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 163 | loss /= len(y_hat) 164 | return loss -------------------------------------------------------------------------------- /Portfolio/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from torch.utils.data import DataLoader 6 | ###################################### Wrapper ######################################### 7 | class datawrapper(): 8 | def __init__(self, x,y, sol=None, solver= None ): 9 | self.x = x 10 | self.y = y 11 | if sol is None: 12 | if solver is None: 13 | raise Exception("Either give the solutions or provide a solver!") 14 | sol = [] 15 | for i in range(len(y)): 16 | sol.append( solver.solve(y[i]) ) 17 | sol =
np.array(sol).astype(np.float32) 18 | 19 | self.sol = sol 20 | 21 | def __len__(self): 22 | return len(self.y) 23 | 24 | def __getitem__(self, index): 25 | return self.x[index], self.y[index],self.sol[index] 26 | 27 | 28 | ###################################### Dataloader ######################################### 29 | 30 | class ShortestPathDataModule(pl.LightningDataModule): 31 | def __init__(self, train_df,valid_df,test_df,generator, normalize=False, batchsize: int = 32, num_workers: int=4): 32 | super().__init__() 33 | self.train_df = train_df 34 | self.valid_df = valid_df 35 | self.test_df = test_df 36 | self.batchsize = batchsize 37 | self.generator = generator 38 | self.num_workers = num_workers 39 | 40 | 41 | def train_dataloader(self): 42 | return DataLoader(self.train_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 43 | 44 | def val_dataloader(self): 45 | return DataLoader(self.valid_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 46 | 47 | def test_dataloader(self): 48 | return DataLoader(self.test_df, batch_size=1000, num_workers=self.num_workers) 49 | -------------------------------------------------------------------------------- /Portfolio/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | from Trainer.utils import batch_solve 9 | 10 | def SPOlayer(solver,minimize = False): 11 | mm = 1 if minimize else -1 12 | class SPOlayer_cls(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, y_hat,y_true,sol_true ): 15 | sol_hat = batch_solve(solver, y_hat) 16 | 17 | ctx.save_for_backward(y_hat,y_true,sol_true) 18 | 19 | return ( mm*(sol_hat -sol_true)*y_true).sum() 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | y_hat,y_true,sol_true = ctx.saved_tensors 24 | y_spo = 2*y_hat - y_true 25 | sol_spo = batch_solve(solver,y_spo) 26 | return (sol_true - sol_spo)*mm, None, None 27 | return SPOlayer_cls.apply 28 | 29 | 30 | def DBBlayer(solver,lambda_val=1., minimize = False): 31 | mm = 1 if minimize else -1 32 | class DBBlayer_cls(torch.autograd.Function): 33 | @staticmethod 34 | def forward(ctx, y_hat,y_true,sol_true ): 35 | sol_hat = batch_solve(solver, y_hat) 36 | 37 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 38 | 39 | return sol_hat 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | """ 44 | In the backward pass we compute the gradient to minimize the regret 45 | """ 46 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 47 | y_perturbed = y_hat + mm* lambda_val* grad_output 48 | sol_perturbed = batch_solve(solver, y_perturbed) 49 | 50 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 51 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Portfolio/Trainer/optimizer_module.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn, optim 4 | import torch.nn.functional as F 5 | 6 | ###################################### Gurobi Solver ######################################### 7 | import gurobipy as gp 8 | from gurobipy import GRB 9 | class gurobi_portfolio_solver: 10 | ''' 11 | Gurobi solver; takes the price vector as a parameter and returns the solution of the
maximization problem 12 | ''' 13 | def __init__(self, cov, gamma, n_stocks = 50): 14 | self.n_stocks = n_stocks 15 | model = gp.Model("qp") 16 | model.setParam('OutputFlag', 0) 17 | 18 | x = model.addMVar(shape= n_stocks, lb=0.0, vtype=GRB.CONTINUOUS, name="w") 19 | 20 | model.addConstr(sum(x) <= 1, "1") 21 | ### The original model involves an inequality; we once tested with an equality 22 | # model.addConstr(sum(x) == 1, "1") 23 | 24 | model.addConstr(x @ cov @ x <= gamma, "2") 25 | self.model = model 26 | self.x = x 27 | def solve(self, price): 28 | model = self.model 29 | x = self.x 30 | 31 | 32 | model.setObjective(price@x, gp.GRB.MAXIMIZE) 33 | model.optimize() 34 | 35 | if model.status==2: 36 | sol = x.x 37 | sol[sol < 1e-4] = 0 38 | return sol 39 | else: 40 | raise Exception("Optimal Solution not found") 41 | def solution_fromtorch(self, y_torch): 42 | if y_torch.dim()==1: 43 | return torch.from_numpy(self.solve( y_torch.detach().numpy())).float() 44 | else: 45 | solutions = [] 46 | for ii in range(len(y_torch)): 47 | solutions.append(torch.from_numpy(self.solve( y_torch[ii].detach().numpy())).float()) 48 | return torch.stack(solutions) 49 | 50 | import cvxpy as cp 51 | import cvxpylayers 52 | from cvxpylayers.torch import CvxpyLayer 53 | 54 | ### Build cvxpy model prototype 55 | class cvxsolver: 56 | ''' 57 | Implementation of QPTL with cvxpylayers and a quadratic regularizer 58 | ''' 59 | def __init__(self,cov, gamma, n_stocks = 50, mu=1e-6,regularizer='quadratic'): 60 | ''' 61 | regularizer: form of the regularizer - either 'quadratic' or 'entropic' 62 | ''' 63 | self.cov = cov 64 | self.gamma = gamma 65 | self.n_stocks = n_stocks 66 | self.mu = mu 67 | self.regularizer = regularizer 68 | 69 | 70 | 71 | 72 | x = cp.Variable(n_stocks) 73 | constraints = [x >= 0, cp.quad_form( x, cov ) <= gamma, cp.sum(x) <= 1] 74 | ### The original model involves an inequality; we once tested with an equality 75 | # constraints = [x >= 0, cp.quad_form( x, cov ) <= gamma, cp.sum(x) == 1] 76 | 77 | c = cp.Parameter(n_stocks) 78 | 79 | if self.regularizer=='quadratic': 80 | objective = cp.Minimize(-c @ x+ self.mu*cp.pnorm(x, p=2)) 81 | elif self.regularizer=='entropic': 82 | objective = cp.Minimize(-c @ x - self.mu*cp.sum(cp.entr(x)) ) 83 | problem = cp.Problem(objective, constraints) 84 | self.layer = CvxpyLayer(problem, parameters=[c], variables=[x]) 85 | def solution(self, y): 86 | 87 | sol, = self.layer(y) 88 | return sol 89 | 90 | ### We cannot run IntOpt on this problem; it cannot handle quadratic constraints 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /Portfolio/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def batch_solve(solver, y,relaxation =False): 5 | sol = [] 6 | for i in range(len(y)): 7 | sol.append( solver.solution_fromtorch(y[i]).reshape(1,-1) ) 8 | return torch.cat(sol,0).float() 9 | 10 | 11 | def regret_list(solver, y_hat,y_true, sol_true, minimize = False): 12 | ''' 13 | computes the regret of more than one cost vector 14 | ''' 15 | mm = 1 if minimize else -1 16 | sol_hat = batch_solve(solver, y_hat ) 17 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 18 | def abs_regret_list(solver,y_hat,y_true,sol_true,minimize = False): 19 | mm = 1 if minimize else -1 20 | sol_hat = batch_solve(solver, y_hat ) 21 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 22 | 23 | def regret_fn(solver, y_hat,y_true, sol_true, minimize = False): 24 |
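### Regret, in words (see regret_list above): mm * ((sol_hat - sol_true) . y_true) / (sol_true . y_true) is the
### relative loss in the true objective from acting on the predicted y_hat instead of the true y_true.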
### Convert infinity to 1; there are lots of infinities where all the returns are negative 25 | return torch.nan_to_num( regret_list(solver, y_hat,y_true, sol_true, minimize= minimize), nan=0., posinf=1.).mean() 26 | 27 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize = False): 28 | return abs_regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 29 | 30 | 31 | def growcache(solver, cache, y_hat): 32 | ''' 33 | cache is torch array [currentpoolsize,48] 34 | y_hat is torch array [batch_size,48] 35 | ''' 36 | sol = batch_solve(solver, y_hat,relaxation =False).detach().numpy() 37 | cache_np = cache.detach().numpy() 38 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 39 | # deduplicate rows via numpy's np.unique, then convert back to torch 40 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Portfolio/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "DCOL", 4 | "N": 1000, 5 | "noise": 1, 6 | "deg": 1, 7 | "lr": 0.1, 8 | "mu": 10.0 9 | }, 10 | { 11 | "model": "DCOL", 12 | "N": 1000, 13 | "noise": 1, 14 | "deg": 4, 15 | "lr": 0.05, 16 | "mu": 10.0 17 | }, 18 | { 19 | "model": "DCOL", 20 | "N": 1000, 21 | "noise": 1, 22 | "deg": 16, 23 | "lr": 0.05, 24 | "mu": 10.0 25 | }, 26 | { 27 | "model": "DCOL", 28 | "N": 1000, 29 | "noise": 1, 30 | "deg": 8, 31 | "lr": 0.1, 32 | "mu": 10.0 33 | }, 34 | { 35 | "model": "baseline", 36 | "N": 1000, 37 | "noise": 1, 38 | "deg": 1, 39 | "lr": 0.01 40 | }, 41 | { 42 | "model": "baseline", 43 | "N": 1000, 44 | "noise": 1, 45 | "deg": 4, 46 | "lr": 0.05 47 | }, 48 | { 49 | "model": "baseline", 50 | "N": 1000, 51 | "noise": 1, 52 | "deg": 16, 53 | "lr": 0.05 54 | }, 55 | { 56 | "model": "baseline", 57 | "N": 1000, 58 | "noise": 1, 59 | "deg": 8, 60 | "lr": 0.1 61 | }, 62 | { 63 | "model": "SPO", 64 | "N": 1000, 65 | "noise": 1, 66 | "deg": 1, 67 | "lr": 0.5 68 | }, 69 | { 70 | "model": "SPO", 71 | "N": 1000, 72 | "noise": 1, 73 | "deg": 4, 74 | "lr": 1.0 75 | }, 76 | { 77 | "model": "SPO", 78 | "N": 1000, 79 | "noise": 1, 80 | "deg": 16, 81 | "lr": 0.5 82 | }, 83 | { 84 | "model": "SPO", 85 | "N": 1000, 86 | "noise": 1, 87 | "deg": 8, 88 | "lr": 0.5 89 | }, 90 | { 91 | "model": "DBB", 92 | "N": 1000, 93 | "noise": 1, 94 | "deg": 1, 95 | "lr": 0.1, 96 | "lambda_val": 1.0 97 | }, 98 | { 99 | "model": "DBB", 100 | "N": 1000, 101 | "noise": 1, 102 | "deg": 4, 103 | "lr": 0.1, 104 | "lambda_val": 1.0 105 | }, 106 | { 107 | "model": "DBB", 108 | "N": 1000, 109 | "noise": 1, 110 | "deg": 16, 111 | "lr": 0.1, 112 | "lambda_val": 1.0 113 | }, 114 | { 115 | "model": "DBB", 116 | "N": 1000, 117 | "noise": 1, 118 | "deg": 8, 119 | "lr": 0.1, 120 | "lambda_val": 1.0 121 | }, 122 | { 123 | "model": "FenchelYoung", 124 | "N": 1000, 125 | "noise": 1, 126 | "deg": 1, 127 | "lr": 0.1, 128 | "sigma": 0.01 129 | }, 130 | { 131 | "model": "FenchelYoung", 132 | "N": 1000, 133 | "noise": 1, 134 | "deg": 4, 135 | "lr": 0.5, 136 | "sigma": 0.01 137 | }, 138 | { 139 | "model": "FenchelYoung", 140 | "N": 1000, 141 | "noise": 1, 142 | "deg": 16, 143 | "lr": 1.0, 144 | "sigma": 2.0 145 | }, 146 | { 147 | "model": "FenchelYoung", 148 | "N": 1000, 149 | "noise": 1, 150 | "deg": 8, 151 | "lr": 1.0, 152 | "sigma": 0.01 153 | }, 154 | { 155 | "model": "IMLE", 156 | "N": 1000, 157 | "noise": 1, 158 | "deg": 1, 159 | "lr": 0.5, 160 | "beta": 0.1, 161 | "temperature": 0.1, 162 | "k": 5 163 | }, 164 | { 165 | "model": "IMLE", 166 |
"N": 1000, 167 | "noise": 1, 168 | "deg": 4, 169 | "lr": 0.5, 170 | "beta": 0.1, 171 | "temperature": 0.5, 172 | "k": 5 173 | }, 174 | { 175 | "model": "IMLE", 176 | "N": 1000, 177 | "noise": 1, 178 | "deg": 16, 179 | "lr": 0.5, 180 | "beta": 0.1, 181 | "temperature": 0.05, 182 | "k": 5 183 | }, 184 | { 185 | "model": "IMLE", 186 | "N": 1000, 187 | "noise": 1, 188 | "deg": 8, 189 | "lr": 0.5, 190 | "beta": 0.1, 191 | "temperature": 0.05, 192 | "k": 5 193 | }, 194 | { 195 | "model": "CachingPO", 196 | "loss": "MAP_c", 197 | "N": 1000, 198 | "noise": 1, 199 | "deg": 1, 200 | "lr": 0.01 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "MAP_c", 205 | "N": 1000, 206 | "noise": 1, 207 | "deg": 4, 208 | "lr": 1.0 209 | }, 210 | { 211 | "model": "CachingPO", 212 | "loss": "MAP_c", 213 | "N": 1000, 214 | "noise": 1, 215 | "deg": 16, 216 | "lr": 1.0 217 | }, 218 | { 219 | "model": "CachingPO", 220 | "loss": "MAP_c", 221 | "N": 1000, 222 | "noise": 1, 223 | "deg": 8, 224 | "lr": 0.05 225 | }, 226 | { 227 | "model": "CachingPO", 228 | "loss": "pairwise_diff", 229 | "N": 1000, 230 | "noise": 1, 231 | "deg": 1, 232 | "lr": 0.1 233 | }, 234 | { 235 | "model": "CachingPO", 236 | "loss": "pairwise_diff", 237 | "N": 1000, 238 | "noise": 1, 239 | "deg": 4, 240 | "lr": 0.1 241 | }, 242 | { 243 | "model": "CachingPO", 244 | "loss": "pairwise_diff", 245 | "N": 1000, 246 | "noise": 1, 247 | "deg": 16, 248 | "lr": 0.05 249 | }, 250 | { 251 | "model": "CachingPO", 252 | "loss": "pairwise_diff", 253 | "N": 1000, 254 | "noise": 1, 255 | "deg": 8, 256 | "lr": 0.1 257 | }, 258 | { 259 | "model": "CachingPO", 260 | "loss": "pairwise", 261 | "N": 1000, 262 | "noise": 1, 263 | "deg": 1, 264 | "lr": 0.01, 265 | "tau": 0.01 266 | }, 267 | { 268 | "model": "CachingPO", 269 | "loss": "pairwise", 270 | "N": 1000, 271 | "noise": 1, 272 | "deg": 4, 273 | "lr": 0.01, 274 | "tau": 0.1 275 | }, 276 | { 277 | "model": "CachingPO", 278 | "loss": "pairwise", 279 | "N": 1000, 280 | "noise": 1, 281 | "deg": 16, 282 | "lr": 0.1, 283 | "tau": 0.05 284 | }, 285 | { 286 | "model": "CachingPO", 287 | "loss": "pairwise", 288 | "N": 1000, 289 | "noise": 1, 290 | "deg": 8, 291 | "lr": 0.01, 292 | "tau": 0.01 293 | }, 294 | { 295 | "model": "CachingPO", 296 | "loss": "listwise", 297 | "N": 1000, 298 | "noise": 1, 299 | "deg": 1, 300 | "lr": 0.1, 301 | "tau": 0.01 302 | }, 303 | { 304 | "model": "CachingPO", 305 | "loss": "listwise", 306 | "N": 1000, 307 | "noise": 1, 308 | "deg": 4, 309 | "lr": 0.1, 310 | "tau": 0.01 311 | }, 312 | { 313 | "model": "CachingPO", 314 | "loss": "listwise", 315 | "N": 1000, 316 | "noise": 1, 317 | "deg": 16, 318 | "lr": 0.05, 319 | "tau": 0.005 320 | }, 321 | { 322 | "model": "CachingPO", 323 | "loss": "listwise", 324 | "N": 1000, 325 | "noise": 1, 326 | "deg": 8, 327 | "lr": 0.1, 328 | "tau": 0.01 329 | } 330 | ] -------------------------------------------------------------------------------- /Portfolio/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Portfolio/imle/__init__.py -------------------------------------------------------------------------------- /Portfolio/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 
10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device on which to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Portfolio/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
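    Concretely, :meth:`params` returns ``theta_prime = alpha * theta - beta * dy``, i.e. it moves the distribution parameters against the downstream gradient ``dy``.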
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Portfolio/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the portfolio optimization problem. 2 | 3 | 4 | Download the portfolio optimization dataset from the repository: https://doi.org/10.48804/KT2P3Z. 5 | 6 | Then extract the data by running 7 | ``` 8 | tar -xvzf PortfolioData.tar.gz 9 | ``` 10 | 11 | Then `test_sp.py` can be used to run an experiment. 12 | To reproduce the results of the experiments, run 13 | ``` 14 | python test_sp.py --scheduler True 15 | ``` 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Predict-then-Optimize (PtO) Problems 2 | 3 | ## About 4 | 5 | This repository provides a comprehensive framework for benchmarking Predict-then-Optimize (PtO) problems using Decision-Focused Learning (DFL) approaches. PtO problems involve making predictions that are used as input to downstream optimization tasks, where traditional two-stage methods often lead to suboptimal solutions. DFL addresses this by training machine learning models that directly optimize for the downstream decision-making objectives. 6 | 7 | 8 | This repository contains the implementation for the paper (Accepted to Journal of Artificial Intelligence Research (JAIR)): 9 | 10 | > Mandi, J., Kotary, J., Berden, S., Mulamba, M., Bucarey, V., Guns, T., & Fioretto, F. (2024). Decision-focused learning: Foundations, state of the art, benchmark and future opportunities. Journal of Artificial Intelligence Research, 80, 1623-1701. [DOI: 10.1613/jair.1.15320](https://doi.org/10.1613/jair.1.15320) 11 | 12 | If you use this code in your research, please cite: 13 | ```bibtex 14 | @article{mandi2024decision, 15 | title={Decision-focused learning: Foundations, state of the art, benchmark and future opportunities}, 16 | author={Mandi, Jayanta and Kotary, James and Berden, Senne and Mulamba, Maxime and Bucarey, Victor and Guns, Tias and Fioretto, Ferdinando}, 17 | journal={Journal of Artificial Intelligence Research}, 18 | volume={80}, 19 | pages={1623--1701}, 20 | year={2024}, 21 | doi={10.1613/jair.1.15320} 22 | } 23 | ``` 24 | 25 | 26 | 27 | ## Installation 28 | 29 | ### Prerequisites 30 | - Python 3.7.3 (recommended) 31 | - pip or conda package manager 32 | 33 | ### Option 1: Using venv (Recommended) 34 | 35 | 1. Create and activate a virtual environment: 36 | ```bash 37 | python3 -m venv benchmarking_env 38 | source benchmarking_env/bin/activate 39 | ``` 40 | 41 | 2. Upgrade pip: 42 | ```bash 43 | pip install --upgrade pip 44 | ``` 45 | 46 | 3.
Install required packages: 47 | ```bash 48 | pip install -r requirements.txt 49 | ``` 50 | 51 | ### Option 2: Using Conda 52 | 53 | 1. Install Conda by following the [official installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 54 | 55 | 2. Create and activate the environment: 56 | ```bash 57 | # Create environment 58 | conda env create -n benchmarking_env --file environment.yml 59 | 60 | # Activate on Linux/macOS 61 | conda activate benchmarking_env 62 | 63 | # Activate on Windows 64 | source activate benchmarking_env 65 | ``` 66 | 67 | ## Running Experiments 68 | 69 | Navigate to the corresponding experiment directory to run specific benchmarks. 70 | 71 | ## Contributing 72 | 73 | Feel free to open issues or submit pull requests if you find any problems or have suggestions for improvements. 74 | -------------------------------------------------------------------------------- /ShortestPath/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 
69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /ShortestPath/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | from Trainer.optimizer_module import spsolver 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from torch.utils.data import DataLoader 6 | ###################################### Wrapper ######################################### 7 | class datawrapper(): 8 | def __init__(self, x,y, sol=None, solver= spsolver ): 9 | self.x = x 10 | self.y = y 11 | if sol is None: 12 | sol = [] 13 | for i in range(len(y)): 14 | sol.append( solver.shortest_pathsolution(y[i]) ) 15 | sol = np.array(sol).astype(np.float32) 16 | self.sol = sol 17 | 18 | def __len__(self): 19 | return len(self.y) 20 | 21 | def __getitem__(self, index): 22 | return self.x[index], self.y[index],self.sol[index] 23 | 24 | 25 | ###################################### Dataloader ######################################### 26 | 27 | class ShortestPathDataModule(pl.LightningDataModule): 28 | def __init__(self, train_df,valid_df,test_df,generator, normalize=False, batchsize: int = 32, num_workers: int=4): 29 | super().__init__() 30 | self.train_df = train_df 31 | self.valid_df = valid_df 32 | self.test_df = test_df 33 | self.batchsize = batchsize 34 | self.generator = generator 35 | self.num_workers = num_workers 36 | 37 | 38 | def train_dataloader(self): 39 | return DataLoader(self.train_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 40 | 41 | def val_dataloader(self): 42 | return DataLoader(self.valid_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 43 | 44 | def test_dataloader(self): 45 | return DataLoader(self.test_df, batch_size=1000, num_workers=self.num_workers) 46 | -------------------------------------------------------------------------------- /ShortestPath/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | from Trainer.utils import batch_solve 9 | 10 | def SPOlayer(solver,minimize = True): 11 | mm = 1 if minimize else -1 12 | class SPOlayer_cls(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, y_hat,y_true,sol_true ): 15 | sol_hat = batch_solve(solver, y_hat) 16 | 17 | ctx.save_for_backward(y_hat,y_true,sol_true) 18 | 19 | return ( 
mm*(sol_hat -sol_true)*y_true).sum() 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | y_hat,y_true,sol_true = ctx.saved_tensors 24 | y_spo = 2*y_hat - y_true 25 | sol_spo = batch_solve(solver,y_spo) 26 | return (sol_true - sol_spo)*mm, None, None 27 | return SPOlayer_cls.apply 28 | 29 | 30 | def DBBlayer(solver,lambda_val=1., minimize = True): 31 | mm = 1 if minimize else -1 32 | class DBBlayer_cls(torch.autograd.Function): 33 | @staticmethod 34 | def forward(ctx, y_hat,y_true,sol_true ): 35 | sol_hat = batch_solve(solver, y_hat) 36 | 37 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 38 | 39 | return sol_hat 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | """ 44 | In the backward pass we compute the gradient to minimize the regret 45 | """ 46 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 47 | y_perturbed = y_hat + mm* lambda_val* grad_output 48 | sol_perturbed = batch_solve(solver, y_perturbed) 49 | 50 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 51 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /ShortestPath/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, optim 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | from torch.utils.data import DataLoader 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | 9 | def batch_solve(solver, y,relaxation =False): 10 | sol = [] 11 | for i in range(len(y)): 12 | sol.append( solver.solution_fromtorch(y[i]).reshape(1,-1) ) 13 | return torch.cat(sol,0).float() 14 | 15 | 16 | def regret_list(solver, y_hat,y_true, sol_true, minimize= True): 17 | ''' 18 | computes the regret of more than one cost vector 19 | ''' 20 | mm = 1 if minimize else -1 21 | sol_hat = batch_solve(solver, y_hat ) 22 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 23 | def abs_regret_list(solver,y_hat,y_true,sol_true,minimize = True): 24 | mm = 1 if minimize else -1 25 | sol_hat = batch_solve(solver, y_hat ) 26 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 27 | 28 | def regret_fn(solver, y_hat,y_true, sol_true, minimize= True): 29 | return regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 30 | 31 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize= True): 32 | return abs_regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 33 | 34 | 35 | def growcache(solver, cache, y_hat): 36 | ''' 37 | cache is torch array [currentpoolsize,48] 38 | y_hat is torch array [batch_size,48] 39 | ''' 40 | sol = batch_solve(solver, y_hat,relaxation =False).detach().numpy() 41 | cache_np = cache.detach().numpy() 42 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 43 | # deduplicate rows via numpy's np.unique, then convert back to torch 44 | return torch.from_numpy(cache_np).float() 45 | 46 | 47 | def cachingsolver(cache, y_hat, minimize= True): 48 | mm = 1 if minimize else -1 49 | solutions = [] 50 | for ii in range(len(y_hat)): 51 | val,ind = torch.min(((cache)*y_hat[ii]*mm).sum(dim=1),0) 52 | solutions.append(cache[ind]) 53 | 54 | return torch.stack(solutions).float() -------------------------------------------------------------------------------- /ShortestPath/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "IntOpt", 4 | "N": 1000, 5 | "noise": 0.5, 6 | "deg": 1, 7 | "lr": 0.1, 8 | "thr": 0.001, 9 | "damping":
1.0 10 | }, 11 | { 12 | "model": "IntOpt", 13 | "N": 1000, 14 | "noise": 0.5, 15 | "deg": 2, 16 | "lr": 0.1, 17 | "thr": 0.1, 18 | "damping": 0.01 19 | }, 20 | { 21 | "model": "IntOpt", 22 | "N": 1000, 23 | "noise": 0.5, 24 | "deg": 4, 25 | "lr": 0.1, 26 | "thr": 1e-06, 27 | "damping": 0.1 28 | }, 29 | { 30 | "model": "IntOpt", 31 | "N": 1000, 32 | "noise": 0.5, 33 | "deg": 6, 34 | "lr": 0.1, 35 | "thr": 0.001, 36 | "damping": 1e-06 37 | }, 38 | { 39 | "model": "IntOpt", 40 | "N": 1000, 41 | "noise": 0.5, 42 | "deg": 8, 43 | "lr": 0.1, 44 | "thr": 10.0, 45 | "damping": 1.0 46 | }, 47 | { 48 | "model": "CachingPO", 49 | "loss": "pairwise_diff", 50 | "N": 1000, 51 | "noise": 0.5, 52 | "deg": 1, 53 | "lr": 0.1 54 | }, 55 | { 56 | "model": "CachingPO", 57 | "loss": "pairwise_diff", 58 | "N": 1000, 59 | "noise": 0.5, 60 | "deg": 2, 61 | "lr": 0.1 62 | }, 63 | { 64 | "model": "CachingPO", 65 | "loss": "pairwise_diff", 66 | "N": 1000, 67 | "noise": 0.5, 68 | "deg": 4, 69 | "lr": 0.5 70 | }, 71 | { 72 | "model": "CachingPO", 73 | "loss": "pairwise_diff", 74 | "N": 1000, 75 | "noise": 0.5, 76 | "deg": 6, 77 | "lr": 0.1 78 | }, 79 | { 80 | "model": "CachingPO", 81 | "loss": "pairwise_diff", 82 | "N": 1000, 83 | "noise": 0.5, 84 | "deg": 8, 85 | "lr": 1.0 86 | }, 87 | { 88 | "model": "CachingPO", 89 | "loss": "pairwise", 90 | "N": 1000, 91 | "noise": 0.5, 92 | "deg": 1, 93 | "lr": 1.0, 94 | "tau": 1.0 95 | }, 96 | { 97 | "model": "CachingPO", 98 | "loss": "pairwise", 99 | "N": 1000, 100 | "noise": 0.5, 101 | "deg": 2, 102 | "lr": 0.1, 103 | "tau": 0.5 104 | }, 105 | { 106 | "model": "CachingPO", 107 | "loss": "pairwise", 108 | "N": 1000, 109 | "noise": 0.5, 110 | "deg": 4, 111 | "lr": 0.1, 112 | "tau": 1.0 113 | }, 114 | { 115 | "model": "CachingPO", 116 | "loss": "pairwise", 117 | "N": 1000, 118 | "noise": 0.5, 119 | "deg": 6, 120 | "lr": 1.0, 121 | "tau": 10.0 122 | }, 123 | { 124 | "model": "CachingPO", 125 | "loss": "pairwise", 126 | "N": 1000, 127 | "noise": 0.5, 128 | "deg": 8, 129 | "lr": 1.0, 130 | "tau": 10.0 131 | }, 132 | { 133 | "model": "CachingPO", 134 | "loss": "listwise", 135 | "N": 1000, 136 | "noise": 0.5, 137 | "deg": 1, 138 | "lr": 0.1, 139 | "tau": 0.1 140 | }, 141 | { 142 | "model": "CachingPO", 143 | "loss": "listwise", 144 | "N": 1000, 145 | "noise": 0.5, 146 | "deg": 2, 147 | "lr": 0.1, 148 | "tau": 0.1 149 | }, 150 | { 151 | "model": "CachingPO", 152 | "loss": "listwise", 153 | "N": 1000, 154 | "noise": 0.5, 155 | "deg": 4, 156 | "lr": 1.0, 157 | "tau": 0.1 158 | }, 159 | { 160 | "model": "CachingPO", 161 | "loss": "listwise", 162 | "N": 1000, 163 | "noise": 0.5, 164 | "deg": 6, 165 | "lr": 0.1, 166 | "tau": 1.0 167 | }, 168 | { 169 | "model": "CachingPO", 170 | "loss": "listwise", 171 | "N": 1000, 172 | "noise": 0.5, 173 | "deg": 8, 174 | "lr": 1.0, 175 | "tau": 1.0 176 | } 177 | ] -------------------------------------------------------------------------------- /ShortestPath/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/ShortestPath/imle/__init__.py -------------------------------------------------------------------------------- /ShortestPath/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, 
abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device on which to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /ShortestPath/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /ShortestPath/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the shortest path problem on a $5 \times 5$ grid. 2 | 3 | Download the shortest path dataset from the repository: https://doi.org/10.48804/KT2P3Z. 4 | Then extract the data by running 5 | ``` 6 | tar -xvzf ShortestPathData.tar.gz 7 | ``` 8 | 9 | Then `test_sp.py` can be used to run an experiment. 10 | To reproduce the results of the experiments, run 11 | ``` 12 | python test_sp.py --scheduler True 13 | ``` 14 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: benchmarking_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - gurobi 6 | dependencies: 7 | - python=3.7.3 8 | - pandas=1.3.5 9 | - tensorboard=2.9.1 10 | - pytorch-lightning=1.6.4 11 | - torch=1.12.0 12 | - torchvision=0.13.0 13 | - scipy=1.6.3 14 | - numpy=1.21.6 15 | - scikit-learn=1.0.2 16 | - cvxpy=1.3.0 17 | - cvxpylayers=0.1.5 18 | - networkx=2.6.3 19 | - ortools=9.3.10459 20 | - qpth=0.0.15 21 | - gurobipy=9.5.2 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy==1.3.0 2 | cvxpylayers==0.1.5 3 | diffcp==1.0.23 4 | gurobipy==9.5.2 5 | joblib==1.4.2 6 | Markdown==3.6 7 | multidict==6.0.5 8 | networkx==2.6.3 9 | numpy==1.22.4 10 | ortools==9.3.10497 11 | pandas==1.3.5 12 | pillow==10.3.0 13 | protobuf==3.19.6 14 | pytorch-lightning==1.6.4 15 | PyYAML==6.0.1 16 | scikit-learn==1.0.2 17 | scipy==1.7.3 18 | scs==3.2.4.post1 19 | tensorboard==2.9.1 20 | torch==1.12.0 21 | torchmetrics==1.3.2 22 | torchvision==0.13.0 23 | tqdm==4.66.4 24 | -------------------------------------------------------------------------------- /warcraft/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
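        Example (an illustrative sketch, assuming ``dijkstra_solver`` is a hypothetical
        batched shortest-path solver, hence ``maximize=False``)::

            >>> criterion = FenchelYoungLoss(func=dijkstra_solver, num_samples=10, sigma=0.5, maximize=False)
            >>> loss = criterion(pred_weights, true_paths).mean()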
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /warcraft/Trainer/computervisionmodels.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torchvision 7 | import pytorch_lightning as pl 8 | 9 | def get_model(model_name, out_features, in_channels, arch_params): 10 | preloaded_models = {"ResNet18": torchvision.models.resnet18} 11 | 12 | own_models = {"ConvNet": ConvNet, "MLP": MLP, "PureConvNet": PureConvNet, "CombResnet18": CombRenset18} 13 | 14 | if model_name in preloaded_models: 15 | model = preloaded_models[model_name](pretrained=False, num_classes=out_features, **arch_params) 16 | 17 | # Hacking ResNets to expect 'in_channels' input channel (and not three) 18 | del model.conv1 19 | model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 20 | return model 21 | elif model_name in own_models: 22 | return own_models[model_name](out_features=out_features, in_channels=in_channels, **arch_params) 23 | else: 24 | raise ValueError(f"Model name {model_name} not recognized!") 25 | 26 | 27 | def dim_after_conv2D(input_dim, stride, kernel_size): 28 | return (input_dim - kernel_size + 2) // stride 29 | 30 | 31 | class CombRenset18(nn.Module): 32 | 33 | def __init__(self, out_features, in_channels): 34 | super().__init__() 35 | self.resnet_model = torchvision.models.resnet18(pretrained=False, num_classes=out_features) 36 | del self.resnet_model.conv1 37 | self.resnet_model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 38 | output_shape = (int(sqrt(out_features)), int(sqrt(out_features))) 39 | self.pool = nn.AdaptiveMaxPool2d(output_shape) 40 | #self.last_conv = nn.Conv2d(128, 1, kernel_size=1, stride=1) 41 | 42 | 43 | def forward(self, x): 44 | x = self.resnet_model.conv1(x) 45 | x = self.resnet_model.bn1(x) 46 | x = self.resnet_model.relu(x) 47 | x = self.resnet_model.maxpool(x) 48 | x = self.resnet_model.layer1(x) 49 | #x = self.resnet_model.layer2(x) 50 | #x = self.resnet_model.layer3(x) 51 | #x = self.last_conv(x) 52 | x = self.pool(x) 53 | x = x.mean(dim=1) 54 | return x 55 | 56 | 57 | class ConvNet(torch.nn.Module): 58 | def __init__(self, out_features, in_channels, kernel_size, stride, linear_layer_size, channels_1, channels_2): 59 | super().__init__() 60 | self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=channels_1, kernel_size=kernel_size, stride=stride) 61 | self.conv2 = nn.Conv2d(in_channels=channels_1, out_channels=channels_2, kernel_size=kernel_size, stride=stride) 62 | 63 | output_shape = (4, 4) 64 | self.pool = nn.AdaptiveAvgPool2d(output_shape) 65 | 66 | self.fc1 = nn.Linear(in_features=output_shape[0] * output_shape[1] * channels_2, out_features=linear_layer_size) 67 | self.fc2 = nn.Linear(in_features=linear_layer_size, out_features=out_features) 68 | 69 | def forward(self, x): 70 | batch_size = x.shape[0] 71 | x = F.relu(self.conv1(x)) 72 | x = F.max_pool2d(x, 2, 2) 73 | x 
= F.relu(self.conv2(x)) 74 | x = self.pool(x) 75 | x = x.view(batch_size, -1) 76 | x = F.relu(self.fc1(x)) 77 | x = self.fc2(x) 78 | return x 79 | 80 | 81 | class MLP(torch.nn.Module): 82 | def __init__(self, out_features, in_channels, hidden_layer_size): 83 | super().__init__() 84 | input_dim = in_channels * 40 * 20 85 | self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_layer_size) 86 | self.fc2 = nn.Linear(in_features=hidden_layer_size, out_features=out_features) 87 | 88 | def forward(self, x): 89 | batch_size = x.shape[0] 90 | x = x.view(batch_size, -1) 91 | x = torch.tanh(self.fc1(x)) 92 | x = self.fc2(x) 93 | return x 94 | 95 | 96 | class PureConvNet(torch.nn.Module): 97 | 98 | act_funcs = {"relu": F.relu, "tanh": torch.tanh, "identity": lambda x: x} 99 | 100 | def __init__(self, out_features, pooling, use_second_conv, kernel_size, in_channels, 101 | channels_1=20, channels_2=20, act_func="relu"): 102 | super().__init__() 103 | self.use_second_conv = use_second_conv 104 | 105 | self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=channels_1, kernel_size=kernel_size, stride=1) 106 | self.conv2 = nn.Conv2d(in_channels=channels_1, out_channels=channels_2, kernel_size=kernel_size, stride=1) 107 | 108 | output_shape = (int(sqrt(out_features)), int(sqrt(out_features))) 109 | if pooling == "average": 110 | self.pool = nn.AdaptiveAvgPool2d(output_shape) 111 | elif pooling == "max": 112 | self.pool = nn.AdaptiveMaxPool2d(output_shape) 113 | 114 | self.conv3 = nn.Conv2d(in_channels=channels_2 if use_second_conv else channels_1, 115 | out_channels=1, kernel_size=1, stride=1) 116 | self.act_func = PureConvNet.act_funcs[act_func] 117 | 118 | def forward(self, x): 119 | x = self.act_func(self.conv1(x)) 120 | if self.use_second_conv: 121 | x = self.act_func(self.conv2(x)) 122 | x = self.pool(x) 123 | x = self.conv3(x) 124 | return x 125 | -------------------------------------------------------------------------------- /warcraft/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from torch.utils.data import Dataset, DataLoader 4 | import pytorch_lightning as pl 5 | import torch 6 | 7 | class WarcraftImageDataset(Dataset): 8 | def __init__(self, inputs, labels, true_weights): 9 | self.inputs = inputs 10 | self.labels = labels 11 | self.true_weights = true_weights 12 | 13 | def __len__(self): 14 | return len(self.labels) 15 | 16 | def __getitem__(self, idx): 17 | 18 | return self.inputs[idx], self.labels[idx], self.true_weights[idx] 19 | 20 | def return_trainlabel(data_dir): 21 | train_prefix = "train" 22 | 23 | train_labels = np.load(os.path.join(data_dir, train_prefix + "_shortest_paths.npy")) 24 | train_labels = np.unique(train_labels,axis=0) 25 | return torch.from_numpy(train_labels) 26 | 27 | 28 | class WarcraftDataModule(pl.LightningDataModule): 29 | def __init__(self, data_dir, use_test_set=True, normalize=True, batch_size=70, generator=None,num_workers=4): 30 | super().__init__() 31 | self.batch_size = batch_size 32 | self.generator = generator 33 | self.num_workers = num_workers 34 | 35 | train_prefix = "train" 36 | val_prefix = "val" 37 | test_prefix = "test" 38 | data_suffix = "maps" 39 | train_inputs = np.load(os.path.join(data_dir, train_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 40 | train_inputs = train_inputs.transpose(0, 3, 1, 2) # channel first 41 | 42 | val_inputs = np.load(os.path.join(data_dir, val_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 43
| val_inputs = val_inputs.transpose(0, 3, 1, 2) # channel first 44 | if use_test_set: 45 | test_inputs = np.load(os.path.join(data_dir, test_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 46 | test_inputs = test_inputs.transpose(0, 3, 1, 2) # channel first 47 | 48 | train_labels = np.load(os.path.join(data_dir, train_prefix + "_shortest_paths.npy")) 49 | train_true_weights = np.load(os.path.join(data_dir, train_prefix + "_vertex_weights.npy")).astype(np.float32) 50 | if normalize: 51 | mean, std = ( 52 | np.mean(train_inputs, axis=(0, 2, 3), keepdims=True), 53 | np.std(train_inputs, axis=(0, 2, 3), keepdims=True), 54 | ) 55 | train_inputs -= mean 56 | train_inputs /= std 57 | val_inputs -= mean 58 | val_inputs /= std 59 | if use_test_set: 60 | test_inputs -= mean 61 | test_inputs /= std 62 | val_labels = np.load(os.path.join(data_dir, val_prefix + "_shortest_paths.npy")) 63 | val_true_weights = np.load(os.path.join(data_dir, val_prefix + "_vertex_weights.npy")).astype(np.float32) 64 | val_full_images = np.load(os.path.join(data_dir, val_prefix + "_maps.npy")) 65 | if use_test_set: 66 | test_labels = np.load(os.path.join(data_dir, test_prefix + "_shortest_paths.npy")) 67 | test_true_weights = np.load(os.path.join(data_dir, test_prefix + "_vertex_weights.npy")).astype(np.float32) 68 | # test_full_images = np.load(os.path.join(data_dir, test_prefix + "_maps.npy")) 69 | self.training_data = WarcraftImageDataset(train_inputs, train_labels, train_true_weights) 70 | self.val_data = WarcraftImageDataset(val_inputs, val_labels, val_true_weights) 71 | if use_test_set: 72 | self.test_data = WarcraftImageDataset(test_inputs, test_labels, test_true_weights) 73 | 74 | def denormalize(x): 75 | return (x * std) + mean 76 | 77 | self.metadata = { 78 | "input_image_size": val_full_images[0].shape[1], 79 | "output_features": val_true_weights[0].shape[0] * val_true_weights[0].shape[1], 80 | "num_channels": val_full_images[0].shape[-1], 81 | "output_shape": (val_true_weights[0].shape[0] , val_true_weights[0].shape[1]), 82 | "denormalize": denormalize 83 | } 84 | 85 | 86 | 87 | 88 | def train_dataloader(self): 89 | return DataLoader(self.training_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 90 | 91 | def val_dataloader(self): 92 | return DataLoader(self.val_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 93 | 94 | def test_dataloader(self): 95 | return DataLoader(self.test_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 96 | -------------------------------------------------------------------------------- /warcraft/Trainer/metric.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | 7 | def normalized_regret(true_weights, true_paths, suggested_paths, minimize=True): 8 | mm = 1 if minimize else -1 9 | suggested_paths_costs = (suggested_paths * true_weights).sum((1,2)) 10 | true_paths_costs = (true_paths * true_weights).sum((1,2)) 11 | 12 | return mm*(( suggested_paths_costs - true_paths_costs)/true_paths_costs).mean() 13 | 14 | 15 | def regret_list(true_weights, true_paths, suggested_paths, minimize=True): 16 | mm = 1 if minimize else -1 17 | suggested_paths_costs = (suggested_paths * true_weights).sum((1,2)) 18 | true_paths_costs = (true_paths * true_weights).sum((1,2)) 19 | 20 | 
return mm*(( suggested_paths_costs - true_paths_costs)/true_paths_costs) 21 | 22 | def normalized_hamming(true_weights, true_paths, suggested_paths, minimize=True): 23 | errors = suggested_paths * (1.0 - true_paths) + (1.0 - suggested_paths) * true_paths 24 | # print( errors.sum((1,2)), true_paths.sum((1,2)) ) 25 | return (errors.sum((1,2))/true_paths.sum((1,2))).mean() 26 | 27 | 28 | class HammingLoss(torch.nn.Module): 29 | def forward(self, suggested, target, true_weights): 30 | errors = suggested * (1.0 - target) + (1.0 - suggested) * target 31 | return errors.mean(dim=0).sum() 32 | # return (torch.mean(suggested*(1.0-target)) + torch.mean((1.0-suggested)*target)) * 25.0 -------------------------------------------------------------------------------- /warcraft/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | try: 5 | import ray 6 | except ImportError as e: 7 | print(e) 8 | 9 | def maybe_parallelize(function, arg_list): 10 | """ 11 | Parallelizes execution if ray is enabled and initialized 12 | :param function: callable 13 | :param arg_list: list of function arguments (one for each execution) 14 | :return: 15 | """ 16 | # Passive ray module check 17 | if 'ray' in sys.modules and ray.is_initialized(): 18 | ray_fn = ray.remote(function) 19 | return ray.get([ray_fn.remote(arg) for arg in arg_list]) 20 | else: 21 | return [function(arg) for arg in arg_list] 22 | def shortest_pathsolution(solver, weights): 23 | ''' 24 | solver: dijkstra solver 25 | weights: torch tensor matrix 26 | ''' 27 | np_weights = weights.detach().cpu().numpy() 28 | suggested_tours = np.asarray(maybe_parallelize(solver, arg_list=list(np_weights))) 29 | return torch.from_numpy(suggested_tours).float().to(weights.device) 30 | 31 | 32 | 33 | def growcache(solver, cache, output): 34 | ''' 35 | cache is torch array [currentpoolsize,48] 36 | output is torch array [batch_size,48] 37 | ''' 38 | weights = output.reshape(-1, output.shape[-1], output.shape[-1]) 39 | shortest_path = shortest_pathsolution(solver, weights).numpy() 40 | cache_np = cache.detach().numpy() 41 | cache_np = np.unique(np.append(cache_np,shortest_path,axis=0),axis=0) 42 | # rows are deduplicated in numpy (np.unique with axis=0), then converted back to a torch tensor 43 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /warcraft/comb_modules/dijkstra.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import heapq 3 | import torch 4 | from functools import partial 5 | from comb_modules.utils import get_neighbourhood_func 6 | from collections import namedtuple 7 | # from utils import maybe_parallelize 8 | 9 | DijkstraOutput = namedtuple("DijkstraOutput", ["shortest_path", "is_unique", "transitions"]) 10 | 11 | 12 | def dijkstra(matrix, neighbourhood_fn="8-grid", request_transitions=False): 13 | 14 | x_max, y_max = matrix.shape 15 | neighbors_func = partial(get_neighbourhood_func(neighbourhood_fn), x_max=x_max, y_max=y_max) 16 | 17 | costs = np.full_like(matrix, 1.0e10) 18 | costs[0][0] = matrix[0][0] 19 | num_path = np.zeros_like(matrix) 20 | num_path[0][0] = 1 21 | priority_queue = [(matrix[0][0], (0, 0))] 22 | certain = set() 23 | transitions = dict() 24 | 25 | while priority_queue: 26 | cur_cost, (cur_x, cur_y) = heapq.heappop(priority_queue) 27 | if (cur_x, cur_y) in certain: 28 | continue # skip stale queue entries for nodes whose distance is already final 29 | 30 | for x, y in neighbors_func(cur_x, cur_y): 31 | if (x, y) not in certain: 32
| if matrix[x][y] + costs[cur_x][cur_y] < costs[x][y]: 33 | costs[x][y] = matrix[x][y] + costs[cur_x][cur_y] 34 | heapq.heappush(priority_queue, (costs[x][y], (x, y))) 35 | transitions[(x, y)] = (cur_x, cur_y) 36 | num_path[x, y] = num_path[cur_x, cur_y] 37 | elif matrix[x][y] + costs[cur_x][cur_y] == costs[x][y]: 38 | num_path[x, y] += 1 39 | 40 | certain.add((cur_x, cur_y)) 41 | # retrieve the path 42 | cur_x, cur_y = x_max - 1, y_max - 1 43 | on_path = np.zeros_like(matrix) 44 | on_path[-1][-1] = 1 45 | while (cur_x, cur_y) != (0, 0): 46 | cur_x, cur_y = transitions[(cur_x, cur_y)] 47 | on_path[cur_x, cur_y] = 1.0 48 | 49 | is_unique = num_path[-1, -1] == 1 50 | 51 | if request_transitions: 52 | return DijkstraOutput(shortest_path=on_path, is_unique=is_unique, transitions=transitions) 53 | else: 54 | return DijkstraOutput(shortest_path=on_path, is_unique=is_unique, transitions=None) 55 | 56 | 57 | def get_solver(neighbourhood_fn): 58 | def solver(matrix): 59 | return dijkstra(matrix, neighbourhood_fn).shortest_path 60 | 61 | return solver 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /warcraft/comb_modules/gurobi_dijkstra.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import gurobipy as gp 3 | import numpy as np 4 | 5 | # A = nx.adjacency_matrix(G, weight=None).todense() 6 | # I = np.identity(len(A)) 7 | 8 | name_concat = lambda *s: '_'.join( list(map(str, s)) ) 9 | def ILP(matrix): 10 | x_max, y_max = matrix.shape 11 | print("weight of sink node ",matrix[-1,-1]) 12 | # row_sum_constraintmat= np.zeros((x_max, x_max*y_max)) 13 | # col_sum_constraintmat= np.zeros((y_max, x_max*y_max)) 14 | # for i in range(x_max): 15 | # row_sum_constraintmat[i,i*x_max:((i+1)*x_max)]=1 16 | 17 | # for j in range(y_max): 18 | # col_sum_constraintmat[j,np.arange(j,x_max*y_max, y_max)]=1 19 | E = [] 20 | N = [name_concat(x, y) for x in range(x_max) for y in range(y_max)] 21 | ''' 22 | The goal is to create a directed graph with (x_max*y_max) nodes. 23 | Each node is connected to its 8 neighbours: (x-1,y), (x-1,y+1),(x,y+1),(x+1,y+1), (x+1,y),(x+1,y-1), 24 | (x,y-1),(x-1,y-1). Care is taken for nodes which do not have all 8 neighbours.
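For example, name_concat(0, 0) yields the node label '0_0', and the corner cell (0,0) has only the three neighbours (0,1), (1,0) and (1,1); the offset ranges x_minus:x_plus and y_minus:y_plus computed below are clipped at the grid border to handle exactly such cases.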
25 | ''' 26 | for i in range(x_max): 27 | for j in range(y_max): 28 | if (( (x_max-1)> i>0) & ( (y_max-1)> j>0)): 29 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,2 30 | elif(i==j==0 ): 31 | x_minus,x_plus, y_minus, y_plus = 0,2,0,2 32 | elif ((i==0)&(j==y_max-1)): 33 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,1 34 | elif ((i==x_max-1)&(j==0)): 35 | x_minus,x_plus, y_minus, y_plus = -1,1,0,2 36 | elif (i==0): 37 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,2 38 | elif (j==0): 39 | x_minus,x_plus, y_minus, y_plus = -1,2,0,2 40 | elif ( (i== (x_max -1)) & (j== (y_max-1) )): 41 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,1 42 | elif ( (i== (x_max -1))): 43 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,2 44 | elif ( (j== (y_max -1))): 45 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,1 46 | 47 | 48 | 49 | E.extend([ ( name_concat(i,j), name_concat(i+p,j+q)) for p in range(x_minus,x_plus) 50 | for q in range(y_minus, y_plus) if ((p!=0)|(q!=0)) ]) 51 | # E.extend([ ( name_concat(i+p,j+q), name_concat(i,j) ) for p in range(x_minus,x_plus) 52 | # for q in range(y_minus,y_plus) if ((p!=0)|(q!=0)) ]) 53 | 54 | 55 | G = nx.DiGraph() 56 | G.add_nodes_from(N) 57 | G.add_edges_from(E) 58 | 59 | A = -nx.incidence_matrix(G, oriented=True).todense() 60 | A_pos = A.copy() 61 | A_pos[A_pos==-1]=0 62 | 63 | bigM = 1e18 # unused 64 | 65 | 66 | b = np.zeros(len(A)) 67 | b[0] = 1 68 | b[-1] = -1 69 | model = gp.Model() 70 | model.setParam('OutputFlag', 0) 71 | # x = model.addMVar(shape=A.shape[1], vtype=gp.GRB.BINARY, name="x") 72 | # z = model.addMVar(shape=A.shape[0], vtype=gp.GRB.BINARY, name="z") 73 | 74 | 75 | x = model.addMVar(shape=A.shape[1],lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="x") 76 | z = model.addMVar(shape=A.shape[0],lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="z") 77 | 78 | # model.addConstr( z[0]==1, name="source") 79 | #### force sink node to be 1 80 | model.addConstr( z[-1]==1, name="sink") 81 | 82 | model.addConstr( A@ x == b, name="eq") 83 | model.addConstr( A_pos@ x <= z, name="ineq") 84 | ''' 85 | The inequality ties z to the outgoing edges selected by x; the path uses no outgoing edge at the sink, 86 | so the sink node variable has to be forced to 1 explicitly (the "sink" constraint above). 87 | ''' 88 | 89 | model.setObjective(matrix.flatten() @z, gp.GRB.MINIMIZE) 90 | model.optimize() 91 | 92 | if model.status==2: 93 | return z.x.reshape( x_max, y_max ) 94 | else: 95 | print(model.status) 96 | model.computeIIS() 97 | model.write("infeasible_nodeweightedSP.ilp") 98 | raise Exception("Solution not found") 99 | 100 | 101 | def ILP_reformulated(matrix): 102 | x_max, y_max = matrix.shape 103 | print("weight of sink node ",matrix[-1,-1]) 104 | 105 | E = [ ( name_concat(x,y,'in'), name_concat(x,y,'out')) for x in range(x_max) for y in range(y_max) ] 106 | N = [name_concat(x, y, s) for x in range(x_max) for y in range(y_max) for s in ['in','out']] 107 | ''' 108 | The goal is to create a directed graph where each of the (x_max*y_max) grid cells is split into an 'in' and an 'out' node. 109 | Each cell is connected to its 8 neighbours: (x-1,y), (x-1,y+1),(x,y+1),(x+1,y+1), (x+1,y),(x+1,y-1), 110 | (x,y-1),(x-1,y-1). Care is taken for cells which do not have all 8 neighbours.
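The edge from a cell's 'in' node to its own 'out' node carries that cell's weight (assigned via c[non_zero_edge_idx] below), while edges between neighbouring cells cost nothing; for example, name_concat(0, 0, 'in') gives '0_0_in', and the internal edge ('0_0_in', '0_0_out') carries matrix[0,0]. This turns the node-weighted problem into a standard edge-weighted shortest path, so the auxiliary z variables used in ILP above are not needed here.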
111 | ''' 112 | for i in range(x_max): 113 | for j in range(y_max): 114 | if (( (x_max-1)> i>0) & ( (y_max-1)> j>0)): 115 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,2 116 | elif(i==j==0 ): 117 | x_minus,x_plus, y_minus, y_plus = 0,2,0,2 118 | elif ((i==0)&(j==y_max-1)): 119 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,1 120 | elif ((i==x_max-1)&(j==0)): 121 | x_minus,x_plus, y_minus, y_plus = -1,1,0,2 122 | elif (i==0): 123 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,2 124 | elif (j==0): 125 | x_minus,x_plus, y_minus, y_plus = -1,2,0,2 126 | elif ( (i== (x_max -1)) & (j== (y_max-1) )): 127 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,1 128 | elif ( (i== (x_max -1))): 129 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,2 130 | elif ( (j== (y_max -1))): 131 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,1 132 | 133 | 134 | 135 | E.extend([ ( name_concat(i,j,'out'), name_concat(i+p,j+q,'in')) for p in range(x_minus,x_plus) 136 | for q in range(y_minus, y_plus) if ((p!=0)|(q!=0)) ]) 137 | # E.extend([ ( name_concat(i+p,j+q), name_concat(i,j) ) for p in range(x_minus,x_plus) 138 | # for q in range(y_minus,y_plus) if ((p!=0)|(q!=0)) ]) 139 | G = nx.DiGraph() 140 | G.add_nodes_from(N) 141 | G.add_edges_from(E) 142 | 143 | A = -nx.incidence_matrix(G, oriented=True).todense() 144 | b = np.zeros(len(A)) 145 | b[0] = 1 146 | b[-1] = -1 147 | 148 | c = np.zeros(A.shape[1]) 149 | non_zero_edge_idx = [ i for i,k in enumerate( list(G.edges) ) if "_".join(k[0].split("_", 2)[:2]) == "_".join(k[1].split("_", 2)[:2])] 150 | c[non_zero_edge_idx] = matrix.flatten() 151 | # print(c[0:10]) # leftover debug output, disabled 152 | 153 | # print(c[10:20]) 154 | 155 | # print(c[100:120]) 156 | 157 | # print(c[450:480]) 158 | 159 | model = gp.Model() 160 | model.setParam('OutputFlag', 0) 161 | # x = model.addMVar(shape=A.shape[1], vtype=gp.GRB.BINARY, name="x") 162 | x = model.addMVar(shape=A.shape[1], lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="x") 163 | model.setObjective(c @x, gp.GRB.MINIMIZE) 164 | model.addConstr(A @ x == b, name="eq") 165 | model.optimize() 166 | 167 | if model.status==2: 168 | sol = x.x[non_zero_edge_idx] 169 | return sol.reshape( x_max, y_max ) 170 | else: 171 | print(model.status) 172 | model.computeIIS() 173 | model.write("infeasible_nodeweightedSP.ilp") 174 | raise Exception("Solution not found") 175 | -------------------------------------------------------------------------------- /warcraft/comb_modules/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import functools 3 | import numpy as np 4 | 5 | 6 | def neighbours_8(x, y, x_max, y_max): 7 | deltas_x = (-1, 0, 1) 8 | deltas_y = (-1, 0, 1) 9 | for (dx, dy) in itertools.product(deltas_x, deltas_y): 10 | x_new, y_new = x + dx, y + dy 11 | if 0 <= x_new < x_max and 0 <= y_new < y_max and (dx, dy) != (0, 0): 12 | yield x_new, y_new 13 | 14 | 15 | def neighbours_4(x, y, x_max, y_max): 16 | for (dx, dy) in [(1, 0), (0, 1), (0, -1), (-1, 0)]: 17 | x_new, y_new = x + dx, y + dy 18 | if 0 <= x_new < x_max and 0 <= y_new < y_max and (dx, dy) != (0, 0): 19 | yield x_new, y_new 20 | 21 | 22 | def get_neighbourhood_func(neighbourhood_fn): 23 | if neighbourhood_fn == "4-grid": 24 | return neighbours_4 25 | elif neighbourhood_fn == "8-grid": 26 | return neighbours_8 27 | else: 28 | raise Exception(f"neighbourhood_fn of {neighbourhood_fn} not possible") 29 | 30 | 31 | def edges_from_vertex(x, y, N, neighbourhood_fn): 32 | v = (x, y) 33 | neighbours = get_neighbourhood_func(neighbourhood_fn)(*v, x_max=N, y_max=N) 34 | v_edges = [ 35 |
(*v, *vn) for vn in neighbours if vertex_index(v, N) < vertex_index(vn, N) 36 | ] # Enforce ordering on vertices 37 | return v_edges 38 | 39 | 40 | def vertex_index(v, dim): 41 | x, y = v 42 | return x * dim + y 43 | 44 | 45 | @functools.lru_cache(32) 46 | def edges_from_grid(N, neighbourhood_fn): 47 | all_vertices = itertools.product(range(N), range(N)) 48 | all_edges = [edges_from_vertex(x, y, N, neighbourhood_fn=neighbourhood_fn) for x, y in all_vertices] 49 | all_edges_flat = sum(all_edges, []) 50 | all_edges_flat_unique = list(set(all_edges_flat)) 51 | return np.asarray(all_edges_flat_unique) 52 | 53 | 54 | @functools.lru_cache(32) 55 | def cached_vertex_grid_to_edges_grid_coords(grid_dim: int): 56 | edges_grid_idxs = edges_from_grid(grid_dim, neighbourhood_fn="4-grid") 57 | return edges_grid_idxs[:, 0], edges_grid_idxs[:, 1], edges_grid_idxs[:, 2], edges_grid_idxs[:, 3] 58 | 59 | 60 | @functools.lru_cache(32) 61 | def cached_vertex_grid_to_edges(grid_dim: int): 62 | x, y, xn, yn = cached_vertex_grid_to_edges_grid_coords(grid_dim) 63 | return np.vstack([vertex_index((x, y), grid_dim), vertex_index((xn, yn), grid_dim)]).T 64 | -------------------------------------------------------------------------------- /warcraft/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "baseline", 4 | "img_size": 12, 5 | "loss": "mse", 6 | "lr": 0.001 7 | }, 8 | { 9 | "model": "baseline", 10 | "img_size": 30, 11 | "loss": "mse", 12 | "lr": 0.01 13 | }, 14 | { 15 | "model": "baseline", 16 | "img_size": 24, 17 | "loss": "mse", 18 | "lr": 0.001 19 | }, 20 | { 21 | "model": "baseline", 22 | "img_size": 18, 23 | "loss": "mse", 24 | "lr": 0.0005 25 | }, 26 | { 27 | "model": "SPO", 28 | "img_size": 12, 29 | "lr": 0.005 30 | }, 31 | { 32 | "model": "SPO", 33 | "img_size": 30, 34 | "lr": 0.0005 35 | }, 36 | { 37 | "model": "SPO", 38 | "img_size": 24, 39 | "lr": 0.005 40 | }, 41 | { 42 | "model": "SPO", 43 | "img_size": 18, 44 | "lr": 0.01 45 | }, 46 | { 47 | "model": "DBB", 48 | "img_size": 12, 49 | "lr": 0.001, 50 | "lambda_val": 10.0 51 | }, 52 | { 53 | "model": "DBB", 54 | "img_size": 30, 55 | "lr": 0.005, 56 | "lambda_val": 10.0 57 | }, 58 | { 59 | "model": "DBB", 60 | "img_size": 24, 61 | "lr": 0.001, 62 | "lambda_val": 100.0 63 | }, 64 | { 65 | "model": "DBB", 66 | "img_size": 18, 67 | "lr": 0.001, 68 | "lambda_val": 10.0 69 | }, 70 | { 71 | "model": "FenchelYoung", 72 | "img_size": 12, 73 | "lr": 0.01, 74 | "sigma": 0.01 75 | }, 76 | { 77 | "model": "FenchelYoung", 78 | "img_size": 30, 79 | "lr": 0.001, 80 | "sigma": 0.01 81 | }, 82 | { 83 | "model": "FenchelYoung", 84 | "img_size": 24, 85 | "lr": 0.01, 86 | "sigma": 0.01 87 | }, 88 | { 89 | "model": "FenchelYoung", 90 | "img_size": 18, 91 | "lr": 0.01, 92 | "sigma": 0.01 93 | }, 94 | { 95 | "model": "IMLE", 96 | "img_size": 12, 97 | "lr": 0.001, 98 | "beta": 10.0, 99 | "temperature": 0.05, 100 | "k": 50 101 | }, 102 | { 103 | "model": "IMLE", 104 | "img_size": 30, 105 | "lr": 0.001, 106 | "beta": 100.0, 107 | "temperature": 0.05, 108 | "k": 50 109 | }, 110 | { 111 | "model": "IMLE", 112 | "img_size": 24, 113 | "lr": 0.001, 114 | "beta": 10.0, 115 | "temperature": 0.05, 116 | "k": 50 117 | }, 118 | { 119 | "model": "IMLE", 120 | "img_size": 18, 121 | "lr": 0.01, 122 | "beta": 10.0, 123 | "temperature": 0.05, 124 | "k": 5 125 | }, 126 | { 127 | "model": "CachingPO", 128 | "loss": "MAP_c", 129 | "img_size": 12, 130 | "lr": 0.005 131 | }, 132 | { 133 | "model": "CachingPO", 134 | "loss": 
"MAP_c", 135 | "img_size": 30, 136 | "lr": 0.01 137 | }, 138 | { 139 | "model": "CachingPO", 140 | "loss": "MAP_c", 141 | "img_size": 24, 142 | "lr": 0.005 143 | }, 144 | { 145 | "model": "CachingPO", 146 | "loss": "MAP_c", 147 | "img_size": 18, 148 | "lr": 0.005 149 | }, 150 | { 151 | "model": "CachingPO", 152 | "loss": "pairwise_diff", 153 | "img_size": 12, 154 | "lr": 0.005 155 | }, 156 | { 157 | "model": "CachingPO", 158 | "loss": "pairwise_diff", 159 | "img_size": 30, 160 | "lr": 0.005 161 | }, 162 | { 163 | "model": "CachingPO", 164 | "loss": "pairwise_diff", 165 | "img_size": 24, 166 | "lr": 0.005 167 | }, 168 | { 169 | "model": "CachingPO", 170 | "loss": "pairwise_diff", 171 | "img_size": 18, 172 | "lr": 0.005 173 | }, 174 | { 175 | "model": "CachingPO", 176 | "loss": "pairwise", 177 | "img_size": 12, 178 | "lr": 0.01, 179 | "tau": 0.1 180 | }, 181 | { 182 | "model": "CachingPO", 183 | "loss": "pairwise", 184 | "img_size": 30, 185 | "lr": 0.01, 186 | "tau": 0.1 187 | }, 188 | { 189 | "model": "CachingPO", 190 | "loss": "pairwise", 191 | "img_size": 24, 192 | "lr": 0.01, 193 | "tau": 0.1 194 | }, 195 | { 196 | "model": "CachingPO", 197 | "loss": "pairwise", 198 | "img_size": 18, 199 | "lr": 0.005, 200 | "tau": 0.1 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "listwise", 205 | "img_size": 12, 206 | "lr": 0.005, 207 | "tau": 0.5 208 | }, 209 | { 210 | "model": "CachingPO", 211 | "loss": "listwise", 212 | "img_size": 30, 213 | "lr": 0.005, 214 | "tau": 1.0 215 | }, 216 | { 217 | "model": "CachingPO", 218 | "loss": "listwise", 219 | "img_size": 24, 220 | "lr": 0.005, 221 | "tau": 0.5 222 | }, 223 | { 224 | "model": "CachingPO", 225 | "loss": "listwise", 226 | "img_size": 18, 227 | "lr": 0.005, 228 | "tau": 0.05 229 | } 230 | ] -------------------------------------------------------------------------------- /warcraft/data/data_prep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pip install --upgrade --no-cache-dir gdown 3 | gdown 16heKlpA9cBq8GXnAtBJgQCYUgV9LQ-Rw 4 | tar -xvzf warcaft-data.tar.gz 5 | rm warcaft-data.tar.gz 6 | -------------------------------------------------------------------------------- /warcraft/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/warcraft/imle/__init__.py -------------------------------------------------------------------------------- /warcraft/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. 
NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /warcraft/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`. 25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([0.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /warcraft/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the Warcraft shortest path problem. 2 | 3 | To download the data, go inside the data folder and run 4 | ``` 5 | ./data_prep.sh 6 | ``` 7 | This will download and preprocess the data. 8 | 9 | You may download the data directly from [https://doi.org/10.48804/KT2P3Z](https://doi.org/10.48804/KT2P3Z) and extract by running `tar -xvzf warcraft-data.tar.gz`. 10 | 11 | 12 | Then an experiment can be run using the `TestWarcraft.py` file. 13 | To reproduce the results of the experiments, run 14 | ``` 15 | python TestWarcraft.py 16 | ``` 17 | --------------------------------------------------------------------------------