├── .gitignore
├── Energy
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── SchedulingInstances
│   │   ├── load1
│   │   │   └── day01.txt
│   │   ├── load2
│   │   │   └── day01.txt
│   │   └── load3
│   │       └── day01.txt
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── ICON_solving.py
│   │   ├── PO_models.py
│   │   ├── comb_solver.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── get_energy.py
│   │   ├── prices2013.dat
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── requirements.txt
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── testenergy.py
├── HyperparamConfiguration.pdf
├── Knapsack
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── Data.npz
│   │   ├── PO_models.py
│   │   ├── comb_solver.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── testknapsack.py
├── LICENSE
├── Matching
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── NNModels.py
│   │   ├── PO_models.py
│   │   ├── bipartite.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   └── utils.py
│   ├── config.json
│   ├── get_data.sh
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── test_matching.py
├── Portfolio
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── PO_modelsSP.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── optimizer_module.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── readme.md
│   └── test_sp.py
├── README.md
├── ShortestPath
│   ├── DPO
│   │   ├── fenchel_young.py
│   │   └── perturbations.py
│   ├── Trainer
│   │   ├── CacheLosses.py
│   │   ├── PO_modelsSP.py
│   │   ├── data_utils.py
│   │   ├── diff_layer.py
│   │   ├── optimizer_module.py
│   │   └── utils.py
│   ├── config.json
│   ├── imle
│   │   ├── __init__.py
│   │   ├── noise.py
│   │   ├── target.py
│   │   └── wrapper.py
│   ├── intopt
│   │   ├── intopt.py
│   │   ├── presolve.py
│   │   ├── solveLP.py
│   │   └── util.py
│   ├── readme.md
│   └── test_sp.py
├── environment.yml
├── requirements.txt
└── warcraft
    ├── DPO
    │   ├── fenchel_young.py
    │   └── perturbations.py
    ├── TestWarcraft.py
    ├── Trainer
    │   ├── Trainer.py
    │   ├── computervisionmodels.py
    │   ├── data_utils.py
    │   ├── diff_layer.py
    │   ├── metric.py
    │   └── utils.py
    ├── comb_modules
    │   ├── dijkstra.py
    │   ├── gurobi_dijkstra.py
    │   ├── losses.py
    │   └── utils.py
    ├── config.json
    ├── data
    │   └── data_prep.sh
    ├── imle
    │   ├── __init__.py
    │   ├── noise.py
    │   ├── target.py
    │   └── wrapper.py
    ├── intopt
    │   ├── intopt.py
    │   ├── presolve.py
    │   ├── solveLP.py
    │   └── util.py
    └── readme.md

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Energy/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 
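            # Note on gradients: forward() reports the squared norm of `diff` as the
            # loss value, while backward() below returns dy * diff directly -- the
            # defining Fenchel-Young property that the loss gradient w.r.t. the
            # input is E[perturbed argmax] - y_true.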
40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load1/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 3 4 | 0 190 0.1 0.0 5 | 5481 6 | 1 170 0.1 0.1 7 | 5583 8 | 2 130 0.0 0.1 9 | 5547 10 | 10 11 | 0 10 1 25 471.40 12 | 2610 13 | 1 23 8 47 426.43 14 | 2361 15 | 2 25 20 47 389.04 16 | 2154 17 | 3 7 4 42 200.66 18 | 1111 19 | 4 19 19 47 104.39 20 | 578 21 | 5 19 19 48 744.67 22 | 4123 23 | 6 11 1 14 468.51 24 | 2594 25 | 7 30 5 48 292.05 26 | 1617 27 | 8 13 4 22 505.36 28 | 2798 29 | 9 10 2 37 227.03 30 | 1257 31 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load2/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 3 4 | 0 120 0.1 0.1 5 | 5433 6 | 1 130 0.0 0.1 7 | 5639 8 | 2 130 0.0 0.1 9 | 5595 10 | 15 11 | 0 8 4 48 64.10 12 | 356 13 | 1 6 7 48 207.59 14 | 1153 15 | 2 13 14 47 475.85 16 | 2643 17 | 3 10 16 45 191.57 18 | 1064 19 | 4 25 14 47 18.36 20 | 102 21 | 5 3 3 26 34.21 22 | 190 23 | 6 26 0 33 3.78 24 | 21 25 | 7 13 2 21 390.33 26 | 2168 27 | 8 9 12 48 8.64 28 | 48 29 | 9 7 7 16 185.26 30 | 1029 31 | 10 26 7 47 393.75 32 | 2187 33 | 11 3 0 31 240.36 34 | 1335 35 | 12 4 2 44 606.56 36 | 3369 37 | 13 26 14 47 737.09 38 | 4094 39 | 14 26 7 47 200.03 40 | 1111 41 | -------------------------------------------------------------------------------- /Energy/SchedulingInstances/load3/day01.txt: -------------------------------------------------------------------------------- 1 | 30 2 | 1 3 | 5 4 | 0 150 0.1 0.1 5 | 5644 6 | 1 190 0.1 0.0 7 | 5433 8 | 2 170 0.1 0.1 
9 | 5533 10 | 3 120 0.1 0.1 11 | 5429 12 | 4 130 0.0 0.1 13 | 5530 14 | 20 15 | 0 22 22 48 507.56 16 | 2798 17 | 1 2 0 17 495.95 18 | 2734 19 | 2 17 2 30 336.32 20 | 1854 21 | 3 25 10 47 523.70 22 | 2887 23 | 4 23 10 46 474.54 24 | 2616 25 | 5 19 27 47 85.08 26 | 469 27 | 6 1 0 5 303.12 28 | 1671 29 | 7 19 0 33 381.48 30 | 2103 31 | 8 9 38 48 377.86 32 | 2083 33 | 9 22 12 47 392.73 34 | 2165 35 | 10 1 1 47 109.93 36 | 606 37 | 11 20 0 34 208.79 38 | 1151 39 | 12 25 2 43 74.74 40 | 412 41 | 13 1 0 23 323.98 42 | 1786 43 | 14 11 6 21 230.92 44 | 1273 45 | 15 11 4 47 211.69 46 | 1167 47 | 16 45 0 47 401.80 48 | 2215 49 | 17 8 15 48 78.36 50 | 432 51 | 18 5 30 47 388.38 52 | 2141 53 | 19 18 1 48 224.03 54 | 1235 55 | -------------------------------------------------------------------------------- /Energy/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | 6 | 7 | 8 | ###################################### NCE Loss Functions ######################################### 9 | class NCE(torch.nn.Module): 10 | def __init__(self, minimize= True): 11 | super().__init__() 12 | self.mm = 1 if minimize else -1 13 | def forward(self, y_hat,y_true, sol_true,cache): 14 | 15 | loss = 0 16 | mm = self.mm 17 | 18 | 19 | for ii in range(len( y_hat )): 20 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 21 | loss /= len(y_hat) 22 | return loss 23 | 24 | class NCE_c(torch.nn.Module): 25 | def __init__(self, minimize= True): 26 | super().__init__() 27 | self.mm = 1 if minimize else -1 28 | def forward(self, y_hat,y_true, sol_true,cache): 29 | 30 | loss = 0 31 | mm = self.mm 32 | for ii in range(len( y_hat )): 33 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 34 | loss /= len(y_hat) 35 | return loss 36 | 37 | 38 | class MAP(torch.nn.Module): 39 | def __init__(self, minimize= True): 40 | super().__init__() 41 | self.mm = 1 if minimize else -1 42 | def forward(self, y_hat,y_true,sol_true,cache): 43 | 44 | loss = 0 45 | mm = self.mm 46 | 47 | for ii in range(len( y_hat )): 48 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 49 | loss /= len(y_hat) 50 | return loss 51 | 52 | 53 | class MAP_c(torch.nn.Module): 54 | def __init__(self, minimize= True): 55 | super().__init__() 56 | self.mm = 1 if minimize else -1 57 | def forward(self, y_hat,y_true,sol_true,cache): 58 | ''' 59 | pred_weights: predicted cost vector [batch_size, img,img] 60 | true_weights: actua cost vector [batch_size, img,img] 61 | target: true shortest path [batch_size, img,img] 62 | cache: cache is torch array [cache_size, img,img] 63 | ''' 64 | loss = 0 65 | mm = self.mm 66 | 67 | for ii in range(len( y_hat )): 68 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 69 | loss /= len(y_hat) 70 | return loss 71 | 72 | 73 | class MAP_c_actual(torch.nn.Module): 74 | def __init__(self, minimize= True): 75 | super().__init__() 76 | self.mm = 1 if minimize else -1 77 | def forward(self, y_hat,y_true,sol_true,cache): 78 | ''' 79 | pred_weights: predicted cost vector [batch_size, img,img] 80 | true_weights: actua cost vector [batch_size, img,img] 81 | target: true shortest path [batch_size, img,img] 82 | cache: cache is torch array [cache_size, img,img] 83 | ''' 84 | loss = 0 85 | mm = self.mm 86 | 87 | for ii in range(len( y_hat )): 88 | 89 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] 
- y_true[ii]) ).sum(dim=(1)) ).max() 90 | loss /= len(y_hat) 91 | return loss 92 | 93 | 94 | ###################################### Ranking Loss Functions ######################################### 95 | class PointwiseLoss(torch.nn.Module): 96 | def __init__(self): 97 | super().__init__() 98 | def forward(self, y_hat,y_true,sol_true,cache): 99 | ''' 100 | pred_weights: predicted cost vector [batch_size, img,img] 101 | true_weights: actua cost vector [batch_size, img,img] 102 | target: true shortest path [batch_size, img,img] 103 | cache: cache is torch array [cache_size, img,img] 104 | ''' 105 | loss = 0 106 | 107 | for ii in range(len( y_hat )): 108 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 109 | loss /= len(y_hat) 110 | 111 | return loss 112 | class ListwiseLoss(torch.nn.Module): 113 | def __init__(self, temperature=0., minimize= True): 114 | super().__init__() 115 | self.temperature = temperature 116 | self.mm = 1 if minimize else -1 117 | def forward(self, y_hat,y_true,sol_true,cache): 118 | 119 | loss = 0 120 | mm, temperature = self.mm, self.temperature 121 | 122 | for ii in range(len( y_hat )): 123 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 124 | loss /= len(y_hat) 125 | 126 | return loss 127 | 128 | 129 | class PairwisediffLoss(torch.nn.Module): 130 | def __init__(self, minimize=True): 131 | super().__init__() 132 | self.mm = 1 if minimize else -1 133 | 134 | def forward(self, y_hat,y_true,sol_true,cache): 135 | ''' 136 | pred_weights: predicted cost vector [batch_size, img,img] 137 | true_weights: actua cost vector [batch_size, img,img] 138 | target: true shortest path [batch_size, img,img] 139 | cache: cache is torch array [cache_size, img,img] 140 | ''' 141 | 142 | loss = 0 143 | for ii in range(len( y_hat )): 144 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 145 | 146 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 147 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 148 | 149 | 150 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 151 | loss /= len(y_hat) 152 | return loss 153 | 154 | class PairwiseLoss(torch.nn.Module): 155 | def __init__(self, margin=0., minimize=True): 156 | super().__init__() 157 | self.margin = margin 158 | self.mm = 1 if minimize else -1 159 | def forward(self, y_hat,y_true,sol_true,cache): 160 | ''' 161 | pred_weights: predicted cost vector [batch_size, img,img] 162 | true_weights: actua cost vector [batch_size, img,img] 163 | target: true shortest path [batch_size, img,img] 164 | cache: cache is torch array [cache_size, img,img] 165 | ''' 166 | relu = torch.nn.ReLU() 167 | loss = 0 168 | mm, margin = self.mm, self.margin 169 | for ii in range(len( y_hat )): 170 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 171 | 172 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 173 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 174 | 175 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 176 | loss /= len(y_hat) 177 | return loss -------------------------------------------------------------------------------- /Energy/Trainer/ICON_solving.py: 
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | from gurobipy import *
4 | 
5 | def main():
6 |     # Placeholder entry point (the original call referenced an undefined main()):
7 |     # in this repository the ICON scheduling model is built and solved through
8 |     # Trainer.comb_solver.SolveICON, so there is nothing to run from here.
9 |     pass
10 | 
11 | if __name__ == "__main__":
12 |     main()
--------------------------------------------------------------------------------
/Energy/Trainer/data_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from torch.utils.data import Dataset, DataLoader
4 | import pytorch_lightning as pl
5 | import torch
6 | from sklearn.preprocessing import StandardScaler
7 | import sklearn
8 | from Trainer.utils import batch_solve
9 | from Trainer.get_energy import get_energy
10 | from Trainer.comb_solver import SolveICON
11 | 
12 | class EnergyDatasetWrapper():
13 |     def __init__(self, X, y, sol=None, solver=None):
14 |         self.X = X.astype(np.float32)
15 |         self.y = y.astype(np.float32)
16 |         if sol is None:
17 |             sol = batch_solve(solver, y)
18 | 
19 |         self.sol = np.array(sol).astype(np.float32)
20 | 
21 |     def __len__(self):
22 |         return len(self.y)
23 | 
24 |     def __getitem__(self, idx):
25 |         return self.X[idx], self.y[idx], self.sol[idx]
26 | 
27 | 
28 | class EnergyDataModule(pl.LightningDataModule):
29 |     def __init__(self, param, standardize=True, batch_size=16, generator=None, num_workers=4, seed=0, relax=False):
30 |         super().__init__()
31 | 
32 |         x_train, y_train, x_test, y_test = get_energy(fname='Trainer/prices2013.dat')
33 | 
34 | 
35 |         x_train = x_train[:, 1:]
36 |         x_test = x_test[:, 1:]
37 |         if standardize:
38 |             scaler = StandardScaler()
39 |             x_train = scaler.fit_transform(x_train)
40 |             x_test = scaler.transform(x_test)
41 |         x_train = x_train.reshape(-1, 48, x_train.shape[1])
42 |         y_train = y_train.reshape(-1, 48)
43 |         x_test = x_test.reshape(-1, 48, x_test.shape[1])
44 |         y_test = y_test.reshape(-1, 48)
45 |         x = np.concatenate((x_train, x_test), axis=0)
46 |         y = np.concatenate((y_train, y_test), axis=0)
47 |         x, y = sklearn.utils.shuffle(x, y, random_state=seed)
48 |         x_train, y_train = x[:550], y[:550]
49 |         x_valid, y_valid = x[550:650], y[550:650]
50 |         x_test, y_test = x[650:], y[650:]
51 | 
52 |         solver = SolveICON(relax=relax, **param)
53 |         solver.make_model()
54 | 
55 |         self.train_df = EnergyDatasetWrapper(x_train, y_train, solver=solver)
56 |         self.valid_df = EnergyDatasetWrapper(x_valid, y_valid, solver=solver)
57 |         self.test_df = EnergyDatasetWrapper(x_test, y_test, solver=solver)
58 |         self.train_solutions = self.train_df.sol
59 | 
60 |         self.batch_size = batch_size
61 |         self.generator = generator
62 |         self.num_workers = num_workers
63 | 
64 | 
65 |     def train_dataloader(self):
66 |         return DataLoader(self.train_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
67 | 
68 |     def val_dataloader(self):
69 |         return DataLoader(self.valid_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
70 | 
71 |     def test_dataloader(self):
72 |         return DataLoader(self.test_df, batch_size=self.batch_size, generator=self.generator, num_workers=self.num_workers)
--------------------------------------------------------------------------------
/Energy/Trainer/diff_layer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from Trainer.utils import batch_solve
5 | 
6 | def SPOlayer(solver, minimize=True):
7 |     mm = 1 if minimize else -1
8 |     class SPOlayer_cls(torch.autograd.Function):
9 |         @staticmethod
10 |         def forward(ctx, y_hat, y_true, sol_true):
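            # SPO+ loss (Elmachtoub & Grigas, "Smart 'Predict, then Optimize'"):
            # the forward pass scores the solver's decision under the predicted
            # costs y_hat against the true optimum; backward() below returns the
            # SPO+ subgradient mm * (sol_true - sol(2 * y_hat - y_true)).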
11 |             sol_hat = batch_solve(solver, y_hat)
12 | 
13 |             ctx.save_for_backward(y_hat, y_true, sol_true)
14 | 
15 |             return (mm * (sol_hat - sol_true) * y_true).sum()
16 | 
17 |         @staticmethod
18 |         def backward(ctx, grad_output):
19 |             y_hat, y_true, sol_true = ctx.saved_tensors
20 |             y_spo = 2 * y_hat - y_true
21 |             sol_spo = batch_solve(solver, y_spo)
22 |             return (sol_true - sol_spo) * mm, None, None
23 |     return SPOlayer_cls.apply
24 | 
25 | 
26 | def DBBlayer(solver, lambda_val=1., minimize=True):
27 |     mm = 1 if minimize else -1
28 |     class DBBlayer_cls(torch.autograd.Function):
29 |         @staticmethod
30 |         def forward(ctx, y_hat, y_true, sol_true):
31 |             sol_hat = batch_solve(solver, y_hat)
32 | 
33 |             ctx.save_for_backward(y_hat, y_true, sol_true, sol_hat)
34 | 
35 |             return sol_hat
36 | 
37 |         @staticmethod
38 |         def backward(ctx, grad_output):
39 |             """
40 |             In the backward pass we compute the gradient to minimize the regret.
41 |             """
42 |             y_hat, y_true, sol_true, sol_hat = ctx.saved_tensors
43 |             y_perturbed = y_hat + mm * lambda_val * grad_output
44 |             sol_perturbed = batch_solve(solver, y_perturbed)
45 | 
46 |             return -mm * (sol_hat - sol_perturbed) / lambda_val, None, None
47 |     return DBBlayer_cls.apply
48 | 
49 | 
50 | 
51 | 
--------------------------------------------------------------------------------
/Energy/Trainer/get_energy.py:
--------------------------------------------------------------------------------
1 | """
2 | Reading in the data, sklearn style...
3 | 
4 | The data contains the following columns:
5 | 
6 | #DateTime Holiday HolidayFlag DayOfWeek WeekOfYear Day Month Year PeriodOfDay ForecastWindProduction SystemLoadEA SMPEA ORKTemperature ORKWindspeed CO2Intensity ActualWindProduction SystemLoadEP2 SMPEP2
7 | 
8 | #DateTime and Holiday: strings, subsumed by the following features; HolidayFlag: Boolean, identical within each day; DayOfWeek WeekOfYear Day Month Year: discrete, identical within each day; PeriodOfDay: discrete 0..47; ORKTemperature ORKWindspeed: contain NaNs and are questionable (actual values); ActualWindProduction SystemLoadEP2 SMPEP2: actual values, with SMPEP2 the label
9 | """
10 | 
11 | import numpy as np
12 | import pandas as pd
13 | from sklearn.model_selection import train_test_split
14 | 
15 | # prep numpy arrays, Xs will contain groupID as first column
16 | def get_energy(fname=None, trainTestRatio=0.70):
17 |     df = get_energy_pandas(fname)
18 | 
19 |     length = df['groupID'].nunique()
20 |     grouplength = 48
21 | 
22 |     # numpy arrays, X contains groupID as first column
23 |     X1g = df.loc[:, df.columns != 'SMPEP2'].values
24 |     y = df.loc[:, 'SMPEP2'].values
25 | 
26 |     # no negative values allowed...for now I just clamp these values to zero. They occur three times in the training data.
27 |     # for i in range(len(y)):
28 |     #     y[i] = max(y[i], 0)
29 | 
30 | 
31 |     # ordered split per complete group
32 |     train_len = int(trainTestRatio * length)
33 | 
34 |     # the splitting
35 |     X_1gtrain = X1g[:grouplength * train_len]
36 |     y_train = y[:grouplength * train_len]
37 |     X_1gtest = X1g[grouplength * train_len:]
38 |     y_test = y[grouplength * train_len:]
39 | 
40 | 
41 | 
42 |     #print(len(X1g_train),len(X1g_test),len(X),len(X1g_train)+len(X1g_test))
43 |     return (X_1gtrain, y_train, X_1gtest, y_test)
44 | 
45 | 
46 | def get_energy_grouped(fname=None):
47 |     df = get_energy_pandas(fname)
48 | 
49 |     # put the 'y's into columns (I hope this respects the ordering!)
50 |     t = df.groupby('groupID')['SMPEP2'].apply(np.array)
51 |     grpY = np.vstack(t.values)  # stack into a 2D array
52 |     # now something similar but for the features... let's naively just take averages
53 |     grpX = df.loc[:, df.columns != 'SMPEP2'].groupby('groupID').mean().values
54 | 
55 |     # train/test splitting, sklearn is so convenient
56 |     (grpX_train, grpX_test, grpY_train, grpY_test) = \
57 |         train_test_split(grpX, grpY, test_size=0.3, shuffle=False)
58 | 
59 |     return (grpX_train, grpY_train, grpX_test, grpY_test)
60 | 
61 | 
62 | def get_energy_pandas(fname=None):
63 |     if fname is None:
64 |         fname = "prices2013.dat"
65 | 
66 |     df = pd.read_csv(fname, delim_whitespace=True, quotechar='"')
67 |     # remove unnecessary columns
68 |     df.drop(['#DateTime', 'Holiday', 'ActualWindProduction', 'SystemLoadEP2'], axis=1, inplace=True)
69 |     # remove columns with missing values
70 |     df.drop(['ORKTemperature', 'ORKWindspeed'], axis=1, inplace=True)
71 | 
72 |     # missing value treatment
73 |     # df[pd.isnull(df).any(axis=1)]
74 |     # impute missing CO2 intensities linearly
75 |     df.loc[df.loc[:, 'CO2Intensity'] == 0, 'CO2Intensity'] = np.nan  # an oddity
76 |     df.loc[:, 'CO2Intensity'].interpolate(inplace=True)
77 |     # remove remaining 3 days with missing values
78 |     grouplength = 48
79 |     for i in range(0, len(df), grouplength):
80 |         day_has_nan = pd.isnull(df.loc[i:i + (grouplength - 1)]).any(axis=1).any()
81 |         if day_has_nan:
82 |             #print("Dropping",i)
83 |             df.drop(range(i, i + grouplength), inplace=True)
84 |     # data is sorted by year, month, day, periodofday; don't want learning over this
85 |     df.drop(['Day', 'Year', 'PeriodOfDay'], axis=1, inplace=True)
86 | 
87 |     # insert group identifier at beginning
88 |     grouplength = 48
89 |     length = int(len(df) / 48)  # 792
90 |     gids = [gid for gid in range(length) for i in range(grouplength)]
91 |     df.insert(0, 'groupID', gids)
92 | 
93 |     return df
94 | 
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     df = get_energy_pandas()
99 |     print(df.head())
100 | 
101 |     ### Options to try for learning (a sketch of the first one follows below):
102 |     (X_1gtrain, y_train, X_1gtest, y_test) = get_energy()
103 |     print([len(x) for x in (X_1gtrain, y_train, X_1gtest, y_test)])
104 | 
105 |     # split DayOfWeek into Weekday/Weekend, perhaps even split up days
106 |     # split up Month into seasons
107 |     # do use ORK*s but with missing value imputation
108 |     # remove WeekOfYear?
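    # A minimal sketch of the first option above (illustrative only; the
    # `Weekend` flag is hypothetical and not used elsewhere in this repo),
    # assuming DayOfWeek is encoded 0=Monday .. 6=Sunday:
    df['Weekend'] = (df['DayOfWeek'] >= 5).astype(int)
    print(df[['DayOfWeek', 'Weekend']].drop_duplicates().head())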
109 | 110 | -------------------------------------------------------------------------------- /Energy/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | 6 | def batch_solve(solver,y): 7 | ''' 8 | wrapper around te solver to return solution of a vector of cost coefficients 9 | ''' 10 | sol = [] 11 | for i in range(len(y)): 12 | sol.append( solver.solve(y[i])) 13 | return torch.from_numpy( np.array(sol) ).float() 14 | 15 | 16 | def regret_aslist(solver, y_hat,y_true, sol_true, minimize=True): 17 | ''' 18 | computes regret of more than one cost vectors 19 | ''' 20 | mm = 1 if minimize else -1 21 | sol_hat = batch_solve(solver,y_hat.detach().numpy()) 22 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/(sol_true*y_true).sum(1)) 23 | 24 | def regret_fn(solver, y_hat,y_true, sol_true, minimize=True): 25 | ''' 26 | computes average regret given a predicted cost vector and the true solution vector and the true cost vector 27 | y_hat,y, sol_true are torch tensors 28 | ''' 29 | return regret_aslist(solver,y_hat,y_true,sol_true,minimize).mean() 30 | 31 | 32 | def abs_regret_aslist(solver, y_hat,y_true, sol_true, minimize=True): 33 | ''' 34 | computes regret of more than one cost vectors 35 | ''' 36 | mm = 1 if minimize else -1 37 | sol_hat = batch_solve(solver,y_hat.detach().numpy()) 38 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) 39 | 40 | 41 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize=True): 42 | ''' 43 | computes average regret given a predicted cost vector and the true solution vector and the true cost vector 44 | y_hat,y, sol_true are torch tensors 45 | ''' 46 | return abs_regret_aslist(solver,y_hat,y_true,sol_true,minimize).mean() 47 | 48 | 49 | def growpool_fn(solver,cache, y_hat): 50 | ''' 51 | cache is torch array [currentpoolsize,48] 52 | y_hat is torch array [batch_size,48] 53 | ''' 54 | sol = batch_solve(solver,y_hat) 55 | cache_np = cache.detach().numpy() 56 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 57 | # torch has no unique function, so we have to do this 58 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Energy/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "baseline_mse", 4 | "instance": 1, 5 | "lr": 0.5 6 | }, 7 | { 8 | "model": "baseline_mse", 9 | "instance": 2, 10 | "lr": 0.5 11 | }, 12 | { 13 | "model": "baseline_mse", 14 | "instance": 3, 15 | "lr": 0.5 16 | }, 17 | { 18 | "model": "CachingPO", 19 | "loss": "MAP_c", 20 | "instance": 1, 21 | "lr": 0.5 22 | }, 23 | { 24 | "model": "CachingPO", 25 | "loss": "MAP_c", 26 | "instance": 2, 27 | "lr": 0.5 28 | }, 29 | { 30 | "model": "CachingPO", 31 | "loss": "MAP_c", 32 | "instance": 3, 33 | "lr": 0.5 34 | }, 35 | { 36 | "model": "CachingPO", 37 | "loss": "pairwise_diff", 38 | "instance": 1, 39 | "lr": 0.5 40 | }, 41 | { 42 | "model": "CachingPO", 43 | "loss": "pairwise_diff", 44 | "instance": 2, 45 | "lr": 0.5 46 | }, 47 | { 48 | "model": "CachingPO", 49 | "loss": "pairwise_diff", 50 | "instance": 3, 51 | "lr": 0.1 52 | }, 53 | { 54 | "model": "CachingPO", 55 | "loss": "pairwise", 56 | "instance": 1, 57 | "lr": 0.1, 58 | "tau": 1 59 | }, 60 | { 61 | "model": "CachingPO", 62 | "loss": "pairwise", 63 | "instance": 2, 64 | "lr": 0.1, 65 | "tau": 5 66 | }, 67 | { 68 | "model": "CachingPO", 69 | "loss": "pairwise", 70 | "instance": 3, 71 | "lr": 0.1, 72 
| "tau": 50 73 | }, 74 | { 75 | "model": "CachingPO", 76 | "loss": "listwise", 77 | "instance": 1, 78 | "lr": 0.1, 79 | "tau": 5.0 80 | }, 81 | { 82 | "model": "CachingPO", 83 | "loss": "listwise", 84 | "instance": 2, 85 | "lr": 0.1, 86 | "tau": 5.0 87 | }, 88 | { 89 | "model": "CachingPO", 90 | "loss": "listwise", 91 | "instance": 3, 92 | "lr": 0.1, 93 | "tau": 5.0 94 | }, 95 | { 96 | "model": "SPO", 97 | "instance": 1, 98 | "lr": 1.0 99 | }, 100 | { 101 | "model": "SPO", 102 | "instance": 2, 103 | "lr": 0.5 104 | }, 105 | { 106 | "model": "SPO", 107 | "instance": 3, 108 | "lr": 0.5 109 | }, 110 | { 111 | "model": "DBB", 112 | "instance": 1, 113 | "lr": 0.01, 114 | "lambda_val": 0.1 115 | }, 116 | { 117 | "model": "DBB", 118 | "instance": 2, 119 | "lr": 0.5, 120 | "lambda_val": 1.0 121 | }, 122 | { 123 | "model": "DBB", 124 | "instance": 3, 125 | "lr": 0.5, 126 | "lambda_val": 1.0 127 | }, 128 | { 129 | "model": "FenchelYoung", 130 | "instance": 1, 131 | "lr": 0.01, 132 | "sigma": 0.1 133 | }, 134 | { 135 | "model": "FenchelYoung", 136 | "instance": 2, 137 | "lr": 0.5, 138 | "sigma": 5.0 139 | }, 140 | { 141 | "model": "FenchelYoung", 142 | "instance": 3, 143 | "lr": 0.01, 144 | "sigma": 0.1 145 | }, 146 | { 147 | "model": "IMLE", 148 | "instance": 1, 149 | "lr": 0.5, 150 | "beta": 1.0, 151 | "temperature": 2.0, 152 | "k": 5 153 | }, 154 | { 155 | "model": "IMLE", 156 | "instance": 2, 157 | "lr": 0.5, 158 | "beta": 1.0, 159 | "temperature": 1.0, 160 | "k": 5 161 | }, 162 | { 163 | "model": "IMLE", 164 | "instance": 3, 165 | "lr": 0.5, 166 | "beta": 1.0, 167 | "temperature": 1.0, 168 | "k": 5 169 | }, 170 | { 171 | "model": "DCOL", 172 | "instance": 1, 173 | "lr": 0.1, 174 | "mu": 1.0 175 | }, 176 | { 177 | "model": "DCOL", 178 | "instance": 2, 179 | "lr": 0.1, 180 | "mu": 1.0 181 | }, 182 | { 183 | "model": "DCOL", 184 | "instance": 3, 185 | "lr": 0.1, 186 | "mu": 1.0 187 | }, 188 | { 189 | "model": "IntOpt", 190 | "instance": 1, 191 | "lr": 0.1, 192 | "thr": 0.1, 193 | "damping": 1e-06 194 | }, 195 | { 196 | "model": "IntOpt", 197 | "instance": 2, 198 | "lr": 0.1, 199 | "thr": 0.001, 200 | "damping": 1e-06 201 | }, 202 | { 203 | "model": "IntOpt", 204 | "instance": 3, 205 | "lr": 0.1, 206 | "thr": 0.1, 207 | "damping": 0.1 208 | } 209 | ] -------------------------------------------------------------------------------- /Energy/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Energy/imle/__init__.py -------------------------------------------------------------------------------- /Energy/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 
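    Following the construction in [1] (see :meth:`sample` below), each draw is
    (sum_{i=1..nb_iterations} Gamma(1/k, rate=i/k) - log(nb_iterations)) / k,
    designed so that the sum of k independent samples approximates a standard
    Gumbel perturbation as the number of iterations grows.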
32 | 
33 |     [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete
34 |     Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798)
35 | 
36 |     Example::
37 | 
38 |         >>> import torch
39 |         >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100)
40 |         >>> noise_distribution.sample(torch.Size([5]))
41 |         tensor([ 0.2504,  0.0112,  0.5466,  0.0051, -0.1497])
42 | 
43 |     Args:
44 |         k (float): k parameter -- see [1] for more details.
45 |         nb_iterations (int): number of iterations for estimating the sample.
46 |         device (torch.device): device on which to store the samples.
47 |     """
48 |     def __init__(self,
49 |                  k: float,
50 |                  nb_iterations: int = 10,
51 |                  device: Optional[torch.device] = None):
52 |         super().__init__()
53 |         self.k = k
54 |         self.nb_iterations = nb_iterations
55 |         self.device = device
56 | 
57 |     def sample(self,
58 |                shape: Size) -> Tensor:
59 |         samples = torch.zeros(size=shape, device=self.device)
60 |         for i in range(1, self.nb_iterations + 1):
61 |             concentration = torch.tensor(1. / self.k, device=self.device)
62 |             rate = torch.tensor(i / self.k, device=self.device)
63 | 
64 |             gamma = Gamma(concentration=concentration, rate=rate)
65 |             samples = samples + gamma.sample(sample_shape=shape).to(self.device)
66 |         samples = (samples - math.log(self.nb_iterations)) / self.k
67 |         return samples.to(self.device)
68 | 
--------------------------------------------------------------------------------
/Energy/imle/target.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from torch import Tensor
4 | from abc import ABC, abstractmethod
5 | 
6 | import logging
7 | 
8 | logger = logging.getLogger(__name__)
9 | 
10 | 
11 | class BaseTargetDistribution(ABC):
12 |     def __init__(self):
13 |         super().__init__()
14 | 
15 |     @abstractmethod
16 |     def params(self,
17 |                theta: Tensor,
18 |                dy: Tensor) -> Tensor:
19 |         raise NotImplementedError
20 | 
21 | 
22 | class TargetDistribution(BaseTargetDistribution):
23 |     r"""
24 |     Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
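    Given input parameters theta and a downstream gradient dy, the target
    parameters are computed as theta' = alpha * theta - beta * dy (see
    :meth:`params` below).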
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Energy/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver in an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers. 
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise sammples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...] 
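                # I-MLE gradient sketch: map the inputs to target parameters
                # theta' = alpha * theta - beta * dy via target_distribution.params
                # (see imle/target.py), perturb them with the same noise as the
                # forward pass, re-solve, and return the difference of the two
                # MAP states, averaged over samples, as the gradient estimate.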
116 |                 input, noise, perturbed_output_3d = ctx.saved_tensors
117 | 
118 |                 input_shape = input.shape
119 |                 batch_size = input_shape[0]
120 |                 instance_shape = input_shape[1:]
121 | 
122 | 
123 |                 #### New line added
124 |                 dy = dy.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view([batch_size * nb_samples] + list(instance_shape))
125 | 
126 | 
127 |                 # dy is [BATCH_SIZE * N_SAMPLES, ...]
128 |                 dy_shape = dy.shape
129 |                 # noise is [BATCH_SIZE, N_SAMPLES, ...]
130 |                 noise_shape = noise.shape
131 | 
132 |                 # [BATCH_SIZE * NB_SAMPLES, ...]
133 |                 input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape)
134 |                 target_input_2d = target_distribution.params(input_2d, dy)
135 | 
136 |                 # [BATCH_SIZE, NB_SAMPLES, ...]
137 |                 target_input_3d = target_input_2d.view(noise_shape)
138 | 
139 |                 # [BATCH_SIZE, NB_SAMPLES, ...]
140 |                 target_noise = noise * target_noise_temperature
141 | 
142 |                 # [BATCH_SIZE, N_SAMPLES, ...]
143 |                 perturbed_target_input_3d = target_input_3d + target_noise
144 | 
145 |                 # [BATCH_SIZE * N_SAMPLES, ...]
146 |                 perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape)
147 | 
148 |                 # [BATCH_SIZE * N_SAMPLES, ...]
149 |                 target_output_2d = function(perturbed_target_input_2d)
150 | 
151 |                 # [BATCH_SIZE, N_SAMPLES, ...]
152 |                 target_output_3d = target_output_2d.view(noise_shape)
153 | 
154 |                 # [BATCH_SIZE, ...]
155 |                 gradient = (perturbed_output_3d - target_output_3d)
156 |                 gradient = gradient.mean(axis=1)
157 |                 return gradient
158 | 
159 |         return WrappedFunc.apply(input, *args)
160 |     return wrapper
161 | 
--------------------------------------------------------------------------------
/Energy/intopt/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.12.0
2 | numpy==1.21.6
3 | scipy==1.6.3
--------------------------------------------------------------------------------
/Energy/readme.md:
--------------------------------------------------------------------------------
1 | This directory corresponds to the Energy-cost aware scheduling problem.
2 | 
3 | The data is included in `Trainer/prices2013.dat`.
4 | There are three instances of the scheduling problem in the directory `SchedulingInstances`.
5 | The first, second, and third instances contain 10, 15, and 20 tasks, respectively.
6 | 
7 | To run an experiment use `testenergy.py`.
8 | To reproduce the results of the experiments, run
9 | ```
10 | python testenergy.py --scheduler True
11 | ```
--------------------------------------------------------------------------------
/HyperparamConfiguration.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/HyperparamConfiguration.pdf
--------------------------------------------------------------------------------
/Knapsack/DPO/fenchel_young.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | #
3 | # Modifications from original work
4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch
5 | #
6 | # Copyright 2021 The Google Research Authors.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
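        Example (illustrative; assumes `solver_fn` is a batched solver that maps
        predicted cost vectors to argmax solutions, e.g. a small wrapper around
        Trainer.utils.batch_solve)::

            >>> criterion = FenchelYoungLoss(func=solver_fn, num_samples=10,
            ...                              sigma=0.1, maximize=True, batched=True)
            >>> loss = criterion(y_hat, sol_true).mean()  # y_hat: predicted costs
            >>> loss.backward()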
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Knapsack/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | 5 | 6 | 7 | 8 | ###################################### NCE Loss Functions ######################################### 9 | class NCE(torch.nn.Module): 10 | def __init__(self, minimize=False): 11 | super().__init__() 12 | self.mm = 1 if minimize else -1 13 | def forward(self, y_hat,y_true, sol_true,cache): 14 | 15 | loss = 0 16 | mm = self.mm 17 | ## print("shape to be preinted: ") 18 | # print(sol_true.shape, cache.shape, y_hat.shape) 19 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 20 | 21 | for ii in range(len( y_hat )): 22 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 23 | loss /= len(y_hat) 24 | return loss 25 | 26 | class NCE_c(torch.nn.Module): 27 | def __init__(self, minimize=False): 28 | super().__init__() 29 | self.mm = 1 if minimize else -1 30 | def forward(self, y_hat,y_true, sol_true,cache): 31 | 32 | loss = 0 33 | mm = self.mm 34 | for ii in range(len( y_hat )): 35 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 36 | loss /= len(y_hat) 37 | return loss 38 | 39 | 40 | class MAP(torch.nn.Module): 41 | def __init__(self, minimize=False): 42 | super().__init__() 43 | self.mm = 1 if minimize else -1 44 | def forward(self, y_hat,y_true,sol_true,cache): 45 | 46 | loss = 0 47 | mm = self.mm 48 | 49 | for ii in range(len( y_hat )): 50 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 51 | loss /= len(y_hat) 52 | return loss 53 | 54 | 55 | class MAP_c(torch.nn.Module): 56 | def __init__(self, minimize=False): 57 | super().__init__() 58 | self.mm = 1 if minimize else -1 59 | def forward(self, y_hat,y_true,sol_true,cache): 60 | ''' 61 | pred_weights: predicted cost vector [batch_size, img,img] 62 | true_weights: actua cost vector [batch_size, img,img] 63 | target: true shortest path [batch_size, img,img] 64 | cache: cache is torch array [cache_size, img,img] 65 | ''' 66 | loss = 0 67 | mm = self.mm 68 | 69 | for ii in range(len( y_hat )): 70 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 71 | loss /= len(y_hat) 72 | return loss 73 | 74 | 75 | class MAP_c_actual(torch.nn.Module): 76 | def __init__(self, minimize=False): 77 | super().__init__() 78 | self.mm = 1 if minimize else -1 79 | def forward(self, y_hat,y_true,sol_true,cache): 80 | ''' 81 | pred_weights: predicted cost vector [batch_size, img,img] 82 | true_weights: actua cost vector [batch_size, img,img] 83 | target: true shortest path [batch_size, img,img] 84 | cache: cache is torch array [cache_size, img,img] 85 | ''' 86 | loss = 0 87 | mm = self.mm 88 | 89 | for ii in range(len( y_hat )): 90 | 91 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 92 | loss /= len(y_hat) 93 | return loss 94 | 95 | 96 | 
###################################### Ranking Loss Functions ######################################### 97 | class PointwiseLoss(torch.nn.Module): 98 | def __init__(self): 99 | super().__init__() 100 | def forward(self, y_hat,y_true,sol_true,cache): 101 | ''' 102 | pred_weights: predicted cost vector [batch_size, img,img] 103 | true_weights: actua cost vector [batch_size, img,img] 104 | target: true shortest path [batch_size, img,img] 105 | cache: cache is torch array [cache_size, img,img] 106 | ''' 107 | loss = 0 108 | 109 | for ii in range(len( y_hat )): 110 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | class ListwiseLoss(torch.nn.Module): 115 | def __init__(self, temperature=0., minimize=False): 116 | super().__init__() 117 | self.temperature = temperature 118 | self.mm = 1 if minimize else -1 119 | def forward(self, y_hat,y_true,sol_true,cache): 120 | 121 | loss = 0 122 | mm, temperature = self.mm, self.temperature 123 | 124 | for ii in range(len( y_hat )): 125 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 126 | loss /= len(y_hat) 127 | 128 | return loss 129 | 130 | 131 | class PairwisediffLoss(torch.nn.Module): 132 | def __init__(self, minimize=False): 133 | super().__init__() 134 | self.mm = 1 if minimize else -1 135 | 136 | def forward(self, y_hat,y_true,sol_true,cache): 137 | ''' 138 | pred_weights: predicted cost vector [batch_size, img,img] 139 | true_weights: actua cost vector [batch_size, img,img] 140 | target: true shortest path [batch_size, img,img] 141 | cache: cache is torch array [cache_size, img,img] 142 | ''' 143 | 144 | loss = 0 145 | for ii in range(len( y_hat )): 146 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 147 | 148 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 149 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 150 | 151 | 152 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 153 | loss /= len(y_hat) 154 | return loss 155 | 156 | class PairwiseLoss(torch.nn.Module): 157 | def __init__(self, margin=0., minimize=False): 158 | super().__init__() 159 | self.margin = margin 160 | self.mm = 1 if minimize else -1 161 | def forward(self, y_hat,y_true,sol_true,cache): 162 | ''' 163 | pred_weights: predicted cost vector [batch_size, img,img] 164 | true_weights: actua cost vector [batch_size, img,img] 165 | target: true shortest path [batch_size, img,img] 166 | cache: cache is torch array [cache_size, img,img] 167 | ''' 168 | relu = torch.nn.ReLU() 169 | loss = 0 170 | mm, margin = self.mm, self.margin 171 | for ii in range(len( y_hat )): 172 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 173 | 174 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 175 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 176 | 177 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 178 | loss /= len(y_hat) 179 | return loss -------------------------------------------------------------------------------- /Knapsack/Trainer/Data.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Knapsack/Trainer/Data.npz -------------------------------------------------------------------------------- /Knapsack/Trainer/comb_solver.py: -------------------------------------------------------------------------------- 1 | from ortools.linear_solver import pywraplp 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import cvxpy as cp 6 | import cvxpylayers 7 | from cvxpylayers.torch import CvxpyLayer 8 | from qpth.qp import QPFunction 9 | 10 | 11 | class knapsack_solver: 12 | def __init__(self, weights,capacity,n_items): 13 | self.weights= weights 14 | self.capacity = capacity 15 | self.n_items = n_items 16 | self.make_model() 17 | def make_model(self): 18 | solver = pywraplp.Solver.CreateSolver('SCIP') 19 | x = {} 20 | for i in range(self.n_items): 21 | x[i] = solver.BoolVar(f'x_{i}') 22 | solver.Add( sum(x[i] * self.weights[i] for i in range(self.n_items)) <= self.capacity) 23 | 24 | 25 | self.x = x 26 | self.solver = solver 27 | def solve(self,y): 28 | y= y.astype(np.float64) 29 | x = self.x 30 | solver = self.solver 31 | 32 | objective = solver.Objective() 33 | for i in range(self.n_items): 34 | objective.SetCoefficient(x[i],y[i]) 35 | objective.SetMaximization() 36 | status = solver.Solve() 37 | 38 | if status == pywraplp.Solver.OPTIMAL: 39 | sol = np.zeros(self.n_items) 40 | for i in range(self.n_items): 41 | sol[i]= x[i].solution_value() 42 | return sol 43 | else: 44 | raise Exception("No soluton found") 45 | 46 | class cvx_knapsack_solver(nn.Module): 47 | def __init__(self, weights,capacity,n_items, mu=1.): 48 | super().__init__() 49 | self.weights= weights 50 | self.capacity = capacity 51 | self.n_items = n_items 52 | A = weights.reshape(1,-1).astype(np.float32) 53 | b = capacity 54 | x = cp.Variable(n_items) 55 | c = cp.Parameter(n_items) 56 | constraints = [x >= 0,x<=1,A @ x <= b] 57 | objective = cp.Maximize(c @ x - mu*cp.pnorm(x, p=2)) #cp.pnorm(A @ x - b, p=1) 58 | problem = cp.Problem(objective, constraints) 59 | self.layer = CvxpyLayer(problem, parameters=[c], variables=[x]) 60 | def forward(self,costs): 61 | sol, = self.layer(costs) 62 | 63 | return sol 64 | 65 | 66 | 67 | from intopt.intopt import intopt 68 | class intopt_knapsack_solver(nn.Module): 69 | def __init__(self, weights,capacity,n_items, thr=0.1,damping=1e-3, diffKKT = False, dopresolve = True,): 70 | super().__init__() 71 | self.weights= weights 72 | self.capacity = capacity 73 | self.n_items = n_items 74 | A = weights.reshape(1,-1).astype(np.float32) 75 | b = np.array([capacity]).astype(np.float32) 76 | A_lb = -np.eye(n_items).astype(np.float32) 77 | b_lb = np.zeros(n_items).astype(np.float32) 78 | A_ub = np.eye(n_items).astype(np.float32) 79 | b_ub = np.ones(n_items).astype(np.float32) 80 | 81 | # G = np.concatenate((A_lb, A_ub ), axis=0).astype(np.float32) 82 | # h = np.concatenate(( b_lb, b_ub )).astype(np.float32) 83 | self.A, self.b,self.G, self.h = torch.from_numpy(A), torch.from_numpy(b), torch.from_numpy(A_ub), torch.from_numpy(b_ub) 84 | self.thr =thr 85 | self.damping = damping 86 | self.layer = intopt(self.A, self.b,self.G, self.h, thr, damping, diffKKT, dopresolve) 87 | 88 | def forward(self,costs): 89 | return self.layer(-costs) 90 | 91 | # sol = [self.layer(-cost) for cost in costs] 92 | 93 | 94 | 95 | 96 | # return torch.stack(sol) 97 | -------------------------------------------------------------------------------- /Knapsack/Trainer/data_utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from torch.utils.data import Dataset, DataLoader 4 | import pytorch_lightning as pl 5 | import torch 6 | from sklearn.preprocessing import StandardScaler 7 | import sklearn 8 | from Trainer.comb_solver import knapsack_solver 9 | 10 | class Datawrapper(): 11 | def __init__(self, X,y, sol=None,solver=None): 12 | assert (sol is not None) or (solver is not None) 13 | self.X = X.astype(np.float32) 14 | self.y = y.astype(np.float32) 15 | if sol is None: 16 | sol = [] 17 | for i in range(len(y)): 18 | sol.append( solver.solve(y[i]) ) 19 | sol = np.array (sol).astype(np.float32) 20 | self.sol = sol 21 | 22 | def __len__(self): 23 | return len(self.y) 24 | 25 | def __getitem__(self, idx): 26 | return self.X[idx],self.y[idx], self.sol[idx] 27 | 28 | 29 | class KnapsackDataModule(pl.LightningDataModule): 30 | def __init__(self,capacity, standardize=True, batch_size=70, generator=None,num_workers=8, seed=0): 31 | super().__init__() 32 | 33 | data = np.load('Trainer/Data.npz') 34 | weights = data['weights'] 35 | weights = np.array(weights) 36 | n_items = len(weights) 37 | x_train, x_test, y_train,y_test = data['X_1gtrain'],data['X_1gtest'],data['y_train'],data['y_test'] 38 | x_train = x_train[:,1:] 39 | x_test = x_test[:,1:] 40 | if standardize: 41 | scaler = StandardScaler() 42 | x_train = scaler.fit_transform(x_train) 43 | x_test = scaler.transform(x_test) 44 | x_train = x_train.reshape(-1,48,x_train.shape[1]) 45 | y_train = y_train.reshape(-1,48) 46 | x_test = x_test.reshape(-1,48,x_test.shape[1]) 47 | y_test = y_test.reshape(-1,48) 48 | x = np.concatenate((x_train, x_test), axis=0) 49 | y = np.concatenate((y_train,y_test), axis=0) 50 | x,y = sklearn.utils.shuffle(x,y,random_state=seed) 51 | x_train, y_train = x[:550], y[:550] 52 | x_valid, y_valid = x[550:650], y[550:650] 53 | x_test, y_test = x[650:], y[650:] 54 | 55 | solver = knapsack_solver(weights,capacity= capacity, n_items= len(weights) ) 56 | 57 | self.train_df = Datawrapper( x_train,y_train,solver=solver) 58 | self.valid_df = Datawrapper( x_valid, y_valid,solver=solver ) 59 | self.test_df = Datawrapper( x_test, y_test,solver=solver ) 60 | self.train_solutions= self.train_df.sol 61 | 62 | self.batch_size = batch_size 63 | self.generator = generator 64 | self.num_workers = num_workers 65 | 66 | self.weights, self.n_items = weights, n_items 67 | 68 | def train_dataloader(self): 69 | return DataLoader(self.train_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 70 | 71 | def val_dataloader(self): 72 | return DataLoader(self.valid_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 73 | 74 | def test_dataloader(self): 75 | return DataLoader(self.test_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) -------------------------------------------------------------------------------- /Knapsack/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from Trainer.utils import batch_solve 5 | 6 | def SPOlayer(solver,minimize=False): 7 | mm = 1 if minimize else -1 8 | class SPOlayer_cls(torch.autograd.Function): 9 | @staticmethod 10 | def forward(ctx, y_hat,y_true,sol_true ): 11 | sol_hat = batch_solve(solver, y_hat) 12 | 13 | ctx.save_for_backward(y_hat,y_true,sol_true) 14 | 15 | return ( mm*(sol_hat 
-sol_true)*y_true).sum() 16 | 17 | @staticmethod 18 | def backward(ctx, grad_output): 19 | y_hat,y_true,sol_true = ctx.saved_tensors 20 | y_spo = 2*y_hat - y_true 21 | sol_spo = batch_solve(solver,y_spo) 22 | return (sol_true - sol_spo)*mm, None, None 23 | return SPOlayer_cls.apply 24 | 25 | 26 | def DBBlayer(solver,lambda_val=1., minimize=False): 27 | mm = 1 if minimize else -1 28 | class DBBlayer_cls(torch.autograd.Function): 29 | @staticmethod 30 | def forward(ctx, y_hat,y_true,sol_true ): 31 | sol_hat = batch_solve(solver, y_hat) 32 | 33 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 34 | 35 | return sol_hat 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | """ 40 | In the backward pass we compute the gradient that minimizes regret 41 | """ 42 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 43 | y_perturbed = y_hat + mm * lambda_val* grad_output 44 | sol_perturbed = batch_solve(solver, y_perturbed) 45 | 46 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 47 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Knapsack/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def batch_solve(solver, y): 6 | 7 | sol = [] 8 | for i in range(len(y)): 9 | sol.append( solver.solve(y[i].detach().numpy()) ) 10 | return torch.tensor(sol).float() 11 | 12 | def regret_list(solver, y_hat,y_true, sol_true, minimize=False): 13 | mm = 1 if minimize else -1 14 | sol_hat = batch_solve(solver,y_hat) 15 | 16 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) /(( sol_true*y_true).sum(1)) 17 | 18 | 19 | def regret_fn(solver, y_hat,y_true, sol_true, minimize=False): 20 | 21 | 22 | return regret_list(solver,y_hat,y_true,sol_true,minimize).mean() 23 | 24 | 25 | def abs_regret_list(solver, y_hat,y_true, sol_true, minimize=False): 26 | mm = 1 if minimize else -1 27 | sol_hat = batch_solve(solver,y_hat) 28 | 29 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)) 30 | 31 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize=False): 32 | 33 | 34 | return abs_regret_list(solver,y_hat,y_true,sol_true,minimize).mean() 35 | 36 | def growpool_fn(solver,cache, y_hat): 37 | ''' 38 | cache is torch array [currentpoolsize,48] 39 | y_hat is torch array [batch_size,48] 40 | ''' 41 | sol = batch_solve(solver,y_hat).detach().numpy() 42 | cache_np = cache.detach().numpy() 43 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 44 | # np.unique is used for row-wise deduplication of the pool; convert back to torch afterwards 45 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Knapsack/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "IntOpt", 4 | "capacity": 60, 5 | "lr": 0.5, 6 | "thr": 0.01, 7 | "damping": 10.0 8 | }, 9 | { 10 | "model": "IntOpt", 11 | "capacity": 120, 12 | "lr": 0.5, 13 | "thr": 0.1, 14 | "damping": 10.0 15 | }, 16 | { 17 | "model": "IntOpt", 18 | "capacity": 180, 19 | "lr": 1.0, 20 | "thr": 0.01, 21 | "damping": 0.1 22 | }, 23 | { 24 | "model": "DCOL", 25 | "capacity": 60, 26 | "lr": 0.5, 27 | "mu": 10.0 28 | }, 29 | { 30 | "model": "DCOL", 31 | "capacity": 120, 32 | "lr": 0.5, 33 | "mu": 1.0 34 | }, 35 | { 36 | "model": "DCOL", 37 | "capacity": 180, 38 | "lr": 0.5, 39 | "mu": 0.1 40 | }, 41 | { 42 | "model": "baseline_mse", 43 | "capacity": 60, 44 | "lr": 0.5 45 | }, 46 | { 47 | "model": "baseline_mse", 48 
| "capacity": 120, 49 | "lr": 1.0 50 | }, 51 | { 52 | "model": "baseline_mse", 53 | "capacity": 180, 54 | "lr": 1.0 55 | }, 56 | { 57 | "model": "SPO", 58 | "capacity": 60, 59 | "lr": 0.5 60 | }, 61 | { 62 | "model": "SPO", 63 | "capacity": 120, 64 | "lr": 1.0 65 | }, 66 | { 67 | "model": "SPO", 68 | "capacity": 180, 69 | "lr": 1.0 70 | }, 71 | { 72 | "model": "DBB", 73 | "capacity": 60, 74 | "lr": 0.5, 75 | "lambda_val": 0.1 76 | }, 77 | { 78 | "model": "DBB", 79 | "capacity": 120, 80 | "lr": 1.0, 81 | "lambda_val": 1.0 82 | }, 83 | { 84 | "model": "DBB", 85 | "capacity": 180, 86 | "lr": 0.5, 87 | "lambda_val": 1.0 88 | }, 89 | { 90 | "model": "FenchelYoung", 91 | "capacity": 60, 92 | "lr": 1.0, 93 | "sigma": 0.005 94 | }, 95 | { 96 | "model": "FenchelYoung", 97 | "capacity": 120, 98 | "lr": 1, 99 | "sigma": 0.5 100 | }, 101 | { 102 | "model": "FenchelYoung", 103 | "capacity": 180, 104 | "lr": 0.5, 105 | "sigma": 0.5 106 | }, 107 | { 108 | "model": "IMLE", 109 | "capacity": 60, 110 | "lr": 0.5, 111 | "beta": 0.1, 112 | "temperature": 0.5, 113 | "k": 5 114 | }, 115 | { 116 | "model": "IMLE", 117 | "capacity": 120, 118 | "lr": 0.5, 119 | "beta": 0.1, 120 | "temperature": 0.1, 121 | "k": 5 122 | }, 123 | { 124 | "model": "IMLE", 125 | "capacity": 180, 126 | "lr": 0.5, 127 | "beta": 0.1, 128 | "temperature": 5.0, 129 | "k": 5 130 | }, 131 | { 132 | "model": "CachingPO", 133 | "loss": "MAP_c", 134 | "capacity": 60, 135 | "lr": 1.0 136 | }, 137 | { 138 | "model": "CachingPO", 139 | "loss": "MAP_c", 140 | "capacity": 120, 141 | "lr": 1.0 142 | }, 143 | { 144 | "model": "CachingPO", 145 | "loss": "MAP_c", 146 | "capacity": 180, 147 | "lr": 1.0 148 | }, 149 | { 150 | "model": "CachingPO", 151 | "loss": "pairwise_diff", 152 | "capacity": 60, 153 | "lr": 1.0 154 | }, 155 | { 156 | "model": "CachingPO", 157 | "loss": "pairwise_diff", 158 | "capacity": 120, 159 | "lr": 1.0 160 | }, 161 | { 162 | "model": "CachingPO", 163 | "loss": "pairwise_diff", 164 | "capacity": 180, 165 | "lr": 1.0 166 | }, 167 | { 168 | "model": "CachingPO", 169 | "loss": "pairwise", 170 | "capacity": 60, 171 | "lr": 0.5, 172 | "tau": 10 173 | }, 174 | { 175 | "model": "CachingPO", 176 | "loss": "pairwise", 177 | "capacity": 120, 178 | "lr": 0.5, 179 | "tau": 10 180 | }, 181 | { 182 | "model": "CachingPO", 183 | "loss": "pairwise", 184 | "capacity": 180, 185 | "lr": 0.5, 186 | "tau": 10 187 | }, 188 | { 189 | "model": "CachingPO", 190 | "loss": "listwise", 191 | "capacity": 60, 192 | "lr": 1, 193 | "tau": 0.001 194 | }, 195 | { 196 | "model": "CachingPO", 197 | "loss": "listwise", 198 | "capacity": 120, 199 | "lr": 1, 200 | "tau": 0.001 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "listwise", 205 | "capacity": 180, 206 | "lr": 0.5, 207 | "tau": 0.0001 208 | } 209 | ] -------------------------------------------------------------------------------- /Knapsack/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Knapsack/imle/__init__.py -------------------------------------------------------------------------------- /Knapsack/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 
14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Knapsack/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
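Given parameters theta and a downstream gradient dy, :meth:`params` returns the target parameters theta' = alpha * theta - beta * dy, as implemented below.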
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Knapsack/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver into an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers.
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise samples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...]
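# The backward pass below implements the I-MLE finite-difference gradient:
# (i) broadcast dy across the nb_samples noise samples, (ii) form target
# parameters with target_distribution.params (theta' = alpha*theta - beta*dy
# for the default TargetDistribution), (iii) perturb them with the same noise
# and re-solve, and (iv) return the mean difference between the forward MAP
# solutions and the target MAP solutions.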
116 | input, noise, perturbed_output_3d = ctx.saved_tensors 117 | 118 | input_shape = input.shape 119 | batch_size = input_shape[0] 120 | instance_shape = input_shape[1:] 121 | 122 | 123 | #### New line added 124 | dy = dy.view(batch_size, 1, -1).repeat(1,nb_samples, 1).view([batch_size*nb_samples] +list(instance_shape)) 125 | 126 | 127 | # dy is [BATCH_SIZE * N_SAMPLES, ...] 128 | dy_shape = dy.shape 129 | # noise is [BATCH_SIZE, N_SAMPLES, ...] 130 | noise_shape = noise.shape 131 | 132 | # [BATCH_SIZE * NB_SAMPLES, ...] 133 | input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape) 134 | target_input_2d = target_distribution.params(input_2d, dy) 135 | 136 | # [BATCH_SIZE, NB_SAMPLES, ...] 137 | target_input_3d = target_input_2d.view(noise_shape) 138 | 139 | # [BATCH_SIZE, NB_SAMPLES, ...] 140 | target_noise = noise * target_noise_temperature 141 | 142 | # [BATCH_SIZE, N_SAMPLES, ...] 143 | perturbed_target_input_3d = target_input_3d + target_noise 144 | 145 | # [BATCH_SIZE * N_SAMPLES, ...] 146 | perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape) 147 | 148 | # [BATCH_SIZE * N_SAMPLES, ...] 149 | target_output_2d = function(perturbed_target_input_2d) 150 | 151 | # [BATCH_SIZE, N_SAMPLES, ...] 152 | target_output_3d = target_output_2d.view(noise_shape) 153 | 154 | # [BATCH_SIZE, ...] 155 | gradient = (perturbed_output_3d - target_output_3d) 156 | gradient = gradient.mean(axis=1) 157 | return gradient 158 | 159 | return WrappedFunc.apply(input, *args) 160 | return wrapper 161 | -------------------------------------------------------------------------------- /Knapsack/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the Knapsack problem. 2 | 3 | The data is included in `Trainer/Data.npz`. 4 | To run an experiment use `testknapsack.py`. 5 | To reproduce the results of the experiments, run 6 | ``` 7 | python testknapsack.py --scheduler True 8 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 PredOpt-Benchmarks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /Matching/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Matching/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | ###################################### NCE Loss Functions ######################################### 5 | class NCE(torch.nn.Module): 6 | def __init__(self, minimize=False): 7 | super().__init__() 8 | self.mm = 1 if minimize else -1 9 | def forward(self, y_hat,y_true, sol_true,cache): 10 | 11 | loss = 0 12 | mm = self.mm 13 | ## print("shape to be printed: ") 14 | # print(sol_true.shape, cache.shape, y_hat.shape) 15 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 16 | 17 | for ii in range(len( y_hat )): 18 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 19 | loss /= len(y_hat) 20 | return loss 21 | 22 | class NCE_c(torch.nn.Module): 23 | def __init__(self, minimize=False): 24 | super().__init__() 25 | self.mm = 1 if minimize else -1 26 | def forward(self, y_hat,y_true, sol_true,cache): 27 | 28 | loss = 0 29 | mm = self.mm 30 | for ii in range(len( y_hat )): 31 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 32 | loss /= len(y_hat) 33 | return loss 34 | 35 | 36 | class MAP(torch.nn.Module): 37 | def __init__(self, minimize=False): 38 | super().__init__() 39 | self.mm = 1 if minimize else -1 40 | def forward(self, y_hat,y_true,sol_true,cache): 41 | 42 | loss = 0 43 | mm = self.mm 44 | 45 | for ii in range(len( y_hat )): 46 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 47 | loss /= len(y_hat) 48 | return loss 49 | 50 | 51 | class MAP_c(torch.nn.Module): 52 | def __init__(self, minimize=False): 53 | super().__init__() 54 | self.mm = 1 if minimize else -1 55 | def forward(self, y_hat,y_true,sol_true,cache): 56 | 57 | loss = 0 58 | mm = self.mm 59 | 60 | for ii in range(len( y_hat )): 61 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 62 | loss /= len(y_hat) 63 | return loss 64 | 65 | 66 | class MAP_c_actual(torch.nn.Module): 67 | def __init__(self, minimize=False): 68 | super().__init__() 69 | self.mm = 1 if minimize else -1 70 | def forward(self, y_hat,y_true,sol_true,cache): 71 | 72 | loss = 0 73 | mm = self.mm 74 | 75 | for ii in range(len( y_hat )): 76 | 77 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 78 | loss /= len(y_hat) 79 | return loss 80 | 81 | ###################################### Ranking Loss Functions ######################################### 82 | class PointwiseLoss(torch.nn.Module): 83 | def __init__(self): 84 | super().__init__() 85 | def forward(self, y_hat,y_true,sol_true,cache): 86 | ''' 87 | y_hat: predicted cost vector [batch_size, 2500] 88 | y_true: actual cost vector [batch_size, 2500] 89 | sol_true: true optimal matching [batch_size, 2500] 90 | cache: torch array of cached solutions [cache_size, 2500] 91 | ''' 92 | loss = 0 93 |
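# The loop below computes, per example, the mean squared difference between
# cache*y_hat and cache*y_true, i.e. predicted vs. true costs masked by each
# cached solution -- a pointwise regression surrogate over the solution cache.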
94 | for ii in range(len( y_hat )): 95 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 96 | loss /= len(y_hat) 97 | 98 | return loss 99 | class ListwiseLoss(torch.nn.Module): 100 | def __init__(self, temperature=0., minimize=False): 101 | super().__init__() 102 | self.temperature = temperature 103 | self.mm = 1 if minimize else -1 104 | def forward(self, y_hat,y_true,sol_true,cache): 105 | 106 | loss = 0 107 | mm, temperature = self.mm, self.temperature 108 | 109 | for ii in range(len( y_hat )): 110 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | 115 | 116 | class PairwisediffLoss(torch.nn.Module): 117 | def __init__(self, minimize=False): 118 | super().__init__() 119 | self.mm = 1 if minimize else -1 120 | 121 | def forward(self, y_hat,y_true,sol_true,cache): 122 | ''' 123 | y_hat: predicted cost vector [batch_size, 2500] 124 | y_true: actual cost vector [batch_size, 2500] 125 | sol_true: true optimal matching [batch_size, 2500] 126 | cache: torch array of cached solutions [cache_size, 2500] 127 | ''' 128 | 129 | loss = 0 130 | for ii in range(len( y_hat )): 131 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 132 | 133 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 134 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 135 | 136 | 137 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 138 | loss /= len(y_hat) 139 | return loss 140 | 141 | class PairwiseLoss(torch.nn.Module): 142 | def __init__(self, margin=0., minimize=False): 143 | super().__init__() 144 | self.margin = margin 145 | self.mm = 1 if minimize else -1 146 | def forward(self, y_hat,y_true,sol_true,cache): 147 | ''' 148 | y_hat: predicted cost vector [batch_size, 2500] 149 | y_true: actual cost vector [batch_size, 2500] 150 | sol_true: true optimal matching [batch_size, 2500] 151 | cache: torch array of cached solutions [cache_size, 2500] 152 | ''' 153 | relu = torch.nn.ReLU() 154 | loss = 0 155 | mm, margin = self.mm, self.margin 156 | for ii in range(len( y_hat )): 157 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 158 | 159 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 160 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 161 | 162 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 163 | loss /= len(y_hat) 164 | return loss -------------------------------------------------------------------------------- /Matching/Trainer/NNModels.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | def cora_net(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 8 | if n_layers ==1: 9 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 10 | else: 11 | layers = [] 12 | # input layer 13 | layers.append(nn.Sequential( 14 | nn.Linear(n_features, n_hidden), 15 | nn.ReLU() 16 | )) 17 | # hidden layers 18 | for _ in range(n_layers -2) : 19 | layers.append(nn.Sequential( 20 |
nn.Linear(n_hidden, n_hidden), 21 | nn.ReLU() 22 | )) 23 | # output layer 24 | layers.append(nn.Sequential( 25 | nn.Linear(n_hidden, n_targets), 26 | nn.Sigmoid() 27 | )) 28 | return nn.Sequential(*layers) 29 | 30 | def cora_normednet(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 31 | if n_layers ==1: 32 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 33 | else: 34 | layers = [] 35 | # input layer 36 | layers.append(nn.Sequential( 37 | nn.Linear(n_features, n_hidden), 38 | nn.ReLU(),nn.BatchNorm1d(2500) 39 | )) 40 | # hidden layers 41 | for _ in range(n_layers -2) : 42 | layers.append(nn.Sequential( 43 | nn.Linear(n_hidden, n_hidden), 44 | nn.ReLU(),nn.BatchNorm1d(2500) 45 | )) 46 | # output layer 47 | layers.append(nn.Sequential( 48 | nn.Linear(n_hidden, n_targets) 49 | # nn.Sigmoid() 50 | )) 51 | return nn.Sequential(*layers) 52 | 53 | def cora_nosigmoidnet(n_features=2866, n_hidden=200, n_layers=2, n_targets=1): 54 | if n_layers ==1: 55 | return nn.Sequential(nn.Linear(n_features, n_targets), nn.Sigmoid()) 56 | else: 57 | layers = [] 58 | # input layer 59 | layers.append(nn.Sequential( 60 | nn.Linear(n_features, n_hidden), 61 | nn.ReLU() 62 | )) 63 | # hidden layers 64 | for _ in range(n_layers -2) : 65 | layers.append(nn.Sequential( 66 | nn.Linear(n_hidden, n_hidden), 67 | nn.ReLU() 68 | )) 69 | # output layer 70 | layers.append(nn.Sequential( 71 | nn.Linear(n_hidden, n_targets) 72 | )) 73 | return nn.Sequential(*layers) -------------------------------------------------------------------------------- /Matching/Trainer/bipartite.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import pickle 4 | import copy 5 | from tqdm.auto import tqdm 6 | import sys 7 | from ortools.graph import pywrapgraph 8 | from ortools.linear_solver import pywraplp 9 | import torch 10 | 11 | def linearobj(x,v, **params): 12 | return 13 | 14 | def bmatching(preds, mult=1000, **kwargs): 15 | assignment = pywrapgraph.LinearSumAssignment() 16 | cost = -preds.reshape(50,50)*mult 17 | n1 = len(cost) 18 | n2 = len(cost[0]) 19 | for i in range(n1): 20 | for j in range(n2): 21 | assignment.AddArcWithCost(i, j, int(cost[i,j])) 22 | solve_status = assignment.Solve() 23 | solution = np.zeros((50,50)) 24 | for i in range(assignment.NumNodes()): 25 | mate = assignment.RightMate(i) 26 | solution[i,mate] = 1 27 | return solution.reshape(-1) 28 | 29 | solver = pywraplp.Solver.CreateSolver('GLOP') 30 | # solver.SuppressOutput() 31 | 32 | class bmatching_diverse: 33 | def __init__(self,p=0.25, q=0.25, relaxation=False) -> None: 34 | self.p, self.q = p,q 35 | self.relaxation = relaxation 36 | def solve(self, preds, match_subs, **kwargs): 37 | p,q = self.p, self.q 38 | relaxation = self.relaxation 39 | 40 | solver.Clear() 41 | mult=1000 42 | cost = -preds.reshape(50,50)*mult 43 | m = match_subs.reshape(50,50) 44 | n1 = len(cost) 45 | n2 = len(cost[0]) 46 | x = {} 47 | for i in range(n1): 48 | for j in range(n2): 49 | x[i,j] = solver.NumVar(0,1,'') if relaxation else solver.IntVar(0,1,'') 50 | 51 | for i in range(n1): 52 | solver.Add(solver.Sum([x[i, j] for j in range(n2)]) <= 1) 53 | 54 | for j in range(n2): 55 | solver.Add(solver.Sum([x[i, j] for i in range(n1)]) <= 1) 56 | 57 | # pairing in same field 58 | pairing_same = [] 59 | allvars = [] 60 | for i in range(n1): 61 | for j in range(n2): 62 | pairing_same.append(x[i,j] * m[i,j]) 63 | allvars.append(x[i,j]) 64 | solver.Add(solver.Sum(pairing_same) >= 
p*solver.Sum(allvars)) 65 | 66 | # pairing in distinct field 67 | pairing_dis = [] 68 | for i in range(n1): 69 | for j in range(n2): 70 | pairing_dis.append(x[i,j] * (1-m[i,j])) 71 | solver.Add(solver.Sum(pairing_dis) >= q*solver.Sum(allvars)) 72 | 73 | obj = [] 74 | for i in range(n1): 75 | for j in range(n2): 76 | obj.append(cost[i,j] * x[i,j]) 77 | solver.Minimize(solver.Sum(obj)) 78 | 79 | status = solver.Solve() 80 | solution = np.zeros((50,50)) 81 | 82 | if status == pywraplp.Solver.OPTIMAL: 83 | for i in range(n1): 84 | for j in range(n2): 85 | solution[i,j] = x[i,j].solution_value() 86 | #solver.Clear() 87 | return solution.reshape(-1) 88 | 89 | def get_qpt_matrices(self, match_subs): 90 | p,q = self.p, self.q 91 | 92 | # we only have G * x <= h 93 | 94 | # Matching 95 | N1 = np.zeros((50,2500)) 96 | N2 = np.zeros_like(N1) 97 | b1 = np.ones(50) 98 | b2 = np.ones_like(b1) 99 | 100 | for i in range(50): 101 | rowmask = np.zeros((50,50)) 102 | colmask = np.zeros_like(rowmask) 103 | rowmask[i,:] = 1 104 | colmask[:,i] = 1 105 | N1[i] = rowmask.flatten() 106 | N2[i] = colmask.flatten() 107 | 108 | # Similarity constraint 109 | Sim = p - match_subs 110 | bsim = np.zeros(1) 111 | 112 | # Diversity constraint 113 | Div = q - 1 + match_subs 114 | bdiv = np.zeros_like(bsim) 115 | 116 | G = np.vstack((N1, N2, Sim, Div)) 117 | h = np.concatenate((b1, b2, bsim, bdiv)) 118 | A = torch.Tensor().float() 119 | b = torch.Tensor().float() 120 | return A,b, torch.from_numpy(G).float(), torch.from_numpy(h).float() 121 | 122 | 123 | 124 | 125 | # def get_qpt_matrices(match_subs, p=0.25, q=0.25, **kwargs): 126 | # # we only have G * x <= h 127 | 128 | # # Matching 129 | # N1 = np.zeros((50,2500)) 130 | # N2 = np.zeros_like(N1) 131 | # b1 = np.ones(50) 132 | # b2 = np.ones_like(b1) 133 | 134 | # for i in range(50): 135 | # rowmask = np.zeros((50,50)) 136 | # colmask = np.zeros_like(rowmask) 137 | # rowmask[i,:] = 1 138 | # colmask[:,i] = 1 139 | # N1[i] = rowmask.flatten() 140 | # N2[i] = colmask.flatten() 141 | 142 | # # Similarity constraint 143 | # Sim = p - match_subs 144 | # bsim = np.zeros(1) 145 | 146 | # # Diversity constraint 147 | # Div = q - 1 + match_subs 148 | # bdiv = np.zeros_like(bsim) 149 | 150 | # G = np.vstack((N1, N2, Sim, Div)) 151 | # h = np.concatenate((b1, b2, bsim, bdiv)) 152 | # A = None 153 | # b = None 154 | # return A,b, G,h 155 | 156 | 157 | 158 | 159 | def get_cora(): 160 | """ 161 | Get X,y 162 | """ 163 | # 164 | with open('data/cora_data.pickle', 'rb') as f: 165 | gt, ft, M = pickle.load(f) 166 | return ft, gt, M 167 | 168 | if __name__ == '__main__': 169 | x,y,m = get_cora() 170 | params = {'p':0.5,'q':0.5} 171 | idx = 15 172 | p,m = bmatching_diverse(y[idx], m[idx], **params) , m[idx] 173 | objective_fun=lambda x,v,**params: x @ v 174 | print("Objective ",objective_fun(p,y[idx]) ) 175 | 176 | _,_, G,h = get_qpt_matrices(m, **params) 177 | ineq = G @ p 178 | print('G: ', G.shape) 179 | print('h: ', h.shape) 180 | print('G @ x: ', ineq.shape) 181 | csat = (ineq - h) <=0 182 | print('constraints satisfied ?', csat.all()) 183 | print(ineq -h) 184 | a,b = np.unique(G, axis=1, return_counts=True) 185 | print('uniques?', a.shape) 186 | print('any repetition?', (b > 2).any()) -------------------------------------------------------------------------------- /Matching/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | from Trainer.bipartite import get_cora 2 | from torch.utils.data import DataLoader 3 | import numpy as np 4 
| import pytorch_lightning as pl 5 | import torch 6 | from torch.utils.data import DataLoader 7 | from tqdm import tqdm 8 | class CoraDatawrapper(): 9 | def __init__(self, x,y, M,solver, params={'p':0.25, 'q':0.25}, relaxation=False, sols=None, verbose=False): 10 | self.x = x 11 | self.y = y 12 | self.m = M 13 | if sols is not None: 14 | self.sols = sols 15 | else: 16 | y_iter = range(len(self.y)) 17 | it = tqdm(y_iter) if verbose else y_iter 18 | self.sols = np.array([solver.solve(self.y[i], self.m[i], relaxation=relaxation, **params) for i in it]) 19 | self.sols = torch.from_numpy(self.sols).float() 20 | 21 | self.x = torch.from_numpy(self.x).float() 22 | self.y = torch.from_numpy(self.y).float() 23 | self.m = torch.from_numpy(self.m).float() 24 | def __len__(self): 25 | return len(self.y) 26 | 27 | def __getitem__(self, index): 28 | return self.x[index], self.y[index], self.sols[index], self.m[index] 29 | 30 | 31 | 32 | def return_trainlabel(solver,params): 33 | x, y,m = get_cora() 34 | 35 | y_train, y_test = y[:22], y[22:] 36 | m_train, m_test = m[:22], m[22:] 37 | y_iter = range(len(y_train)) 38 | sols = np.array([solver.solve(y[i], m[i], **params) for i in y_iter]) 39 | sols = np.unique(sols,axis=0) 40 | return torch.from_numpy (sols) 41 | 42 | ###################################### Dataloader ######################################### 43 | 44 | class CoraMatchingDataModule(pl.LightningDataModule): 45 | def __init__(self,solver,params, generator=None, normalize=False, batch_size: int = 32, num_workers: int=8): 46 | super().__init__() 47 | x, y,m = get_cora() 48 | 49 | x_train, x_test = x[:22], x[22:] 50 | y_train, y_test = y[:22], y[22:] 51 | m_train, m_test = m[:22], m[22:] 52 | 53 | 54 | self.train_df = CoraDatawrapper( x_train,y_train,m_train,solver,params=params) 55 | self.valid_df = CoraDatawrapper( x_test,y_test,m_test, solver,params=params) 56 | self.test_df = CoraDatawrapper( x_test,y_test,m_test,solver, params=params) 57 | ### As we don't have much data, the validation and test datasets are the same 58 | self.batch_size = batch_size 59 | self.generator = generator 60 | self.num_workers = num_workers 61 | 62 | 63 | def train_dataloader(self): 64 | return DataLoader(self.train_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 65 | 66 | def val_dataloader(self): 67 | return DataLoader(self.valid_df, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 68 | 69 | def test_dataloader(self): 70 | return DataLoader(self.test_df, batch_size=5, num_workers=self.num_workers) -------------------------------------------------------------------------------- /Matching/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from Trainer.utils import batch_solve 5 | 6 | 7 | 8 | def SPOlayer(solver,minimize=False): 9 | mm = 1 if minimize else -1 10 | class SPOlayer_cls(torch.autograd.Function): 11 | @staticmethod 12 | def forward(ctx, y_hat,y_true,sol_true ,m ): 13 | sol_hat = batch_solve(solver, y_hat, m) 14 | 15 | ctx.save_for_backward(y_hat,y_true,sol_true ,m ) 16 | 17 | return ( mm*(sol_hat -sol_true)*y_true).sum() 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | y_hat,y_true,sol_true ,m = ctx.saved_tensors 22 | y_spo = 2*y_hat - y_true 23 | sol_spo = batch_solve(solver,y_spo ,m) 24 | return (sol_true - sol_spo)*mm, None, None, None 25 | return SPOlayer_cls.apply 26 | 27 | 28 | def DBBlayer(solver,lambda_val=1.,
minimize=False): 29 | mm = 1 if minimize else -1 30 | class DBBlayer_cls(torch.autograd.Function): 31 | @staticmethod 32 | def forward(ctx, y_hat,y_true,sol_true ,m ): 33 | sol_hat = batch_solve(solver, y_hat ,m) 34 | 35 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat ,m) 36 | 37 | return sol_hat 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | """ 42 | In the backward pass we compute the gradient that minimizes regret 43 | """ 44 | y_hat,y_true,sol_true, sol_hat ,m = ctx.saved_tensors 45 | y_perturbed = y_hat + mm* lambda_val* grad_output 46 | sol_perturbed = batch_solve(solver, y_perturbed ,m) 47 | 48 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None, None 49 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Matching/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | # from Trainer.bipartite import bmatching_diverse, get_qpt_matrices 2 | import torch 3 | import numpy as np 4 | 5 | # solver = bmatching_diverse 6 | # objective_fun=lambda x,v,**params: x @ v 7 | 8 | def batch_solve(solver,y,m,relaxation =False,batched= True): 9 | 10 | if batched: 11 | ### y, m both are of dim (*,2500) 12 | sol = [] 13 | 14 | for i in range(len(y)): 15 | sol.append( solver.solve(y[i].detach().numpy(), m[i].numpy(), relaxation=relaxation) ) 16 | return torch.tensor(sol).float() 17 | else: 18 | ### y, m both are of dim (2500) 19 | sol = solver.solve(y.detach().numpy(), m.numpy(), relaxation=relaxation) 20 | return torch.tensor(sol).float() 21 | 22 | 23 | def regret_list(solver,y_hat,y_true,sol_true,m,minimize=False): 24 | mm = 1 if minimize else -1 25 | sol_hat = batch_solve(solver, y_hat,m) 26 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 27 | 28 | def abs_regret_list(solver,y_hat,y_true,sol_true,m,minimize=False): 29 | mm = 1 if minimize else -1 30 | sol_hat = batch_solve(solver, y_hat,m) 31 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 32 | 33 | def regret_fn(solver,y_hat,y_true,sol_true,m,minimize=False): 34 | # mm = 1 if minimize else -1 35 | # sol_hat = batch_solve(y_hat,m) 36 | # sol_ = batch_solve(y,m) 37 | # # return ((mm*(sol_hat - sol_)*y).sum(1)/ (sol_*y).sum(1) ).mean() 38 | return regret_list(solver,y_hat,y_true,sol_true,m,minimize=minimize).mean() 39 | 40 | def abs_regret_fn(solver,y_hat,y_true,sol_true,m,minimize=False): 41 | # mm = 1 if minimize else -1 42 | # sol_hat = batch_solve(y_hat,m) 43 | # sol_ = batch_solve(y,m) 44 | # # return ((mm*(sol_hat - sol_)*y).sum(1)/ (sol_*y).sum(1) ).mean() 45 | return abs_regret_list(solver,y_hat,y_true,sol_true,m,minimize=minimize).mean() 46 | 47 | 48 | def growpool_fn(solver,cache, y_hat, m): 49 | ''' 50 | cache is torch array [currentpoolsize,2500] 51 | y_hat is torch array [batch_size,2500] 52 | ''' 53 | sol = batch_solve(solver,y_hat,m).detach().numpy() 54 | cache_np = cache.detach().numpy() 55 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 56 | # np.unique is used for row-wise deduplication of the pool; convert back to torch afterwards 57 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Matching/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "DBB", 4 | "instance": 1, 5 | "lr": 0.01, 6 | "lambda_val": 10.0 7 | }, 8 | { 9 | "model": "DBB", 10 | "instance": 2, 11 | "lr": 0.01, 12 | "lambda_val": 0.1 13 | }, 14 | { 15 | "model": "DBB", 16 | "instance": 3, 17 |
"lr": 0.01, 18 | "lambda_val": 0.1 19 | }, 20 | { 21 | "model": "FenchelYoung", 22 | "instance": 1, 23 | "lr": 0.001, 24 | "sigma": 0.5 25 | }, 26 | { 27 | "model": "FenchelYoung", 28 | "instance": 2, 29 | "lr": 0.001, 30 | "sigma": 0.01 31 | }, 32 | { 33 | "model": "FenchelYoung", 34 | "instance": 3, 35 | "lr": 0.001, 36 | "sigma": 5.0 37 | }, 38 | { 39 | "model": "IMLE", 40 | "instance": 1, 41 | "lr": 0.001, 42 | "beta": 100.0, 43 | "temperature": 0.5, 44 | "k": 5 45 | }, 46 | { 47 | "model": "IMLE", 48 | "instance": 2, 49 | "lr": 0.001, 50 | "beta": 100.0, 51 | "temperature": 0.5, 52 | "k": 5 53 | }, 54 | { 55 | "model": "IMLE", 56 | "instance": 3, 57 | "lr": 0.001, 58 | "beta": 100.0, 59 | "temperature": 0.5, 60 | "k": 5 61 | }, 62 | { 63 | "model": "DCOL", 64 | "instance": 1, 65 | "lr": 0.01, 66 | "mu": 100.0 67 | }, 68 | { 69 | "model": "DCOL", 70 | "instance": 2, 71 | "lr": 0.001, 72 | "mu": 10.0 73 | }, 74 | { 75 | "model": "DCOL", 76 | "instance": 3, 77 | "lr": 0.001, 78 | "mu": 10.0 79 | }, 80 | { 81 | "model": "IntOpt", 82 | "instance": 1, 83 | "lr": 0.001, 84 | "thr": 1.0, 85 | "damping": 0.1 86 | }, 87 | { 88 | "model": "IntOpt", 89 | "instance": 2, 90 | "lr": 0.05, 91 | "thr": 0.1, 92 | "damping": 10.0 93 | }, 94 | { 95 | "model": "IntOpt", 96 | "instance": 3, 97 | "lr": 0.001, 98 | "thr": 0.1, 99 | "damping": 0.1 100 | }, 101 | { 102 | "model": "baseline_mse", 103 | "instance": 1, 104 | "lr": 0.01 105 | }, 106 | { 107 | "model": "baseline_mse", 108 | "instance": 2, 109 | "lr": 0.01 110 | }, 111 | { 112 | "model": "baseline_mse", 113 | "instance": 3, 114 | "lr": 0.0005 115 | }, 116 | { 117 | "model": "CachingPO", 118 | "loss": "MAP_c", 119 | "instance": 1, 120 | "lr": 0.001 121 | }, 122 | { 123 | "model": "CachingPO", 124 | "loss": "MAP_c", 125 | "instance": 2, 126 | "lr": 0.01 127 | }, 128 | { 129 | "model": "CachingPO", 130 | "loss": "MAP_c", 131 | "instance": 3, 132 | "lr": 0.005 133 | }, 134 | { 135 | "model": "CachingPO", 136 | "loss": "pairwise_diff", 137 | "instance": 1, 138 | "lr": 0.001 139 | }, 140 | { 141 | "model": "CachingPO", 142 | "loss": "pairwise_diff", 143 | "instance": 2, 144 | "lr": 0.01 145 | }, 146 | { 147 | "model": "CachingPO", 148 | "loss": "pairwise_diff", 149 | "instance": 3, 150 | "lr": 0.005 151 | }, 152 | { 153 | "model": "CachingPO", 154 | "loss": "pairwise", 155 | "instance": 1, 156 | "lr": 0.005, 157 | "tau": 5 158 | }, 159 | { 160 | "model": "CachingPO", 161 | "loss": "pairwise", 162 | "instance": 2, 163 | "lr": 0.01, 164 | "tau": 50 165 | }, 166 | { 167 | "model": "CachingPO", 168 | "loss": "pairwise", 169 | "instance": 3, 170 | "lr": 0.01, 171 | "tau": 50 172 | }, 173 | { 174 | "model": "CachingPO", 175 | "loss": "listwise", 176 | "instance": 1, 177 | "lr": 0.001, 178 | "tau": 5.0 179 | }, 180 | { 181 | "model": "CachingPO", 182 | "loss": "listwise", 183 | "instance": 2, 184 | "lr": 0.01, 185 | "tau": 5.0 186 | }, 187 | { 188 | "model": "CachingPO", 189 | "loss": "listwise", 190 | "instance": 3, 191 | "lr": 0.01, 192 | "tau": 50.0 193 | }, 194 | { 195 | "model": "SPO", 196 | "instance": 1, 197 | "lr": 0.001 198 | }, 199 | { 200 | "model": "SPO", 201 | "instance": 2, 202 | "lr": 0.001 203 | }, 204 | { 205 | "model": "SPO", 206 | "instance": 3, 207 | "lr": 0.005 208 | } 209 | ] -------------------------------------------------------------------------------- /Matching/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo 'downloading preprocessed CORA dataset...' 
3 | gdown 1MNy9HCVkJykRbXf6XXI9D7lggF0UF8MP 4 | tar -xvzf data.tar.gz 5 | echo 'cleaning...' 6 | rm data.tar.gz 7 | cd data/ 8 | python make_cora_dataset.py 9 | echo 'done' 10 | -------------------------------------------------------------------------------- /Matching/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Matching/imle/__init__.py -------------------------------------------------------------------------------- /Matching/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1.
/ self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Matching/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`. 25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Matching/imle/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import functools 4 | 5 | import torch 6 | from torch import Tensor 7 | 8 | from imle.noise import BaseNoiseDistribution 9 | from imle.target import BaseTargetDistribution, TargetDistribution 10 | 11 | from typing import Callable, Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def imle(function: Callable[[Tensor], Tensor] = None, 19 | target_distribution: Optional[BaseTargetDistribution] = None, 20 | noise_distribution: Optional[BaseNoiseDistribution] = None, 21 | nb_samples: int = 1, 22 | input_noise_temperature: float = 1.0, 23 | target_noise_temperature: float = 1.0): 24 | r"""Turns a black-box combinatorial solver into an Exponential Family distribution via Perturb-and-MAP and I-MLE [1]. 25 | 26 | The input function (solver) needs to return the solution to the problem of finding a MAP state for a constrained 27 | exponential family distribution -- this is the case for most black-box combinatorial solvers [2]. If this condition 28 | is violated though, the result would not hold and there is no guarantee on the validity of the obtained gradients. 29 | 30 | This function can be used directly or as a decorator. 31 | 32 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 33 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 34 | [2] Marin Vlastelica, Anselm Paulus, Vít Musil, Georg Martius, Michal Rolínek - Differentiation of Blackbox 35 | Combinatorial Solvers.
ICLR 2020 (https://arxiv.org/abs/1912.02175) 36 | 37 | Example:: 38 | 39 | >>> from imle.wrapper import imle 40 | >>> from imle.target import TargetDistribution 41 | >>> from imle.noise import SumOfGammaNoiseDistribution 42 | >>> target_distribution = TargetDistribution(alpha=0.0, beta=10.0) 43 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=21, nb_iterations=100) 44 | >>> @imle(target_distribution=target_distribution, noise_distribution=noise_distribution, nb_samples=100, 45 | >>> input_noise_temperature=input_noise_temperature, target_noise_temperature=5.0) 46 | >>> def imle_solver(weights_batch: Tensor) -> Tensor: 47 | >>> return torch_solver(weights_batch) 48 | 49 | Args: 50 | function (Callable[[Tensor], Tensor]): black-box combinatorial solver 51 | target_distribution (Optional[BaseTargetDistribution]): factory for target distributions 52 | noise_distribution (Optional[BaseNoiseDistribution]): noise distribution 53 | nb_samples (int): number of noise samples 54 | input_noise_temperature (float): noise temperature for the input distribution 55 | target_noise_temperature (float): noise temperature for the target distribution 56 | """ 57 | if target_distribution is None: 58 | target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 59 | 60 | if function is None: 61 | return functools.partial(imle, 62 | target_distribution=target_distribution, 63 | noise_distribution=noise_distribution, 64 | nb_samples=nb_samples, 65 | input_noise_temperature=input_noise_temperature, 66 | target_noise_temperature=target_noise_temperature) 67 | 68 | @functools.wraps(function) 69 | def wrapper(input: Tensor, *args): 70 | class WrappedFunc(torch.autograd.Function): 71 | 72 | @staticmethod 73 | def forward(ctx, input: Tensor, *args): 74 | # [BATCH_SIZE, ...] 75 | input_shape = input.shape 76 | 77 | batch_size = input_shape[0] 78 | instance_shape = input_shape[1:] 79 | 80 | # [BATCH_SIZE, N_SAMPLES, ...] 81 | perturbed_input_shape = [batch_size, nb_samples] + list(instance_shape) 82 | 83 | if noise_distribution is None: 84 | noise = torch.zeros(size=perturbed_input_shape) 85 | else: 86 | noise = noise_distribution.sample(shape=torch.Size(perturbed_input_shape)) 87 | 88 | input_noise = noise * input_noise_temperature 89 | 90 | # [BATCH_SIZE, N_SAMPLES, ...] 91 | perturbed_input_3d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(perturbed_input_shape) 92 | perturbed_input_3d = perturbed_input_3d + input_noise 93 | 94 | # [BATCH_SIZE * N_SAMPLES, ...] 95 | perturbed_input_2d = perturbed_input_3d.view([-1] + perturbed_input_shape[2:]) 96 | perturbed_input_2d_shape = perturbed_input_2d.shape 97 | 98 | # [BATCH_SIZE * N_SAMPLES, ...] 99 | perturbed_output = function(perturbed_input_2d) 100 | # [BATCH_SIZE, N_SAMPLES, ...] 101 | perturbed_output = perturbed_output.view(perturbed_input_shape) 102 | 103 | ctx.save_for_backward(input, noise, perturbed_output) 104 | 105 | # [BATCH_SIZE * N_SAMPLES, ...] 106 | # res = perturbed_output.view(perturbed_input_2d_shape) 107 | #### New line added 108 | res = perturbed_output.mean(dim=1) 109 | return res 110 | 111 | @staticmethod 112 | def backward(ctx, dy): 113 | # input: [BATCH_SIZE, ...] 114 | # noise: [BATCH_SIZE, N_SAMPLES, ...] 115 | # perturbed_output_3d: # [BATCH_SIZE, N_SAMPLES, ...]
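# What follows mirrors the I-MLE update: dy is broadcast over the noise
# samples, target parameters are produced by target_distribution.params,
# perturbed with the same noise, re-solved, and the mean difference of the
# two MAP solution sets is returned as the gradient estimate.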
116 | input, noise, perturbed_output_3d = ctx.saved_tensors 117 | 118 | input_shape = input.shape 119 | batch_size = input_shape[0] 120 | instance_shape = input_shape[1:] 121 | 122 | 123 | #### New line added 124 | dy = dy.view(batch_size, 1, -1).repeat(1,nb_samples, 1).view([batch_size*nb_samples] +list(instance_shape)) 125 | 126 | 127 | # dy is [BATCH_SIZE * N_SAMPLES, ...] 128 | dy_shape = dy.shape 129 | # noise is [BATCH_SIZE, N_SAMPLES, ...] 130 | noise_shape = noise.shape 131 | 132 | # [BATCH_SIZE * NB_SAMPLES, ...] 133 | input_2d = input.view(batch_size, 1, -1).repeat(1, nb_samples, 1).view(dy_shape) 134 | target_input_2d = target_distribution.params(input_2d, dy) 135 | 136 | # [BATCH_SIZE, NB_SAMPLES, ...] 137 | target_input_3d = target_input_2d.view(noise_shape) 138 | 139 | # [BATCH_SIZE, NB_SAMPLES, ...] 140 | target_noise = noise * target_noise_temperature 141 | 142 | # [BATCH_SIZE, N_SAMPLES, ...] 143 | perturbed_target_input_3d = target_input_3d + target_noise 144 | 145 | # [BATCH_SIZE * N_SAMPLES, ...] 146 | perturbed_target_input_2d = perturbed_target_input_3d.view(dy_shape) 147 | 148 | # [BATCH_SIZE * N_SAMPLES, ...] 149 | target_output_2d = function(perturbed_target_input_2d) 150 | 151 | # [BATCH_SIZE, N_SAMPLES, ...] 152 | target_output_3d = target_output_2d.view(noise_shape) 153 | 154 | # [BATCH_SIZE, ...] 155 | gradient = (perturbed_output_3d - target_output_3d) 156 | gradient = gradient.mean(axis=1) 157 | return gradient 158 | 159 | return WrappedFunc.apply(input, *args) 160 | return wrapper 161 | -------------------------------------------------------------------------------- /Matching/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the diverse bipartite matching problem. 2 | 3 | To download the data, run 4 | ``` 5 | ./get_data.sh 6 | ``` 7 | This will create a folder `data/` and save the data files inside that directory. 8 | Alternatively, you can download the bipartite matching dataset from the repository: https://doi.org/10.48804/KT2P3Z and extract the `tar.gz` file. 9 | 10 | 11 | To run experiments use `test_matching.py`. 12 | To reproduce the results of the experiments, run 13 | ``` 14 | python test_matching.py --scheduler True 15 | ``` 16 | 17 | -------------------------------------------------------------------------------- /Portfolio/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License.
19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
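        Example (an illustrative sketch, assuming ``portfolio_solver`` is a hypothetical
        batched solver mapping predicted prices to optimal portfolio weights)::

            >>> fy = FenchelYoungLoss(func=portfolio_solver, num_samples=100, sigma=0.1, maximize=True)
            >>> loss = fy(y_hat, sol_true).mean()  # y_hat: predicted prices; sol_true: true optimal weights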
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /Portfolio/Trainer/CacheLosses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | ###################################### NCE Loss Functions ######################################### 5 | class NCE(torch.nn.Module): 6 | def __init__(self, minimize = False): 7 | super().__init__() 8 | self.mm = 1 if minimize else -1 9 | def forward(self, y_hat,y_true, sol_true,cache): 10 | 11 | loss = 0 12 | mm = self.mm 13 | ## print("shape to be printed: ") 14 | # print(sol_true.shape, cache.shape, y_hat.shape) 15 | ### torch.Size([B, 2500]) torch.Size([|S|, 2500]) torch.Size([B, 2500]) 16 | 17 | for ii in range(len( y_hat )): 18 | loss += ( ( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).mean() 19 | loss /= len(y_hat) 20 | return loss 21 | 22 | class NCE_c(torch.nn.Module): 23 | def __init__(self, minimize = False): 24 | super().__init__() 25 | self.mm = 1 if minimize else -1 26 | def forward(self, y_hat,y_true, sol_true,cache): 27 | 28 | loss = 0 29 | mm = self.mm 30 | for ii in range(len( y_hat )): 31 | loss += ( ( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).mean() 32 | loss /= len(y_hat) 33 | return loss 34 | 35 | 36 | class MAP(torch.nn.Module): 37 | def __init__(self, minimize = False): 38 | super().__init__() 39 | self.mm = 1 if minimize else -1 40 | def forward(self, y_hat,y_true,sol_true,cache): 41 | 42 | loss = 0 43 | mm = self.mm 44 | 45 | for ii in range(len( y_hat )): 46 | loss += (( mm* ( sol_true[ii] - cache )*y_hat[ii] ).sum(dim=(1)) ).max() 47 | loss /= len(y_hat) 48 | return loss 49 | 50 | 51 | class MAP_c(torch.nn.Module): 52 | def __init__(self, minimize = False): 53 | super().__init__() 54 | self.mm = 1 if minimize else -1 55 | def forward(self, y_hat,y_true,sol_true,cache): 56 | 57 | loss = 0 58 | mm = self.mm 59 | 60 | for ii in range(len( y_hat )): 61 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 62 | loss /= len(y_hat) 63 | return loss 64 | 65 | 66 | class MAP_c_actual(torch.nn.Module): 67 | def __init__(self, minimize = False): 68 | super().__init__() 69 | self.mm = 1 if minimize else -1 70 | def forward(self, y_hat,y_true,sol_true,cache): 71 | 72 | loss = 0 73 | mm = self.mm 74 | 75 | for ii in range(len( y_hat )): 76 | 77 | loss += (( mm* ( sol_true[ii] - cache )* (y_hat[ii] - y_true[ii]) ).sum(dim=(1)) ).max() 78 | loss /= len(y_hat) 79 | return loss 80 | 81 | ###################################### Ranking Loss Functions ######################################### 82 | class PointwiseLoss(torch.nn.Module): 83 | def __init__(self): 84 | super().__init__() 85 | def forward(self, y_hat,y_true,sol_true,cache): 86 | ''' 87 | pred_weights: predicted cost vector [batch_size, img,img] 88 | true_weights: actual cost vector [batch_size, img,img] 89 | target: true shortest path [batch_size, img,img] 90 | cache: cache is torch array [cache_size, img,img] 91 | ''' 92 | loss
= 0 93 | 94 | for ii in range(len( y_hat )): 95 | loss += ((cache*y_hat[ii])-(cache*y_true[ii])).square().mean() 96 | loss /= len(y_hat) 97 | 98 | return loss 99 | class ListwiseLoss(torch.nn.Module): 100 | def __init__(self, temperature=0., minimize = False): 101 | super().__init__() 102 | self.temperature = temperature 103 | self.mm = 1 if minimize else -1 104 | def forward(self, y_hat,y_true,sol_true,cache): 105 | 106 | loss = 0 107 | mm, temperature = self.mm, self.temperature 108 | 109 | for ii in range(len( y_hat )): 110 | loss += - ( F.log_softmax((-mm*y_hat[ii]*cache/temperature).sum(dim=(1)),dim=0) * F.softmax((-mm*y_true[ii]*cache/temperature).sum(dim=(1)),dim=0)).mean() 111 | loss /= len(y_hat) 112 | 113 | return loss 114 | 115 | 116 | class PairwisediffLoss(torch.nn.Module): 117 | def __init__(self, minimize = False): 118 | super().__init__() 119 | self.mm = 1 if minimize else -1 120 | 121 | def forward(self, y_hat,y_true,sol_true,cache): 122 | ''' 123 | pred_weights: predicted cost vector [batch_size, img,img] 124 | true_weights: actual cost vector [batch_size, img,img] 125 | target: true shortest path [batch_size, img,img] 126 | cache: cache is torch array [cache_size, img,img] 127 | ''' 128 | 129 | loss = 0 130 | for ii in range(len( y_hat )): 131 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 132 | 133 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 134 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 135 | 136 | 137 | loss += ( ( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii]).sum(dim=(1)) - ( cache[big_ind]*y_true[ii] -cache[small_ind]*y_true[ii]).sum(dim=(1)) ).square().mean() 138 | loss /= len(y_hat) 139 | return loss 140 | 141 | class PairwiseLoss(torch.nn.Module): 142 | def __init__(self, margin=0., minimize = False): 143 | super().__init__() 144 | self.margin = margin 145 | self.mm = 1 if minimize else -1 146 | def forward(self, y_hat,y_true,sol_true,cache): 147 | ''' 148 | pred_weights: predicted cost vector [batch_size, img,img] 149 | true_weights: actual cost vector [batch_size, img,img] 150 | target: true shortest path [batch_size, img,img] 151 | cache: cache is torch array [cache_size, img,img] 152 | ''' 153 | relu = torch.nn.ReLU() 154 | loss = 0 155 | mm, margin = self.mm, self.margin 156 | for ii in range(len( y_hat )): 157 | _,indices= np.unique((self.mm*y_true[ii]*cache).sum(dim= (1)).detach().numpy(),return_index=True) 158 | 159 | big_ind = [indices[0] for p in range(len(indices)-1)] #good one 160 | small_ind = [indices[p+1] for p in range(len(indices)-1)] #bad one 161 | 162 | loss += relu( margin + mm*( cache[big_ind]*y_hat[ii] -cache[small_ind]*y_hat[ii] ).sum(dim=(1)) ).mean() 163 | loss /= len(y_hat) 164 | return loss -------------------------------------------------------------------------------- /Portfolio/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from torch.utils.data import DataLoader 6 | ###################################### Wrapper ######################################### 7 | class datawrapper(): 8 | def __init__(self, x,y, sol=None, solver= None ): 9 | self.x = x 10 | self.y = y 11 | if sol is None: 12 | if solver is None: 13 | raise Exception("Either give the solutions or provide a solver!") 14 | sol = [] 15 | for i in range(len(y)): 16 | sol.append( solver.solve(y[i]) ) 17 | sol =
np.array(sol).astype(np.float32) 18 | 19 | self.sol = sol 20 | 21 | def __len__(self): 22 | return len(self.y) 23 | 24 | def __getitem__(self, index): 25 | return self.x[index], self.y[index],self.sol[index] 26 | 27 | 28 | ###################################### Dataloader ######################################### 29 | 30 | class ShortestPathDataModule(pl.LightningDataModule): 31 | def __init__(self, train_df,valid_df,test_df,generator, normalize=False, batchsize: int = 32, num_workers: int=4): 32 | super().__init__() 33 | self.train_df = train_df 34 | self.valid_df = valid_df 35 | self.test_df = test_df 36 | self.batchsize = batchsize 37 | self.generator = generator 38 | self.num_workers = num_workers 39 | 40 | 41 | def train_dataloader(self): 42 | return DataLoader(self.train_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 43 | 44 | def val_dataloader(self): 45 | return DataLoader(self.valid_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 46 | 47 | def test_dataloader(self): 48 | return DataLoader(self.test_df, batch_size=1000, num_workers=self.num_workers) 49 | -------------------------------------------------------------------------------- /Portfolio/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | from Trainer.utils import batch_solve 9 | 10 | def SPOlayer(solver,minimize = False): 11 | mm = 1 if minimize else -1 12 | class SPOlayer_cls(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, y_hat,y_true,sol_true ): 15 | sol_hat = batch_solve(solver, y_hat) 16 | 17 | ctx.save_for_backward(y_hat,y_true,sol_true) 18 | 19 | return ( mm*(sol_hat -sol_true)*y_true).sum() 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | y_hat,y_true,sol_true = ctx.saved_tensors 24 | y_spo = 2*y_hat - y_true 25 | sol_spo = batch_solve(solver,y_spo) 26 | return (sol_true - sol_spo)*mm, None, None 27 | return SPOlayer_cls.apply 28 | 29 | 30 | def DBBlayer(solver,lambda_val=1., minimize = False): 31 | mm = 1 if minimize else -1 32 | class DBBlayer_cls(torch.autograd.Function): 33 | @staticmethod 34 | def forward(ctx, y_hat,y_true,sol_true ): 35 | sol_hat = batch_solve(solver, y_hat) 36 | 37 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 38 | 39 | return sol_hat 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | """ 44 | In the backward pass we compute the gradient to minimize the regret 45 | """ 46 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 47 | y_perturbed = y_hat + mm* lambda_val* grad_output 48 | sol_perturbed = batch_solve(solver, y_perturbed) 49 | 50 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 51 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /Portfolio/Trainer/optimizer_module.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn, optim 4 | import torch.nn.functional as F 5 | 6 | ###################################### Gurobi Solver ######################################### 7 | import gurobipy as gp 8 | from gurobipy import GRB 9 | class gurobi_portfolio_solver: 10 | ''' 11 | Gurobi solver; takes the price vector as a parameter and returns the solution of the
maximization problem 12 | ''' 13 | def __init__(self, cov, gamma, n_stocks = 50): 14 | self.n_stocks = n_stocks 15 | model = gp.Model("qp") 16 | model.setParam('OutputFlag', 0) 17 | 18 | x = model.addMVar(shape= n_stocks, lb=0.0, vtype=GRB.CONTINUOUS, name="w") 19 | 20 | model.addConstr(sum(x) <= 1, "1") 21 | ### The original model involves an inequality; we once tested with an equality 22 | # model.addConstr(sum(x) == 1, "1") 23 | 24 | model.addConstr(x @ cov @ x <= gamma, "2") 25 | self.model = model 26 | self.x = x 27 | def solve(self, price): 28 | model = self.model 29 | x = self.x 30 | 31 | 32 | model.setObjective(price@x, gp.GRB.MAXIMIZE) 33 | model.optimize() 34 | 35 | if model.status==2: 36 | sol = x.x 37 | sol[sol < 1e-4] = 0 38 | return sol 39 | else: 40 | raise Exception("Optimal Solution not found") 41 | def solution_fromtorch(self, y_torch): 42 | if y_torch.dim()==1: 43 | return torch.from_numpy(self.solve( y_torch.detach().numpy())).float() 44 | else: 45 | solutions = [] 46 | for ii in range(len(y_torch)): 47 | solutions.append(torch.from_numpy(self.solve( y_torch[ii].detach().numpy())).float()) 48 | return torch.stack(solutions) 49 | 50 | import cvxpy as cp 51 | import cvxpylayers 52 | from cvxpylayers.torch import CvxpyLayer 53 | 54 | ### Build cvxpy model prototype 55 | class cvxsolver: 56 | ''' 57 | Implementation of QPTL with cvxpylayers and a quadratic regularizer 58 | ''' 59 | def __init__(self,cov, gamma, n_stocks = 50, mu=1e-6,regularizer='quadratic'): 60 | ''' 61 | regularizer: form of the regularizer - either 'quadratic' or 'entropic' 62 | ''' 63 | self.cov = cov 64 | self.gamma = gamma 65 | self.n_stocks = n_stocks 66 | self.mu = mu 67 | self.regularizer = regularizer 68 | 69 | 70 | 71 | 72 | x = cp.Variable(n_stocks) 73 | constraints = [x >= 0, cp.quad_form( x, cov ) <= gamma, cp.sum(x) <= 1] 74 | ### The original model involves an inequality; we once tested with an equality 75 | # constraints = [x >= 0, cp.quad_form( x, cov ) <= gamma, cp.sum(x) == 1] 76 | 77 | c = cp.Parameter(n_stocks) 78 | 79 | if self.regularizer=='quadratic': 80 | objective = cp.Minimize(-c @ x+ self.mu*cp.pnorm(x, p=2)) 81 | elif self.regularizer=='entropic': 82 | objective = cp.Minimize(-c @ x - self.mu*cp.sum(cp.entr(x)) ) 83 | problem = cp.Problem(objective, constraints) 84 | self.layer = CvxpyLayer(problem, parameters=[c], variables=[x]) 85 | def solution(self, y): 86 | 87 | sol, = self.layer(y) 88 | return sol 89 | 90 | ### We cannot run IntOpt on this problem; it cannot handle quadratic constraints 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /Portfolio/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def batch_solve(solver, y,relaxation =False): 5 | sol = [] 6 | for i in range(len(y)): 7 | sol.append( solver.solution_fromtorch(y[i]).reshape(1,-1) ) 8 | return torch.cat(sol,0).float() 9 | 10 | 11 | def regret_list(solver, y_hat,y_true, sol_true, minimize = False): 12 | ''' 13 | computes the regret of more than one cost vector 14 | ''' 15 | mm = 1 if minimize else -1 16 | sol_hat = batch_solve(solver, y_hat ) 17 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 18 | def abs_regret_list(solver,y_hat,y_true,sol_true,minimize = False): 19 | mm = 1 if minimize else -1 20 | sol_hat = batch_solve(solver, y_hat ) 21 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 22 | 23 | def regret_fn(solver, y_hat,y_true, sol_true, minimize = False): 24 |
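### Regret, in words (see regret_list above): mm * ((sol_hat - sol_true) . y_true) / (sol_true . y_true) is the
### relative loss in the true objective from acting on the predicted y_hat instead of the true y_true.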
### Convert infinity to 1; there are lots of infinities where all the returns are negative 25 | return torch.nan_to_num( regret_list(solver, y_hat,y_true, sol_true, minimize= minimize), nan=0., posinf=1.).mean() 26 | 27 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize = False): 28 | return abs_regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 29 | 30 | 31 | def growcache(solver, cache, y_hat): 32 | ''' 33 | cache is torch array [currentpoolsize,48] 34 | y_hat is torch array [batch_size,48] 35 | ''' 36 | sol = batch_solve(solver, y_hat,relaxation =False).detach().numpy() 37 | cache_np = cache.detach().numpy() 38 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 39 | # deduplicate rows via numpy's np.unique, then convert back to torch 40 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /Portfolio/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "DCOL", 4 | "N": 1000, 5 | "noise": 1, 6 | "deg": 1, 7 | "lr": 0.1, 8 | "mu": 10.0 9 | }, 10 | { 11 | "model": "DCOL", 12 | "N": 1000, 13 | "noise": 1, 14 | "deg": 4, 15 | "lr": 0.05, 16 | "mu": 10.0 17 | }, 18 | { 19 | "model": "DCOL", 20 | "N": 1000, 21 | "noise": 1, 22 | "deg": 16, 23 | "lr": 0.05, 24 | "mu": 10.0 25 | }, 26 | { 27 | "model": "DCOL", 28 | "N": 1000, 29 | "noise": 1, 30 | "deg": 8, 31 | "lr": 0.1, 32 | "mu": 10.0 33 | }, 34 | { 35 | "model": "baseline", 36 | "N": 1000, 37 | "noise": 1, 38 | "deg": 1, 39 | "lr": 0.01 40 | }, 41 | { 42 | "model": "baseline", 43 | "N": 1000, 44 | "noise": 1, 45 | "deg": 4, 46 | "lr": 0.05 47 | }, 48 | { 49 | "model": "baseline", 50 | "N": 1000, 51 | "noise": 1, 52 | "deg": 16, 53 | "lr": 0.05 54 | }, 55 | { 56 | "model": "baseline", 57 | "N": 1000, 58 | "noise": 1, 59 | "deg": 8, 60 | "lr": 0.1 61 | }, 62 | { 63 | "model": "SPO", 64 | "N": 1000, 65 | "noise": 1, 66 | "deg": 1, 67 | "lr": 0.5 68 | }, 69 | { 70 | "model": "SPO", 71 | "N": 1000, 72 | "noise": 1, 73 | "deg": 4, 74 | "lr": 1.0 75 | }, 76 | { 77 | "model": "SPO", 78 | "N": 1000, 79 | "noise": 1, 80 | "deg": 16, 81 | "lr": 0.5 82 | }, 83 | { 84 | "model": "SPO", 85 | "N": 1000, 86 | "noise": 1, 87 | "deg": 8, 88 | "lr": 0.5 89 | }, 90 | { 91 | "model": "DBB", 92 | "N": 1000, 93 | "noise": 1, 94 | "deg": 1, 95 | "lr": 0.1, 96 | "lambda_val": 1.0 97 | }, 98 | { 99 | "model": "DBB", 100 | "N": 1000, 101 | "noise": 1, 102 | "deg": 4, 103 | "lr": 0.1, 104 | "lambda_val": 1.0 105 | }, 106 | { 107 | "model": "DBB", 108 | "N": 1000, 109 | "noise": 1, 110 | "deg": 16, 111 | "lr": 0.1, 112 | "lambda_val": 1.0 113 | }, 114 | { 115 | "model": "DBB", 116 | "N": 1000, 117 | "noise": 1, 118 | "deg": 8, 119 | "lr": 0.1, 120 | "lambda_val": 1.0 121 | }, 122 | { 123 | "model": "FenchelYoung", 124 | "N": 1000, 125 | "noise": 1, 126 | "deg": 1, 127 | "lr": 0.1, 128 | "sigma": 0.01 129 | }, 130 | { 131 | "model": "FenchelYoung", 132 | "N": 1000, 133 | "noise": 1, 134 | "deg": 4, 135 | "lr": 0.5, 136 | "sigma": 0.01 137 | }, 138 | { 139 | "model": "FenchelYoung", 140 | "N": 1000, 141 | "noise": 1, 142 | "deg": 16, 143 | "lr": 1.0, 144 | "sigma": 2.0 145 | }, 146 | { 147 | "model": "FenchelYoung", 148 | "N": 1000, 149 | "noise": 1, 150 | "deg": 8, 151 | "lr": 1.0, 152 | "sigma": 0.01 153 | }, 154 | { 155 | "model": "IMLE", 156 | "N": 1000, 157 | "noise": 1, 158 | "deg": 1, 159 | "lr": 0.5, 160 | "beta": 0.1, 161 | "temperature": 0.1, 162 | "k": 5 163 | }, 164 | { 165 | "model": "IMLE", 166 |
"N": 1000, 167 | "noise": 1, 168 | "deg": 4, 169 | "lr": 0.5, 170 | "beta": 0.1, 171 | "temperature": 0.5, 172 | "k": 5 173 | }, 174 | { 175 | "model": "IMLE", 176 | "N": 1000, 177 | "noise": 1, 178 | "deg": 16, 179 | "lr": 0.5, 180 | "beta": 0.1, 181 | "temperature": 0.05, 182 | "k": 5 183 | }, 184 | { 185 | "model": "IMLE", 186 | "N": 1000, 187 | "noise": 1, 188 | "deg": 8, 189 | "lr": 0.5, 190 | "beta": 0.1, 191 | "temperature": 0.05, 192 | "k": 5 193 | }, 194 | { 195 | "model": "CachingPO", 196 | "loss": "MAP_c", 197 | "N": 1000, 198 | "noise": 1, 199 | "deg": 1, 200 | "lr": 0.01 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "MAP_c", 205 | "N": 1000, 206 | "noise": 1, 207 | "deg": 4, 208 | "lr": 1.0 209 | }, 210 | { 211 | "model": "CachingPO", 212 | "loss": "MAP_c", 213 | "N": 1000, 214 | "noise": 1, 215 | "deg": 16, 216 | "lr": 1.0 217 | }, 218 | { 219 | "model": "CachingPO", 220 | "loss": "MAP_c", 221 | "N": 1000, 222 | "noise": 1, 223 | "deg": 8, 224 | "lr": 0.05 225 | }, 226 | { 227 | "model": "CachingPO", 228 | "loss": "pairwise_diff", 229 | "N": 1000, 230 | "noise": 1, 231 | "deg": 1, 232 | "lr": 0.1 233 | }, 234 | { 235 | "model": "CachingPO", 236 | "loss": "pairwise_diff", 237 | "N": 1000, 238 | "noise": 1, 239 | "deg": 4, 240 | "lr": 0.1 241 | }, 242 | { 243 | "model": "CachingPO", 244 | "loss": "pairwise_diff", 245 | "N": 1000, 246 | "noise": 1, 247 | "deg": 16, 248 | "lr": 0.05 249 | }, 250 | { 251 | "model": "CachingPO", 252 | "loss": "pairwise_diff", 253 | "N": 1000, 254 | "noise": 1, 255 | "deg": 8, 256 | "lr": 0.1 257 | }, 258 | { 259 | "model": "CachingPO", 260 | "loss": "pairwise", 261 | "N": 1000, 262 | "noise": 1, 263 | "deg": 1, 264 | "lr": 0.01, 265 | "tau": 0.01 266 | }, 267 | { 268 | "model": "CachingPO", 269 | "loss": "pairwise", 270 | "N": 1000, 271 | "noise": 1, 272 | "deg": 4, 273 | "lr": 0.01, 274 | "tau": 0.1 275 | }, 276 | { 277 | "model": "CachingPO", 278 | "loss": "pairwise", 279 | "N": 1000, 280 | "noise": 1, 281 | "deg": 16, 282 | "lr": 0.1, 283 | "tau": 0.05 284 | }, 285 | { 286 | "model": "CachingPO", 287 | "loss": "pairwise", 288 | "N": 1000, 289 | "noise": 1, 290 | "deg": 8, 291 | "lr": 0.01, 292 | "tau": 0.01 293 | }, 294 | { 295 | "model": "CachingPO", 296 | "loss": "listwise", 297 | "N": 1000, 298 | "noise": 1, 299 | "deg": 1, 300 | "lr": 0.1, 301 | "tau": 0.01 302 | }, 303 | { 304 | "model": "CachingPO", 305 | "loss": "listwise", 306 | "N": 1000, 307 | "noise": 1, 308 | "deg": 4, 309 | "lr": 0.1, 310 | "tau": 0.01 311 | }, 312 | { 313 | "model": "CachingPO", 314 | "loss": "listwise", 315 | "N": 1000, 316 | "noise": 1, 317 | "deg": 16, 318 | "lr": 0.05, 319 | "tau": 0.005 320 | }, 321 | { 322 | "model": "CachingPO", 323 | "loss": "listwise", 324 | "N": 1000, 325 | "noise": 1, 326 | "deg": 8, 327 | "lr": 0.1, 328 | "tau": 0.01 329 | } 330 | ] -------------------------------------------------------------------------------- /Portfolio/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/Portfolio/imle/__init__.py -------------------------------------------------------------------------------- /Portfolio/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 
10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device on which to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /Portfolio/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
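    Concretely, :meth:`params` returns ``theta_prime = alpha * theta - beta * dy``, i.e. it moves the distribution parameters against the downstream gradient ``dy``.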
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /Portfolio/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the portfolio optimization problem. 2 | 3 | 4 | Download the portfolio optimization dataset from the repository: https://doi.org/10.48804/KT2P3Z. 5 | 6 | Then extract the data by running 7 | ``` 8 | tar -xvzf PortfolioData.tar.gz 9 | ``` 10 | 11 | Then `test_sp.py` can be used to run an experiment. 12 | To reproduce the results of the experiments, run 13 | ``` 14 | python test_sp.py --scheduler True 15 | ``` 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Benchmarking Predict-then-Optimize (PtO) Problems 2 | 3 | ## About 4 | 5 | This repository provides a comprehensive framework for benchmarking Predict-then-Optimize (PtO) problems using Decision-Focused Learning (DFL) approaches. PtO problems involve making predictions that are used as input to downstream optimization tasks, where traditional two-stage methods often lead to suboptimal solutions. DFL addresses this by training machine learning models that directly optimize for the downstream decision-making objectives. 6 | 7 | 8 | This repository contains the implementation for the paper (Accepted to Journal of Artificial Intelligence Research (JAIR)): 9 | 10 | > Mandi, J., Kotary, J., Berden, S., Mulamba, M., Bucarey, V., Guns, T., & Fioretto, F. (2024). Decision-focused learning: Foundations, state of the art, benchmark and future opportunities. Journal of Artificial Intelligence Research, 80, 1623-1701. [DOI: 10.1613/jair.1.15320](https://doi.org/10.1613/jair.1.15320) 11 | 12 | If you use this code in your research, please cite: 13 | ```bibtex 14 | @article{mandi2024decision, 15 | title={Decision-focused learning: Foundations, state of the art, benchmark and future opportunities}, 16 | author={Mandi, Jayanta and Kotary, James and Berden, Senne and Mulamba, Maxime and Bucarey, Victor and Guns, Tias and Fioretto, Ferdinando}, 17 | journal={Journal of Artificial Intelligence Research}, 18 | volume={80}, 19 | pages={1623--1701}, 20 | year={2024}, 21 | doi={10.1613/jair.1.15320} 22 | } 23 | ``` 24 | 25 | 26 | 27 | ## Installation 28 | 29 | ### Prerequisites 30 | - Python 3.7.3 (recommended) 31 | - pip or conda package manager 32 | 33 | ### Option 1: Using venv (Recommended) 34 | 35 | 1. Create and activate a virtual environment: 36 | ```bash 37 | python3 -m venv benchmarking_env 38 | source benchmarking_env/bin/activate 39 | ``` 40 | 41 | 2. Upgrade pip: 42 | ```bash 43 | pip install --upgrade pip 44 | ``` 45 | 46 | 3.
Install required packages: 47 | ```bash 48 | pip install -r requirements.txt 49 | ``` 50 | 51 | ### Option 2: Using Conda 52 | 53 | 1. Install Conda by following the [official installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) 54 | 55 | 2. Create and activate the environment: 56 | ```bash 57 | # Create environment 58 | conda env create -n benchmarking_env --file environment.yml 59 | 60 | # Activate on Linux/macOS 61 | conda activate benchmarking_env 62 | 63 | # Activate on Windows 64 | source activate benchmarking_env 65 | ``` 66 | 67 | ## Running Experiments 68 | 69 | Navigate to the corresponding experiment directory to run specific benchmarks. 70 | 71 | ## Contributing 72 | 73 | Feel free to open issues or submit pull requests if you find any problems or have suggestions for improvements. 74 | -------------------------------------------------------------------------------- /ShortestPath/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 
69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /ShortestPath/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | from Trainer.optimizer_module import spsolver 2 | import numpy as np 3 | import pytorch_lightning as pl 4 | import torch 5 | from torch.utils.data import DataLoader 6 | ###################################### Wrapper ######################################### 7 | class datawrapper(): 8 | def __init__(self, x,y, sol=None, solver= spsolver ): 9 | self.x = x 10 | self.y = y 11 | if sol is None: 12 | sol = [] 13 | for i in range(len(y)): 14 | sol.append( solver.shortest_pathsolution(y[i]) ) 15 | sol = np.array(sol).astype(np.float32) 16 | self.sol = sol 17 | 18 | def __len__(self): 19 | return len(self.y) 20 | 21 | def __getitem__(self, index): 22 | return self.x[index], self.y[index],self.sol[index] 23 | 24 | 25 | ###################################### Dataloader ######################################### 26 | 27 | class ShortestPathDataModule(pl.LightningDataModule): 28 | def __init__(self, train_df,valid_df,test_df,generator, normalize=False, batchsize: int = 32, num_workers: int=4): 29 | super().__init__() 30 | self.train_df = train_df 31 | self.valid_df = valid_df 32 | self.test_df = test_df 33 | self.batchsize = batchsize 34 | self.generator = generator 35 | self.num_workers = num_workers 36 | 37 | 38 | def train_dataloader(self): 39 | return DataLoader(self.train_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 40 | 41 | def val_dataloader(self): 42 | return DataLoader(self.valid_df, batch_size=self.batchsize,generator= self.generator, num_workers=self.num_workers) 43 | 44 | def test_dataloader(self): 45 | return DataLoader(self.test_df, batch_size=1000, num_workers=self.num_workers) 46 | -------------------------------------------------------------------------------- /ShortestPath/Trainer/diff_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | from Trainer.utils import batch_solve 9 | 10 | def SPOlayer(solver,minimize = True): 11 | mm = 1 if minimize else -1 12 | class SPOlayer_cls(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, y_hat,y_true,sol_true ): 15 | sol_hat = batch_solve(solver, y_hat) 16 | 17 | ctx.save_for_backward(y_hat,y_true,sol_true) 18 | 19 | return ( 
mm*(sol_hat -sol_true)*y_true).sum() 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | y_hat,y_true,sol_true = ctx.saved_tensors 24 | y_spo = 2*y_hat - y_true 25 | sol_spo = batch_solve(solver,y_spo) 26 | return (sol_true - sol_spo)*mm, None, None 27 | return SPOlayer_cls.apply 28 | 29 | 30 | def DBBlayer(solver,lambda_val=1., minimize = True): 31 | mm = 1 if minimize else -1 32 | class DBBlayer_cls(torch.autograd.Function): 33 | @staticmethod 34 | def forward(ctx, y_hat,y_true,sol_true ): 35 | sol_hat = batch_solve(solver, y_hat) 36 | 37 | ctx.save_for_backward(y_hat,y_true,sol_true, sol_hat) 38 | 39 | return sol_hat 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | """ 44 | In the backward pass we compute the gradient to minimize the regret 45 | """ 46 | y_hat,y_true,sol_true, sol_hat= ctx.saved_tensors 47 | y_perturbed = y_hat + mm* lambda_val* grad_output 48 | sol_perturbed = batch_solve(solver, y_perturbed) 49 | 50 | return -mm*(sol_hat - sol_perturbed)/lambda_val, None, None 51 | return DBBlayer_cls.apply -------------------------------------------------------------------------------- /ShortestPath/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, optim 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | from torch.utils.data import DataLoader 6 | import pytorch_lightning as pl 7 | import numpy as np 8 | 9 | def batch_solve(solver, y,relaxation =False): 10 | sol = [] 11 | for i in range(len(y)): 12 | sol.append( solver.solution_fromtorch(y[i]).reshape(1,-1) ) 13 | return torch.cat(sol,0).float() 14 | 15 | 16 | def regret_list(solver, y_hat,y_true, sol_true, minimize= True): 17 | ''' 18 | computes the regret of more than one cost vector 19 | ''' 20 | mm = 1 if minimize else -1 21 | sol_hat = batch_solve(solver, y_hat ) 22 | return ((mm*(sol_hat - sol_true)*y_true).sum(1)/ (sol_true*y_true).sum(1) ) 23 | def abs_regret_list(solver,y_hat,y_true,sol_true,minimize = True): 24 | mm = 1 if minimize else -1 25 | sol_hat = batch_solve(solver, y_hat ) 26 | return ((mm*(sol_hat - sol_true)*y_true).sum(1) ) 27 | 28 | def regret_fn(solver, y_hat,y_true, sol_true, minimize= True): 29 | return regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 30 | 31 | def abs_regret_fn(solver, y_hat,y_true, sol_true, minimize= True): 32 | return abs_regret_list(solver, y_hat,y_true, sol_true, minimize= minimize).mean() 33 | 34 | 35 | def growcache(solver, cache, y_hat): 36 | ''' 37 | cache is torch array [currentpoolsize,48] 38 | y_hat is torch array [batch_size,48] 39 | ''' 40 | sol = batch_solve(solver, y_hat,relaxation =False).detach().numpy() 41 | cache_np = cache.detach().numpy() 42 | cache_np = np.unique(np.append(cache_np,sol,axis=0),axis=0) 43 | # deduplicate rows via numpy's np.unique, then convert back to torch 44 | return torch.from_numpy(cache_np).float() 45 | 46 | 47 | def cachingsolver(cache, y_hat, minimize= True): 48 | mm = 1 if minimize else -1 49 | solutions = [] 50 | for ii in range(len(y_hat)): 51 | val,ind = torch.min(((cache)*y_hat[ii]*mm).sum(dim=1),0) 52 | solutions.append(cache[ind]) 53 | 54 | return torch.stack(solutions).float() -------------------------------------------------------------------------------- /ShortestPath/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "IntOpt", 4 | "N": 1000, 5 | "noise": 0.5, 6 | "deg": 1, 7 | "lr": 0.1, 8 | "thr": 0.001, 9 | "damping":
1.0 10 | }, 11 | { 12 | "model": "IntOpt", 13 | "N": 1000, 14 | "noise": 0.5, 15 | "deg": 2, 16 | "lr": 0.1, 17 | "thr": 0.1, 18 | "damping": 0.01 19 | }, 20 | { 21 | "model": "IntOpt", 22 | "N": 1000, 23 | "noise": 0.5, 24 | "deg": 4, 25 | "lr": 0.1, 26 | "thr": 1e-06, 27 | "damping": 0.1 28 | }, 29 | { 30 | "model": "IntOpt", 31 | "N": 1000, 32 | "noise": 0.5, 33 | "deg": 6, 34 | "lr": 0.1, 35 | "thr": 0.001, 36 | "damping": 1e-06 37 | }, 38 | { 39 | "model": "IntOpt", 40 | "N": 1000, 41 | "noise": 0.5, 42 | "deg": 8, 43 | "lr": 0.1, 44 | "thr": 10.0, 45 | "damping": 1.0 46 | }, 47 | { 48 | "model": "CachingPO", 49 | "loss": "pairwise_diff", 50 | "N": 1000, 51 | "noise": 0.5, 52 | "deg": 1, 53 | "lr": 0.1 54 | }, 55 | { 56 | "model": "CachingPO", 57 | "loss": "pairwise_diff", 58 | "N": 1000, 59 | "noise": 0.5, 60 | "deg": 2, 61 | "lr": 0.1 62 | }, 63 | { 64 | "model": "CachingPO", 65 | "loss": "pairwise_diff", 66 | "N": 1000, 67 | "noise": 0.5, 68 | "deg": 4, 69 | "lr": 0.5 70 | }, 71 | { 72 | "model": "CachingPO", 73 | "loss": "pairwise_diff", 74 | "N": 1000, 75 | "noise": 0.5, 76 | "deg": 6, 77 | "lr": 0.1 78 | }, 79 | { 80 | "model": "CachingPO", 81 | "loss": "pairwise_diff", 82 | "N": 1000, 83 | "noise": 0.5, 84 | "deg": 8, 85 | "lr": 1.0 86 | }, 87 | { 88 | "model": "CachingPO", 89 | "loss": "pairwise", 90 | "N": 1000, 91 | "noise": 0.5, 92 | "deg": 1, 93 | "lr": 1.0, 94 | "tau": 1.0 95 | }, 96 | { 97 | "model": "CachingPO", 98 | "loss": "pairwise", 99 | "N": 1000, 100 | "noise": 0.5, 101 | "deg": 2, 102 | "lr": 0.1, 103 | "tau": 0.5 104 | }, 105 | { 106 | "model": "CachingPO", 107 | "loss": "pairwise", 108 | "N": 1000, 109 | "noise": 0.5, 110 | "deg": 4, 111 | "lr": 0.1, 112 | "tau": 1.0 113 | }, 114 | { 115 | "model": "CachingPO", 116 | "loss": "pairwise", 117 | "N": 1000, 118 | "noise": 0.5, 119 | "deg": 6, 120 | "lr": 1.0, 121 | "tau": 10.0 122 | }, 123 | { 124 | "model": "CachingPO", 125 | "loss": "pairwise", 126 | "N": 1000, 127 | "noise": 0.5, 128 | "deg": 8, 129 | "lr": 1.0, 130 | "tau": 10.0 131 | }, 132 | { 133 | "model": "CachingPO", 134 | "loss": "listwise", 135 | "N": 1000, 136 | "noise": 0.5, 137 | "deg": 1, 138 | "lr": 0.1, 139 | "tau": 0.1 140 | }, 141 | { 142 | "model": "CachingPO", 143 | "loss": "listwise", 144 | "N": 1000, 145 | "noise": 0.5, 146 | "deg": 2, 147 | "lr": 0.1, 148 | "tau": 0.1 149 | }, 150 | { 151 | "model": "CachingPO", 152 | "loss": "listwise", 153 | "N": 1000, 154 | "noise": 0.5, 155 | "deg": 4, 156 | "lr": 1.0, 157 | "tau": 0.1 158 | }, 159 | { 160 | "model": "CachingPO", 161 | "loss": "listwise", 162 | "N": 1000, 163 | "noise": 0.5, 164 | "deg": 6, 165 | "lr": 0.1, 166 | "tau": 1.0 167 | }, 168 | { 169 | "model": "CachingPO", 170 | "loss": "listwise", 171 | "N": 1000, 172 | "noise": 0.5, 173 | "deg": 8, 174 | "lr": 1.0, 175 | "tau": 1.0 176 | } 177 | ] -------------------------------------------------------------------------------- /ShortestPath/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/ShortestPath/imle/__init__.py -------------------------------------------------------------------------------- /ShortestPath/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, 
abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device on which to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /ShortestPath/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`.
25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([2.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /ShortestPath/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the shortest path problem on a $5 \times 5$ grid. 2 | 3 | Download the shortest path dataset from the repository: https://doi.org/10.48804/KT2P3Z. 4 | Then extract the data by running 5 | ``` 6 | tar -xvzf ShortestPathData.tar.gz 7 | ``` 8 | 9 | Then `test_sp.py` can be used to run an experiment. 10 | To reproduce the results of the experiments, run 11 | ``` 12 | python test_sp.py --scheduler True 13 | ``` 14 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: benchmarking_env 2 | channels: 3 | - conda-forge 4 | - defaults 5 | - gurobi 6 | dependencies: 7 | - python=3.7.3 8 | - pandas=1.3.5 9 | - tensorboard=2.9.1 10 | - pytorch-lightning=1.6.4 11 | - torch=1.12.0 12 | - torchvision=0.13.0 13 | - scipy=1.6.3 14 | - numpy=1.21.6 15 | - scikit-learn=1.0.2 16 | - cvxpy=1.3.0 17 | - cvxpylayers=0.1.5 18 | - networkx=2.6.3 19 | - ortools=9.3.10459 20 | - qpth=0.0.15 21 | - gurobipy=9.5.2 22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy==1.3.0 2 | cvxpylayers==0.1.5 3 | diffcp==1.0.23 4 | gurobipy==9.5.2 5 | joblib==1.4.2 6 | Markdown==3.6 7 | multidict==6.0.5 8 | networkx==2.6.3 9 | numpy==1.22.4 10 | ortools==9.3.10497 11 | pandas==1.3.5 12 | pillow==10.3.0 13 | protobuf==3.19.6 14 | pytorch-lightning==1.6.4 15 | PyYAML==6.0.1 16 | scikit-learn==1.0.2 17 | scipy==1.7.3 18 | scs==3.2.4.post1 19 | tensorboard==2.9.1 20 | torch==1.12.0 21 | torchmetrics==1.3.2 22 | torchvision==0.13.0 23 | tqdm==4.66.4 24 | -------------------------------------------------------------------------------- /warcraft/DPO/fenchel_young.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 3 | # Modifications from original work 4 | # 29-03-2021 (tuero@ualberta.ca) : Convert Tensorflow code to PyTorch 5 | # 6 | # Copyright 2021 The Google Research Authors. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | # Lint as: python3 21 | """Implementation of a Fenchel-Young loss using perturbation techniques.""" 22 | 23 | import torch 24 | import torch.nn as nn 25 | 26 | from DPO import perturbations 27 | 28 | 29 | class PerturbedFunc(torch.autograd.Function): 30 | """Implementation of a Fenchel Young loss.""" 31 | @staticmethod 32 | def forward(ctx, input_tensor, y_true, perturbed, batched, maximize, *args): 33 | diff = perturbed(input_tensor, *args) - y_true.type(input_tensor.dtype) 34 | if not maximize: 35 | diff = -diff 36 | # Computes per-example loss for batched inputs. 37 | if batched: 38 | loss = torch.sum(torch.reshape(diff, [list(diff.shape)[0], -1]) ** 2, dim=-1) 39 | else: # Computes loss for unbatched inputs. 40 | loss = torch.sum(diff ** 2) 41 | ctx.save_for_backward(diff) 42 | ctx.batched = batched 43 | return loss 44 | 45 | @staticmethod 46 | def backward(ctx, dy): 47 | diff, = ctx.saved_tensors 48 | batched = ctx.batched 49 | if batched: # dy has shape (batch_size,) in this case. 50 | dy = torch.reshape(dy, [list(dy.shape)[0]] + (diff.dim() - 1) * [1]) 51 | return dy * diff, None, None, None, None # original 52 | # return diff, None, None, None, None 53 | 54 | 55 | class FenchelYoungLoss(nn.Module): 56 | def __init__(self, 57 | func = None, 58 | num_samples = 1000, 59 | sigma = 0.01, 60 | noise = perturbations._GUMBEL, 61 | batched = True, 62 | maximize = True, 63 | device=None): 64 | """Initializes the Fenchel-Young loss. 65 | 66 | Args: 67 | func: the function whose argmax is to be differentiated by perturbation. 68 | num_samples: (int) the number of perturbed inputs. 69 | sigma: (float) the amount of noise to be considered 70 | noise: (str) the noise distribution to be used to sample perturbations. 71 | batched: whether inputs to the func will have a leading batch dimension 72 | (True) or consist of a single example (False). Defaults to True. 73 | maximize: (bool) whether to maximize or to minimize the input function. 74 | device: The device to create tensors on (cpu/gpu). If None given, it will 75 | default to gpu:0 if available, cpu otherwise. 
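        Example (an illustrative sketch, assuming ``dijkstra_solver`` is a hypothetical
        batched shortest-path solver, hence ``maximize=False``)::

            >>> criterion = FenchelYoungLoss(func=dijkstra_solver, num_samples=10, sigma=0.5, maximize=False)
            >>> loss = criterion(pred_weights, true_paths).mean()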
76 | """ 77 | super().__init__() 78 | self._batched = batched 79 | self._maximize = maximize 80 | self.func = func 81 | self.perturbed = perturbations.perturbed(func=func, 82 | num_samples=num_samples, 83 | sigma=sigma, 84 | noise=noise, 85 | batched=batched, 86 | device=device) 87 | 88 | def forward(self, input_tensor, y_true, *args): 89 | return PerturbedFunc.apply(input_tensor, y_true, self.perturbed, self._batched, self._maximize, *args) 90 | 91 | -------------------------------------------------------------------------------- /warcraft/Trainer/computervisionmodels.py: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torchvision 7 | import pytorch_lightning as pl 8 | 9 | def get_model(model_name, out_features, in_channels, arch_params): 10 | preloaded_models = {"ResNet18": torchvision.models.resnet18} 11 | 12 | own_models = {"ConvNet": ConvNet, "MLP": MLP, "PureConvNet": PureConvNet, "CombResnet18": CombRenset18} 13 | 14 | if model_name in preloaded_models: 15 | model = preloaded_models[model_name](pretrained=False, num_classes=out_features, **arch_params) 16 | 17 | # Hacking ResNets to expect 'in_channels' input channel (and not three) 18 | del model.conv1 19 | model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 20 | return model 21 | elif model_name in own_models: 22 | return own_models[model_name](out_features=out_features, in_channels=in_channels, **arch_params) 23 | else: 24 | raise ValueError(f"Model name {model_name} not recognized!") 25 | 26 | 27 | def dim_after_conv2D(input_dim, stride, kernel_size): 28 | return (input_dim - kernel_size + 2) // stride 29 | 30 | 31 | class CombRenset18(nn.Module): 32 | 33 | def __init__(self, out_features, in_channels): 34 | super().__init__() 35 | self.resnet_model = torchvision.models.resnet18(pretrained=False, num_classes=out_features) 36 | del self.resnet_model.conv1 37 | self.resnet_model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 38 | output_shape = (int(sqrt(out_features)), int(sqrt(out_features))) 39 | self.pool = nn.AdaptiveMaxPool2d(output_shape) 40 | #self.last_conv = nn.Conv2d(128, 1, kernel_size=1, stride=1) 41 | 42 | 43 | def forward(self, x): 44 | x = self.resnet_model.conv1(x) 45 | x = self.resnet_model.bn1(x) 46 | x = self.resnet_model.relu(x) 47 | x = self.resnet_model.maxpool(x) 48 | x = self.resnet_model.layer1(x) 49 | #x = self.resnet_model.layer2(x) 50 | #x = self.resnet_model.layer3(x) 51 | #x = self.last_conv(x) 52 | x = self.pool(x) 53 | x = x.mean(dim=1) 54 | return x 55 | 56 | 57 | class ConvNet(torch.nn.Module): 58 | def __init__(self, out_features, in_channels, kernel_size, stride, linear_layer_size, channels_1, channels_2): 59 | super().__init__() 60 | self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=channels_1, kernel_size=kernel_size, stride=stride) 61 | self.conv2 = nn.Conv2d(in_channels=channels_1, out_channels=channels_2, kernel_size=kernel_size, stride=stride) 62 | 63 | output_shape = (4, 4) 64 | self.pool = nn.AdaptiveAvgPool2d(output_shape) 65 | 66 | self.fc1 = nn.Linear(in_features=output_shape[0] * output_shape[1] * channels_2, out_features=linear_layer_size) 67 | self.fc2 = nn.Linear(in_features=linear_layer_size, out_features=out_features) 68 | 69 | def forward(self, x): 70 | batch_size = x.shape[0] 71 | x = F.relu(self.conv1(x)) 72 | x = F.max_pool2d(x, 2, 2) 73 | x 
= F.relu(self.conv2(x)) 74 | x = self.pool(x) 75 | x = x.view(batch_size, -1) 76 | x = F.relu(self.fc1(x)) 77 | x = self.fc2(x) 78 | return x 79 | 80 | 81 | class MLP(torch.nn.Module): 82 | def __init__(self, out_features, in_channels, hidden_layer_size): 83 | super().__init__() 84 | input_dim = in_channels * 40 * 20 85 | self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_layer_size) 86 | self.fc2 = nn.Linear(in_features=hidden_layer_size, out_features=out_features) 87 | 88 | def forward(self, x): 89 | batch_size = x.shape[0] 90 | x = x.view(batch_size, -1) 91 | x = torch.tanh(self.fc1(x)) 92 | x = self.fc2(x) 93 | return x 94 | 95 | 96 | class PureConvNet(torch.nn.Module): 97 | 98 | act_funcs = {"relu": F.relu, "tanh": torch.tanh, "identity": lambda x: x} 99 | 100 | def __init__(self, out_features, pooling, use_second_conv, kernel_size, in_channels, 101 | channels_1=20, channels_2=20, act_func="relu"): 102 | super().__init__() 103 | self.use_second_conv = use_second_conv 104 | 105 | self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=channels_1, kernel_size=kernel_size, stride=1) 106 | self.conv2 = nn.Conv2d(in_channels=channels_1, out_channels=channels_2, kernel_size=kernel_size, stride=1) 107 | 108 | output_shape = (int(sqrt(out_features)), int(sqrt(out_features))) 109 | if pooling == "average": 110 | self.pool = nn.AdaptiveAvgPool2d(output_shape) 111 | elif pooling == "max": 112 | self.pool = nn.AdaptiveMaxPool2d(output_shape) 113 | 114 | self.conv3 = nn.Conv2d(in_channels=channels_2 if use_second_conv else channels_1, 115 | out_channels=1, kernel_size=1, stride=1) 116 | self.act_func = PureConvNet.act_funcs[act_func] 117 | 118 | def forward(self, x): 119 | x = self.act_func(self.conv1(x)) 120 | if self.use_second_conv: 121 | x = self.act_func(self.conv2(x)) 122 | x = self.pool(x) 123 | x = self.conv3(x) 124 | return x 125 | -------------------------------------------------------------------------------- /warcraft/Trainer/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from torch.utils.data import Dataset, DataLoader 4 | import pytorch_lightning as pl 5 | import torch 6 | 7 | class WarcraftImageDataset(Dataset): 8 | def __init__(self, inputs, labels, true_weights): 9 | self.inputs = inputs 10 | self.labels = labels 11 | self.true_weights = true_weights 12 | 13 | def __len__(self): 14 | return len(self.labels) 15 | 16 | def __getitem__(self, idx): 17 | 18 | return self.inputs[idx], self.labels[idx], self.true_weights[idx] 19 | 20 | def return_trainlabel(data_dir): 21 | train_prefix = "train" 22 | 23 | train_labels = np.load(os.path.join(data_dir, train_prefix + "_shortest_paths.npy")) 24 | train_labels = np.unique(train_labels,axis=0) 25 | return torch.from_numpy(train_labels) 26 | 27 | 28 | class WarcraftDataModule(pl.LightningDataModule): 29 | def __init__(self, data_dir, use_test_set=True, normalize=True, batch_size=70, generator=None,num_workers=4): 30 | super().__init__() 31 | self.batch_size = batch_size 32 | self.generator = generator 33 | self.num_workers = num_workers 34 | 35 | train_prefix = "train" 36 | val_prefix = "val" 37 | test_prefix = "test" 38 | data_suffix = "maps" 39 | train_inputs = np.load(os.path.join(data_dir, train_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 40 | train_inputs = train_inputs.transpose(0, 3, 1, 2) # channel first 41 | 42 | val_inputs = np.load(os.path.join(data_dir, val_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 43
| val_inputs = val_inputs.transpose(0, 3, 1, 2) # channel first 44 | if use_test_set: 45 | test_inputs = np.load(os.path.join(data_dir, test_prefix + "_" + data_suffix + ".npy")).astype(np.float32) 46 | test_inputs = test_inputs.transpose(0, 3, 1, 2) # channel first 47 | 48 | train_labels = np.load(os.path.join(data_dir, train_prefix + "_shortest_paths.npy")) 49 | train_true_weights = np.load(os.path.join(data_dir, train_prefix + "_vertex_weights.npy")).astype(np.float32) 50 | if normalize: 51 | mean, std = ( 52 | np.mean(train_inputs, axis=(0, 2, 3), keepdims=True), 53 | np.std(train_inputs, axis=(0, 2, 3), keepdims=True), 54 | ) 55 | train_inputs -= mean 56 | train_inputs /= std 57 | val_inputs -= mean 58 | val_inputs /= std 59 | if use_test_set: 60 | test_inputs -= mean 61 | test_inputs /= std 62 | val_labels = np.load(os.path.join(data_dir, val_prefix + "_shortest_paths.npy")) 63 | val_true_weights = np.load(os.path.join(data_dir, val_prefix + "_vertex_weights.npy")).astype(np.float32) 64 | val_full_images = np.load(os.path.join(data_dir, val_prefix + "_maps.npy")) 65 | if use_test_set: 66 | test_labels = np.load(os.path.join(data_dir, test_prefix + "_shortest_paths.npy")) 67 | test_true_weights = np.load(os.path.join(data_dir, test_prefix + "_vertex_weights.npy")).astype(np.float32) 68 | # test_full_images = np.load(os.path.join(data_dir, test_prefix + "_maps.npy")) 69 | self.training_data = WarcraftImageDataset(train_inputs, train_labels, train_true_weights) 70 | self.val_data = WarcraftImageDataset(val_inputs, val_labels, val_true_weights) 71 | if use_test_set: 72 | self.test_data = WarcraftImageDataset(test_inputs, test_labels, test_true_weights) 73 | 74 | def denormalize(x): 75 | return (x * std) + mean 76 | 77 | self.metadata = { 78 | "input_image_size": val_full_images[0].shape[1], 79 | "output_features": val_true_weights[0].shape[0] * val_true_weights[0].shape[1], 80 | "num_channels": val_full_images[0].shape[-1], 81 | "output_shape": (val_true_weights[0].shape[0] , val_true_weights[0].shape[1]), 82 | "denormalize": denormalize 83 | } 84 | 85 | 86 | 87 | 88 | def train_dataloader(self): 89 | return DataLoader(self.training_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 90 | 91 | def val_dataloader(self): 92 | return DataLoader(self.val_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 93 | 94 | def test_dataloader(self): 95 | return DataLoader(self.test_data, batch_size=self.batch_size,generator= self.generator, num_workers=self.num_workers) 96 | -------------------------------------------------------------------------------- /warcraft/Trainer/metric.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import torch 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | import torch.nn.functional as F 6 | 7 | def normalized_regret(true_weights, true_paths, suggested_paths, minimize=True): 8 | mm = 1 if minimize else -1 9 | suggested_paths_costs = (suggested_paths * true_weights).sum((1,2)) 10 | true_paths_costs = (true_paths * true_weights).sum((1,2)) 11 | 12 | return mm*(( suggested_paths_costs - true_paths_costs)/true_paths_costs).mean() 13 | 14 | 15 | def regret_list(true_weights, true_paths, suggested_paths, minimize=True): 16 | mm = 1 if minimize else -1 17 | suggested_paths_costs = (suggested_paths * true_weights).sum((1,2)) 18 | true_paths_costs = (true_paths * true_weights).sum((1,2)) 19 | 20 | 
return mm*(( suggested_paths_costs - true_paths_costs)/true_paths_costs) 21 | 22 | def normalized_hamming(true_weights, true_paths, suggested_paths, minimize=True): 23 | errors = suggested_paths * (1.0 - true_paths) + (1.0 - suggested_paths) * true_paths 24 | # print( errors.sum((1,2)), true_paths.sum((1,2)) ) 25 | return (errors.sum((1,2))/true_paths.sum((1,2))).mean() 26 | 27 | 28 | class HammingLoss(torch.nn.Module): 29 | def forward(self, suggested, target, true_weights): 30 | errors = suggested * (1.0 - target) + (1.0 - suggested) * target 31 | return errors.mean(dim=0).sum() 32 | # return (torch.mean(suggested*(1.0-target)) + torch.mean((1.0-suggested)*target)) * 25.0 -------------------------------------------------------------------------------- /warcraft/Trainer/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import numpy as np 4 | try: 5 | import ray 6 | except ImportError as e: 7 | print(e) 8 | 9 | def maybe_parallelize(function, arg_list): 10 | """ 11 | Parallelizes execution if ray is enabled and initialized 12 | :param function: callable 13 | :param arg_list: list of function arguments (one for each execution) 14 | :return: 15 | """ 16 | # Passive ray module check 17 | if 'ray' in sys.modules and ray.is_initialized(): 18 | ray_fn = ray.remote(function) 19 | return ray.get([ray_fn.remote(arg) for arg in arg_list]) 20 | else: 21 | return [function(arg) for arg in arg_list] 22 | def shortest_pathsolution(solver, weights): 23 | ''' 24 | solver: dijkstra solver 25 | weights: torch tensor matrix 26 | ''' 27 | np_weights = weights.detach().cpu().numpy() 28 | suggested_tours = np.asarray(maybe_parallelize(solver, arg_list=list(np_weights))) 29 | return torch.from_numpy(suggested_tours).float().to(weights.device) 30 | 31 | 32 | 33 | def growcache(solver, cache, output): 34 | ''' 35 | cache is torch array [currentpoolsize,48] 36 | output is torch array [batch_size,48] 37 | ''' 38 | weights = output.reshape(-1, output.shape[-1], output.shape[-1]) 39 | shortest_path = shortest_pathsolution(solver, weights).numpy() 40 | cache_np = cache.detach().numpy() 41 | cache_np = np.unique(np.append(cache_np,shortest_path,axis=0),axis=0) 42 | # rows are deduplicated in numpy (np.unique with axis=0), then converted back to a torch tensor 43 | return torch.from_numpy(cache_np).float() -------------------------------------------------------------------------------- /warcraft/comb_modules/dijkstra.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import heapq 3 | import torch 4 | from functools import partial 5 | from comb_modules.utils import get_neighbourhood_func 6 | from collections import namedtuple 7 | # from utils import maybe_parallelize 8 | 9 | DijkstraOutput = namedtuple("DijkstraOutput", ["shortest_path", "is_unique", "transitions"]) 10 | 11 | 12 | def dijkstra(matrix, neighbourhood_fn="8-grid", request_transitions=False): 13 | 14 | x_max, y_max = matrix.shape 15 | neighbors_func = partial(get_neighbourhood_func(neighbourhood_fn), x_max=x_max, y_max=y_max) 16 | 17 | costs = np.full_like(matrix, 1.0e10) 18 | costs[0][0] = matrix[0][0] 19 | num_path = np.zeros_like(matrix) 20 | num_path[0][0] = 1 21 | priority_queue = [(matrix[0][0], (0, 0))] 22 | certain = set() 23 | transitions = dict() 24 | 25 | while priority_queue: 26 | cur_cost, (cur_x, cur_y) = heapq.heappop(priority_queue) 27 | if (cur_x, cur_y) in certain: 28 | continue # skip stale queue entries for nodes whose distance is already final 29 | 30 | for x, y in neighbors_func(cur_x, cur_y): 31 | if (x, y) not in certain: 32
| if matrix[x][y] + costs[cur_x][cur_y] < costs[x][y]: 33 | costs[x][y] = matrix[x][y] + costs[cur_x][cur_y] 34 | heapq.heappush(priority_queue, (costs[x][y], (x, y))) 35 | transitions[(x, y)] = (cur_x, cur_y) 36 | num_path[x, y] = num_path[cur_x, cur_y] 37 | elif matrix[x][y] + costs[cur_x][cur_y] == costs[x][y]: 38 | num_path[x, y] += 1 39 | 40 | certain.add((cur_x, cur_y)) 41 | # retrieve the path 42 | cur_x, cur_y = x_max - 1, y_max - 1 43 | on_path = np.zeros_like(matrix) 44 | on_path[-1][-1] = 1 45 | while (cur_x, cur_y) != (0, 0): 46 | cur_x, cur_y = transitions[(cur_x, cur_y)] 47 | on_path[cur_x, cur_y] = 1.0 48 | 49 | is_unique = num_path[-1, -1] == 1 50 | 51 | if request_transitions: 52 | return DijkstraOutput(shortest_path=on_path, is_unique=is_unique, transitions=transitions) 53 | else: 54 | return DijkstraOutput(shortest_path=on_path, is_unique=is_unique, transitions=None) 55 | 56 | 57 | def get_solver(neighbourhood_fn): 58 | def solver(matrix): 59 | return dijkstra(matrix, neighbourhood_fn).shortest_path 60 | 61 | return solver 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /warcraft/comb_modules/gurobi_dijkstra.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import gurobipy as gp 3 | import numpy as np 4 | 5 | # A = nx.adjacency_matrix(G, weight=None).todense() 6 | # I = np.identity(len(A)) 7 | 8 | name_concat = lambda *s: '_'.join( list(map(str, s)) ) 9 | def ILP(matrix): 10 | x_max, y_max = matrix.shape 11 | print("weight of sink node ",matrix[-1,-1]) 12 | # row_sum_constraintmat= np.zeros((x_max, x_max*y_max)) 13 | # col_sum_constraintmat= np.zeros((y_max, x_max*y_max)) 14 | # for i in range(x_max): 15 | # row_sum_constraintmat[i,i*x_max:((i+1)*x_max)]=1 16 | 17 | # for j in range(y_max): 18 | # col_sum_constraintmat[j,np.arange(j,x_max*y_max, y_max)]=1 19 | E = [] 20 | N = [name_concat(x, y) for x in range(x_max) for y in range(y_max)] 21 | ''' 22 | The goal is to create a directed graph with (x_max*y_max) nodes. 23 | Each node is connected to its 8 neighbours: (x-1,y), (x-1,y+1),(x,y+1),(x+1,y+1), (x+1,y),(x+1,y-1), 24 | (x,y-1),(x-1,y-1). Care is taken for nodes which do not have all 8 neighbours.
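For example, name_concat(0, 0) yields the node label '0_0', and the corner cell (0,0) has only the three neighbours (0,1), (1,0) and (1,1); the offset ranges x_minus:x_plus and y_minus:y_plus computed below are clipped at the grid border to handle exactly such cases.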
25 | ''' 26 | for i in range(x_max): 27 | for j in range(y_max): 28 | if (( (x_max-1)> i>0) & ( (y_max-1)> j>0)): 29 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,2 30 | elif(i==j==0 ): 31 | x_minus,x_plus, y_minus, y_plus = 0,2,0,2 32 | elif ((i==0)&(j==y_max-1)): 33 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,1 34 | elif ((i==x_max-1)&(j==0)): 35 | x_minus,x_plus, y_minus, y_plus = -1,1,0,2 36 | elif (i==0): 37 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,2 38 | elif (j==0): 39 | x_minus,x_plus, y_minus, y_plus = -1,2,0,2 40 | elif ( (i== (x_max -1)) & (j== (y_max-1) )): 41 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,1 42 | elif ( (i== (x_max -1))): 43 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,2 44 | elif ( (j== (y_max -1))): 45 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,1 46 | 47 | 48 | 49 | E.extend([ ( name_concat(i,j), name_concat(i+p,j+q)) for p in range(x_minus,x_plus) 50 | for q in range(y_minus, y_plus) if ((p!=0)|(q!=0)) ]) 51 | # E.extend([ ( name_concat(i+p,j+q), name_concat(i,j) ) for p in range(x_minus,x_plus) 52 | # for q in range(y_minus,y_plus) if ((p!=0)|(q!=0)) ]) 53 | 54 | 55 | G = nx.DiGraph() 56 | G.add_nodes_from(N) 57 | G.add_edges_from(E) 58 | 59 | A = -nx.incidence_matrix(G, oriented=True).todense() 60 | A_pos = A.copy() 61 | A_pos[A_pos==-1]=0 62 | 63 | bigM = 1e18 # unused 64 | 65 | 66 | b = np.zeros(len(A)) 67 | b[0] = 1 68 | b[-1] = -1 69 | model = gp.Model() 70 | model.setParam('OutputFlag', 0) 71 | # x = model.addMVar(shape=A.shape[1], vtype=gp.GRB.BINARY, name="x") 72 | # z = model.addMVar(shape=A.shape[0], vtype=gp.GRB.BINARY, name="z") 73 | 74 | 75 | x = model.addMVar(shape=A.shape[1],lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="x") 76 | z = model.addMVar(shape=A.shape[0],lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="z") 77 | 78 | # model.addConstr( z[0]==1, name="source") 79 | #### force sink node to be 1 80 | model.addConstr( z[-1]==1, name="sink") 81 | 82 | model.addConstr( A@ x == b, name="eq") 83 | model.addConstr( A_pos@ x <= z, name="ineq") 84 | ''' 85 | The inequality ties z to the outgoing edges selected by x; the path uses no outgoing edge at the sink, 86 | so the sink node variable has to be forced to 1 explicitly (the "sink" constraint above). 87 | ''' 88 | 89 | model.setObjective(matrix.flatten() @z, gp.GRB.MINIMIZE) 90 | model.optimize() 91 | 92 | if model.status==2: 93 | return z.x.reshape( x_max, y_max ) 94 | else: 95 | print(model.status) 96 | model.computeIIS() 97 | model.write("infeasible_nodeweightedSP.ilp") 98 | raise Exception("Solution not found") 99 | 100 | 101 | def ILP_reformulated(matrix): 102 | x_max, y_max = matrix.shape 103 | print("weight of sink node ",matrix[-1,-1]) 104 | 105 | E = [ ( name_concat(x,y,'in'), name_concat(x,y,'out')) for x in range(x_max) for y in range(y_max) ] 106 | N = [name_concat(x, y, s) for x in range(x_max) for y in range(y_max) for s in ['in','out']] 107 | ''' 108 | The goal is to create a directed graph where each of the (x_max*y_max) grid cells is split into an 'in' and an 'out' node. 109 | Each cell is connected to its 8 neighbours: (x-1,y), (x-1,y+1),(x,y+1),(x+1,y+1), (x+1,y),(x+1,y-1), 110 | (x,y-1),(x-1,y-1). Care is taken for cells which do not have all 8 neighbours.
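The edge from a cell's 'in' node to its own 'out' node carries that cell's weight (assigned via c[non_zero_edge_idx] below), while edges between neighbouring cells cost nothing; for example, name_concat(0, 0, 'in') gives '0_0_in', and the internal edge ('0_0_in', '0_0_out') carries matrix[0,0]. This turns the node-weighted problem into a standard edge-weighted shortest path, so the auxiliary z variables used in ILP above are not needed here.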
111 | ''' 112 | for i in range(x_max): 113 | for j in range(y_max): 114 | if (( (x_max-1)> i>0) & ( (y_max-1)> j>0)): 115 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,2 116 | elif(i==j==0 ): 117 | x_minus,x_plus, y_minus, y_plus = 0,2,0,2 118 | elif ((i==0)&(j==y_max-1)): 119 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,1 120 | elif ((i==x_max-1)&(j==0)): 121 | x_minus,x_plus, y_minus, y_plus = -1,1,0,2 122 | elif (i==0): 123 | x_minus,x_plus, y_minus, y_plus = 0,2,-1,2 124 | elif (j==0): 125 | x_minus,x_plus, y_minus, y_plus = -1,2,0,2 126 | elif ( (i== (x_max -1)) & (j== (y_max-1) )): 127 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,1 128 | elif ( (i== (x_max -1))): 129 | x_minus,x_plus, y_minus, y_plus = -1,1,-1,2 130 | elif ( (j== (y_max -1))): 131 | x_minus,x_plus, y_minus, y_plus = -1,2,-1,1 132 | 133 | 134 | 135 | E.extend([ ( name_concat(i,j,'out'), name_concat(i+p,j+q,'in')) for p in range(x_minus,x_plus) 136 | for q in range(y_minus, y_plus) if ((p!=0)|(q!=0)) ]) 137 | # E.extend([ ( name_concat(i+p,j+q), name_concat(i,j) ) for p in range(x_minus,x_plus) 138 | # for q in range(y_minus,y_plus) if ((p!=0)|(q!=0)) ]) 139 | G = nx.DiGraph() 140 | G.add_nodes_from(N) 141 | G.add_edges_from(E) 142 | 143 | A = -nx.incidence_matrix(G, oriented=True).todense() 144 | b = np.zeros(len(A)) 145 | b[0] = 1 146 | b[-1] = -1 147 | 148 | c = np.zeros(A.shape[1]) 149 | non_zero_edge_idx = [ i for i,k in enumerate( list(G.edges) ) if "_".join(k[0].split("_", 2)[:2]) == "_".join(k[1].split("_", 2)[:2])] 150 | c[non_zero_edge_idx] = matrix.flatten() 151 | # print(c[0:10]) # leftover debug output, disabled 152 | 153 | # print(c[10:20]) 154 | 155 | # print(c[100:120]) 156 | 157 | # print(c[450:480]) 158 | 159 | model = gp.Model() 160 | model.setParam('OutputFlag', 0) 161 | # x = model.addMVar(shape=A.shape[1], vtype=gp.GRB.BINARY, name="x") 162 | x = model.addMVar(shape=A.shape[1], lb=0.0, ub=1.0, vtype=gp.GRB.CONTINUOUS, name="x") 163 | model.setObjective(c @x, gp.GRB.MINIMIZE) 164 | model.addConstr(A @ x == b, name="eq") 165 | model.optimize() 166 | 167 | if model.status==2: 168 | sol = x.x[non_zero_edge_idx] 169 | return sol.reshape( x_max, y_max ) 170 | else: 171 | print(model.status) 172 | model.computeIIS() 173 | model.write("infeasible_nodeweightedSP.ilp") 174 | raise Exception("Solution not found") 175 | -------------------------------------------------------------------------------- /warcraft/comb_modules/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import functools 3 | import numpy as np 4 | 5 | 6 | def neighbours_8(x, y, x_max, y_max): 7 | deltas_x = (-1, 0, 1) 8 | deltas_y = (-1, 0, 1) 9 | for (dx, dy) in itertools.product(deltas_x, deltas_y): 10 | x_new, y_new = x + dx, y + dy 11 | if 0 <= x_new < x_max and 0 <= y_new < y_max and (dx, dy) != (0, 0): 12 | yield x_new, y_new 13 | 14 | 15 | def neighbours_4(x, y, x_max, y_max): 16 | for (dx, dy) in [(1, 0), (0, 1), (0, -1), (-1, 0)]: 17 | x_new, y_new = x + dx, y + dy 18 | if 0 <= x_new < x_max and 0 <= y_new < y_max and (dx, dy) != (0, 0): 19 | yield x_new, y_new 20 | 21 | 22 | def get_neighbourhood_func(neighbourhood_fn): 23 | if neighbourhood_fn == "4-grid": 24 | return neighbours_4 25 | elif neighbourhood_fn == "8-grid": 26 | return neighbours_8 27 | else: 28 | raise Exception(f"neighbourhood_fn of {neighbourhood_fn} not possible") 29 | 30 | 31 | def edges_from_vertex(x, y, N, neighbourhood_fn): 32 | v = (x, y) 33 | neighbours = get_neighbourhood_func(neighbourhood_fn)(*v, x_max=N, y_max=N) 34 | v_edges = [ 35 |
(*v, *vn) for vn in neighbours if vertex_index(v, N) < vertex_index(vn, N) 36 | ] # Enforce ordering on vertices 37 | return v_edges 38 | 39 | 40 | def vertex_index(v, dim): 41 | x, y = v 42 | return x * dim + y 43 | 44 | 45 | @functools.lru_cache(32) 46 | def edges_from_grid(N, neighbourhood_fn): 47 | all_vertices = itertools.product(range(N), range(N)) 48 | all_edges = [edges_from_vertex(x, y, N, neighbourhood_fn=neighbourhood_fn) for x, y in all_vertices] 49 | all_edges_flat = sum(all_edges, []) 50 | all_edges_flat_unique = list(set(all_edges_flat)) 51 | return np.asarray(all_edges_flat_unique) 52 | 53 | 54 | @functools.lru_cache(32) 55 | def cached_vertex_grid_to_edges_grid_coords(grid_dim: int): 56 | edges_grid_idxs = edges_from_grid(grid_dim, neighbourhood_fn="4-grid") 57 | return edges_grid_idxs[:, 0], edges_grid_idxs[:, 1], edges_grid_idxs[:, 2], edges_grid_idxs[:, 3] 58 | 59 | 60 | @functools.lru_cache(32) 61 | def cached_vertex_grid_to_edges(grid_dim: int): 62 | x, y, xn, yn = cached_vertex_grid_to_edges_grid_coords(grid_dim) 63 | return np.vstack([vertex_index((x, y), grid_dim), vertex_index((xn, yn), grid_dim)]).T 64 | -------------------------------------------------------------------------------- /warcraft/config.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "baseline", 4 | "img_size": 12, 5 | "loss": "mse", 6 | "lr": 0.001 7 | }, 8 | { 9 | "model": "baseline", 10 | "img_size": 30, 11 | "loss": "mse", 12 | "lr": 0.01 13 | }, 14 | { 15 | "model": "baseline", 16 | "img_size": 24, 17 | "loss": "mse", 18 | "lr": 0.001 19 | }, 20 | { 21 | "model": "baseline", 22 | "img_size": 18, 23 | "loss": "mse", 24 | "lr": 0.0005 25 | }, 26 | { 27 | "model": "SPO", 28 | "img_size": 12, 29 | "lr": 0.005 30 | }, 31 | { 32 | "model": "SPO", 33 | "img_size": 30, 34 | "lr": 0.0005 35 | }, 36 | { 37 | "model": "SPO", 38 | "img_size": 24, 39 | "lr": 0.005 40 | }, 41 | { 42 | "model": "SPO", 43 | "img_size": 18, 44 | "lr": 0.01 45 | }, 46 | { 47 | "model": "DBB", 48 | "img_size": 12, 49 | "lr": 0.001, 50 | "lambda_val": 10.0 51 | }, 52 | { 53 | "model": "DBB", 54 | "img_size": 30, 55 | "lr": 0.005, 56 | "lambda_val": 10.0 57 | }, 58 | { 59 | "model": "DBB", 60 | "img_size": 24, 61 | "lr": 0.001, 62 | "lambda_val": 100.0 63 | }, 64 | { 65 | "model": "DBB", 66 | "img_size": 18, 67 | "lr": 0.001, 68 | "lambda_val": 10.0 69 | }, 70 | { 71 | "model": "FenchelYoung", 72 | "img_size": 12, 73 | "lr": 0.01, 74 | "sigma": 0.01 75 | }, 76 | { 77 | "model": "FenchelYoung", 78 | "img_size": 30, 79 | "lr": 0.001, 80 | "sigma": 0.01 81 | }, 82 | { 83 | "model": "FenchelYoung", 84 | "img_size": 24, 85 | "lr": 0.01, 86 | "sigma": 0.01 87 | }, 88 | { 89 | "model": "FenchelYoung", 90 | "img_size": 18, 91 | "lr": 0.01, 92 | "sigma": 0.01 93 | }, 94 | { 95 | "model": "IMLE", 96 | "img_size": 12, 97 | "lr": 0.001, 98 | "beta": 10.0, 99 | "temperature": 0.05, 100 | "k": 50 101 | }, 102 | { 103 | "model": "IMLE", 104 | "img_size": 30, 105 | "lr": 0.001, 106 | "beta": 100.0, 107 | "temperature": 0.05, 108 | "k": 50 109 | }, 110 | { 111 | "model": "IMLE", 112 | "img_size": 24, 113 | "lr": 0.001, 114 | "beta": 10.0, 115 | "temperature": 0.05, 116 | "k": 50 117 | }, 118 | { 119 | "model": "IMLE", 120 | "img_size": 18, 121 | "lr": 0.01, 122 | "beta": 10.0, 123 | "temperature": 0.05, 124 | "k": 5 125 | }, 126 | { 127 | "model": "CachingPO", 128 | "loss": "MAP_c", 129 | "img_size": 12, 130 | "lr": 0.005 131 | }, 132 | { 133 | "model": "CachingPO", 134 | "loss": 
"MAP_c", 135 | "img_size": 30, 136 | "lr": 0.01 137 | }, 138 | { 139 | "model": "CachingPO", 140 | "loss": "MAP_c", 141 | "img_size": 24, 142 | "lr": 0.005 143 | }, 144 | { 145 | "model": "CachingPO", 146 | "loss": "MAP_c", 147 | "img_size": 18, 148 | "lr": 0.005 149 | }, 150 | { 151 | "model": "CachingPO", 152 | "loss": "pairwise_diff", 153 | "img_size": 12, 154 | "lr": 0.005 155 | }, 156 | { 157 | "model": "CachingPO", 158 | "loss": "pairwise_diff", 159 | "img_size": 30, 160 | "lr": 0.005 161 | }, 162 | { 163 | "model": "CachingPO", 164 | "loss": "pairwise_diff", 165 | "img_size": 24, 166 | "lr": 0.005 167 | }, 168 | { 169 | "model": "CachingPO", 170 | "loss": "pairwise_diff", 171 | "img_size": 18, 172 | "lr": 0.005 173 | }, 174 | { 175 | "model": "CachingPO", 176 | "loss": "pairwise", 177 | "img_size": 12, 178 | "lr": 0.01, 179 | "tau": 0.1 180 | }, 181 | { 182 | "model": "CachingPO", 183 | "loss": "pairwise", 184 | "img_size": 30, 185 | "lr": 0.01, 186 | "tau": 0.1 187 | }, 188 | { 189 | "model": "CachingPO", 190 | "loss": "pairwise", 191 | "img_size": 24, 192 | "lr": 0.01, 193 | "tau": 0.1 194 | }, 195 | { 196 | "model": "CachingPO", 197 | "loss": "pairwise", 198 | "img_size": 18, 199 | "lr": 0.005, 200 | "tau": 0.1 201 | }, 202 | { 203 | "model": "CachingPO", 204 | "loss": "listwise", 205 | "img_size": 12, 206 | "lr": 0.005, 207 | "tau": 0.5 208 | }, 209 | { 210 | "model": "CachingPO", 211 | "loss": "listwise", 212 | "img_size": 30, 213 | "lr": 0.005, 214 | "tau": 1.0 215 | }, 216 | { 217 | "model": "CachingPO", 218 | "loss": "listwise", 219 | "img_size": 24, 220 | "lr": 0.005, 221 | "tau": 0.5 222 | }, 223 | { 224 | "model": "CachingPO", 225 | "loss": "listwise", 226 | "img_size": 18, 227 | "lr": 0.005, 228 | "tau": 0.05 229 | } 230 | ] -------------------------------------------------------------------------------- /warcraft/data/data_prep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pip install --upgrade --no-cache-dir gdown 3 | gdown 16heKlpA9cBq8GXnAtBJgQCYUgV9LQ-Rw 4 | tar -xvzf warcaft-data.tar.gz 5 | rm warcaft-data.tar.gz 6 | -------------------------------------------------------------------------------- /warcraft/imle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PredOpt/predopt-benchmarks/1a8e048c5aa640f73a05e29878b8e1e8f6f73610/warcraft/imle/__init__.py -------------------------------------------------------------------------------- /warcraft/imle/noise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | 5 | import torch 6 | from torch import Tensor, Size 7 | from torch.distributions.gamma import Gamma 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | from typing import Optional 12 | 13 | import logging 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseNoiseDistribution(ABC): 19 | def __init__(self): 20 | super().__init__() 21 | 22 | @abstractmethod 23 | def sample(self, 24 | shape: Size) -> Tensor: 25 | raise NotImplementedError 26 | 27 | 28 | class SumOfGammaNoiseDistribution(BaseNoiseDistribution): 29 | r""" 30 | Creates a generator of samples for the Sum-of-Gamma distribution [1], parameterized 31 | by :attr:`k`, :attr:`nb_iterations`, and :attr:`device`. 32 | 33 | [1] Mathias Niepert, Pasquale Minervini, Luca Franceschi - Implicit MLE: Backpropagating Through Discrete 34 | Exponential Family Distributions. 
NeurIPS 2021 (https://arxiv.org/abs/2106.01798) 35 | 36 | Example:: 37 | 38 | >>> import torch 39 | >>> noise_distribution = SumOfGammaNoiseDistribution(k=5, nb_iterations=100) 40 | >>> noise_distribution.sample(torch.Size([5])) 41 | tensor([ 0.2504, 0.0112, 0.5466, 0.0051, -0.1497]) 42 | 43 | Args: 44 | k (float): k parameter -- see [1] for more details. 45 | nb_iterations (int): number of iterations for estimating the sample. 46 | device (torch.device): device where to store samples. 47 | """ 48 | def __init__(self, 49 | k: float, 50 | nb_iterations: int = 10, 51 | device: Optional[torch.device] = None): 52 | super().__init__() 53 | self.k = k 54 | self.nb_iterations = nb_iterations 55 | self.device = device 56 | 57 | def sample(self, 58 | shape: Size) -> Tensor: 59 | samples = torch.zeros(size=shape, device=self.device) 60 | for i in range(1, self.nb_iterations + 1): 61 | concentration = torch.tensor(1. / self.k, device=self.device) 62 | rate = torch.tensor(i / self.k, device=self.device) 63 | 64 | gamma = Gamma(concentration=concentration, rate=rate) 65 | samples = samples + gamma.sample(sample_shape=shape).to(self.device) 66 | samples = (samples - math.log(self.nb_iterations)) / self.k 67 | return samples.to(self.device) 68 | -------------------------------------------------------------------------------- /warcraft/imle/target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from torch import Tensor 4 | from abc import ABC, abstractmethod 5 | 6 | import logging 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class BaseTargetDistribution(ABC): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | @abstractmethod 16 | def params(self, 17 | theta: Tensor, 18 | dy: Tensor) -> Tensor: 19 | raise NotImplementedError 20 | 21 | 22 | class TargetDistribution(BaseTargetDistribution): 23 | r""" 24 | Creates a generator of target distributions parameterized by :attr:`alpha` and :attr:`beta`. 25 | 26 | Example:: 27 | 28 | >>> import torch 29 | >>> target_distribution = TargetDistribution(alpha=1.0, beta=1.0) 30 | >>> target_distribution.params(theta=torch.tensor([1.0]), dy=torch.tensor([1.0])) 31 | tensor([0.]) 32 | 33 | Args: 34 | alpha (float): weight of the initial distribution parameters theta 35 | beta (float): weight of the downstream gradient dy 36 | """ 37 | def __init__(self, 38 | alpha: float = 1.0, 39 | beta: float = 1.0): 40 | super().__init__() 41 | self.alpha = alpha 42 | self.beta = beta 43 | 44 | def params(self, 45 | theta: Tensor, 46 | dy: Tensor) -> Tensor: 47 | theta_prime = self.alpha * theta - self.beta * dy 48 | return theta_prime 49 | -------------------------------------------------------------------------------- /warcraft/readme.md: -------------------------------------------------------------------------------- 1 | This directory corresponds to the Warcraft shortest path problem. 2 | 3 | To download the data, go inside the data folder and run 4 | ``` 5 | ./data_prep.sh 6 | ``` 7 | This will download and preprocess the data. 8 | 9 | You may download the data directly from [https://doi.org/10.48804/KT2P3Z](https://doi.org/10.48804/KT2P3Z) and extract by running `tar -xvzf warcraft-data.tar.gz`. 10 | 11 | 12 | Then an experiment can be run using the `TestWarcraft.py` file. 13 | To reproduce the results of the experiments, run 14 | ``` 15 | python TestWarcraft.py 16 | ``` 17 | --------------------------------------------------------------------------------