├── .gitignore ├── LICENSE ├── README.md ├── activetesting ├── __init__.py ├── acquisition.py ├── datasets.py ├── experiment.py ├── hoover.py ├── loss.py ├── models │ ├── __init__.py │ ├── cnn │ │ ├── .gitignore │ │ ├── models.py │ │ ├── resnet_custom.py │ │ └── wide_resnet.py │ ├── efficient_models.py │ ├── radial_bnn │ │ ├── __init__.py │ │ ├── bnn_models.py │ │ ├── consistent_mc_dropout.py │ │ └── radial_layers │ │ │ ├── __init__.py │ │ │ ├── distributions.py │ │ │ ├── loss.py │ │ │ └── variational_bayes.py │ ├── sk2torch.py │ ├── skmodels.py │ └── torchmodels.py ├── plotting │ ├── __init__.py │ ├── paths.py │ └── utils.py ├── risk_estimators.py ├── utils │ └── maps.py └── visualize.py ├── conf ├── config.yaml ├── hydra │ └── default.yaml └── paper │ ├── LargeCIFAR100ResNet.yaml │ ├── LargeCIFAR100WideResNet.yaml │ ├── LargeCIFAR10ResNet.yaml │ ├── LargeCIFAR10ResNetAccuracy.yaml │ ├── LargeFMNISTBNN.yaml │ ├── LargeFMNISTResNet.yaml │ ├── LargeMNISTBNN.yaml │ ├── SmallFMNISTResNet.yaml │ ├── SmallMNISTBNN.yaml │ ├── SyntheticGPGP.yaml │ ├── SyntheticQuadraticLinear.yaml │ └── SyntheticTwoMoonsRF.yaml ├── main.py ├── notebooks ├── explore_experiment.ipynb └── plots_paper.ipynb ├── outputs └── animation.gif ├── reproduce └── experiments │ ├── figure-123.sh │ ├── figure-4.sh │ ├── figure-5.sh │ ├── figure-6.sh │ └── figure-7.sh └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Plots 2 | notebooks/plots 3 | 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # MacOs 11 | .DS_Store 12 | 13 | # C extensions 14 | *.so 15 | 16 | # VSCode 17 | .vscode/ 18 | 19 | # Hydra 20 | outputs/ 21 | 22 | # Data 23 | data/ 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | # Jupyter Notebook 94 | .ipynb_checkpoints 95 | 96 | # IPython 97 | profile_default/ 98 | ipython_config.py 99 | 100 | # pyenv 101 | .python-version 102 | 103 | # pipenv 104 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 105 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 106 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 107 | # install all needed dependencies. 108 | #Pipfile.lock 109 | 110 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 111 | __pypackages__/ 112 | 113 | # Celery stuff 114 | celerybeat-schedule 115 | celerybeat.pid 116 | 117 | # SageMath parsed files 118 | *.sage.py 119 | 120 | # Environments 121 | .env 122 | .venv 123 | env/ 124 | venv/ 125 | ENV/ 126 | env.bak/ 127 | venv.bak/ 128 | 129 | # Spyder project settings 130 | .spyderproject 131 | .spyproject 132 | 133 | # Rope project settings 134 | .ropeproject 135 | 136 | # mkdocs documentation 137 | /site 138 | 139 | # mypy 140 | .mypy_cache/ 141 | .dmypy.json 142 | dmypy.json 143 | 144 | # Pyre type checker 145 | .pyre/ 146 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jannik Kossen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active Testing: Sample-Efficient Model Evaluation 2 | 3 | Hi, good to see you here! 👋 4 | 5 | This is code for "Active Testing: Sample-Efficient Model Evaluation". 6 | 7 | Please cite our paper, if you find this helpful: 8 | 9 | ``` 10 | @article{kossen2021active, 11 | title={{A}ctive {T}esting: {S}ample-{E}fficient {M}odel {E}valuation}, 12 | author={Kossen, Jannik and Farquhar, Sebastian and Gal, Yarin and Rainforth, Tom}, 13 | journal={arXiv:2103.05331}, 14 | year={2021} 15 | } 16 | ``` 17 | 18 | ![animation](outputs/animation.gif) 19 | 20 | ## Setup 21 | 22 | The `requirements.txt` can be used to set up a python environment for this codebase. 23 | You can do this, for example, with `conda`: 24 | 25 | ``` 26 | conda create -n isactive python=3.8 27 | conda activate isactive 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | ## Reproducing the Experiments 32 | 33 | * To reproduce a figure of the paper, first run the appropriate experiments 34 | ``` 35 | sh reproduce/experiments/figure-X.sh 36 | ``` 37 | * And then create the plots with the Jupyter Notebook at 38 | ``` 39 | notebooks/plots_paper.ipynb 40 | ``` 41 | * (The notebook let's you conveniently select which plots to recreate.) 42 | 43 | * Which should put plots into `notebooks/plots/`. 
44 | 
45 | * In the above, replace `X` by
46 | * `123` for Figures 1, 2, 3
47 | * `4` for Figure 4
48 | * `5` for Figure 5
49 | * `6` for Figure 6
50 | * `7` for Figure 7
51 | 
52 | * Other notes
53 | * Synthetic data experiments do not require GPUs and should run on pretty much all recent hardware.
54 | * All other plots, realistically speaking, require GPUs.
55 | * We are also happy to share a 4 GB file with results from all experiments presented in the paper.
56 | * You may want to produce plots 7 and 8 for experiment setups other than those in the paper, i.e. for experiments you have already run.
57 | * Some experiments, e.g. those for Figures 4 or 6, may run for a really long time on a single GPU. It may be good to
58 | * execute the scripts in the sh-files in parallel on multiple GPUs.
59 | * start multiple runs in parallel and then combine experiments (see below).
60 | * end the runs early / decrease the total number of runs (this can be very reasonable -- look at the config files in `conf/paper` to modify this property).
61 | * If you want to understand the code, below we give a good strategy for approaching it. (Also, start with the synthetic data experiments. They have less complex code!)
62 | 
63 | 
64 | ## Running A Custom Experiment
65 | 
66 | * `main.py` is the main entry point into this codebase.
67 | * It executes a total of `n_runs` active testing experiments for a fixed setup.
68 | * Each experiment:
69 | * Trains (or loads) one main model.
70 | * This model can then be evaluated with a variety of acquisition strategies.
71 | * Risk estimates are then computed for points/weights from all acquisition strategies for all risk estimators.
72 | 
73 | * This repository uses `Hydra` to manage configs.
74 | * Look at `conf/config.yaml` or one of the experiments in `conf/...` for default configs and hyperparameters.
75 | * Experiments are autologged and results saved to `./output/`.
76 | 
77 | * See `notebooks/explore_experiment.ipynb` for some example code on how to evaluate custom experiments.
78 | * The evaluations use `activetesting.visualize.Visualiser`, which implements visualisation methods.
79 | * Give it a `path` to an experiment in `output/path/to/experiment` and explore the methods.
80 | * If you want to combine data from multiple runs, give it a list of paths. (A short sketch is included at the end of this README.)
81 | * I prefer to load this in Jupyter Notebooks, but hey, everybody's different.
82 | 
83 | * A guide to the code
84 | * `main.py` runs repeated experiments and orchestrates the whole shebang.
85 | * It iterates through all `n_runs` and `acquisition strategies`.
86 | * `experiment.py` handles a single experiment.
87 | * It combines the `model`, `dataset`, `acquisition strategy`, and `risk estimators`.
88 | * `datasets.py`, `acquisition.py`, `loss.py`, `risk_estimators.py` all contain exactly what you would expect!
89 | * `hoover.py` is a logging module.
90 | * `models/` contains all models, scikit-learn and PyTorch.
91 | * In `sk2torch.py` we have some code that wraps torch models in a way that lets them be used as scikit-learn models from the outside.
92 | 
93 | ## And Finally
94 | 
95 | Thanks for stopping by!
96 | 
97 | If you find anything wrong with the code, please contact us.
98 | 
99 | We are happy to answer any questions related to the code and project.
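As referenced above, here is a minimal sketch of loading results for evaluation. It assumes the `Visualiser` constructor simply takes the experiment path(s) as described above; the paths are placeholders, and `notebooks/explore_experiment.ipynb` remains the authoritative example.

```
# Sketch only -- paths are hypothetical; replace them with directories
# produced by your own runs of main.py.
from activetesting.visualize import Visualiser

# Single experiment: point the Visualiser at one experiment output directory.
vis = Visualiser('output/path/to/experiment')

# Multiple runs of the same setup: pass a list of paths to combine them.
# vis = Visualiser(['output/path/to/run-a', 'output/path/to/run-b'])

# Then explore the visualisation methods the class exposes, e.g. via
# tab completion in a Jupyter notebook.
```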
-------------------------------------------------------------------------------- /activetesting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlko/active-testing/2807fac9ad91fb12e5814e71b9a26c55df9d50fb/activetesting/__init__.py -------------------------------------------------------------------------------- /activetesting/acquisition.py: -------------------------------------------------------------------------------- 1 | """Implement acquisition functions.""" 2 | 3 | import warnings 4 | import numpy as np 5 | from omegaconf import OmegaConf 6 | 7 | from activetesting.models import ( 8 | SVMClassifier, RandomForestClassifier, GPClassifier, 9 | GaussianProcessRegressor, RandomDirectionRandomForestClassifier, 10 | RadialBNN, make_efficient 11 | ) 12 | 13 | 14 | class AcquisitionFunction: 15 | """Acquisition function is its own class. 16 | 17 | In the beginning this may seem like overkill, but remember that our 18 | acquisition function will likely have a powerfull substitute model. 19 | 20 | Implement get_next_point 21 | """ 22 | def __init__(self, cfg_run, dataset): 23 | self.cfg, run = cfg_run 24 | self.dataset = dataset 25 | # keep track of acquisition weights 26 | self.weights = np.array([]) 27 | 28 | if self.cfg.animate and run < self.cfg.animate_until: 29 | self.all_pmfs = list() 30 | else: 31 | self.all_pmfs = None 32 | 33 | self.counter = 0 34 | 35 | if self.cfg.lazy_save: 36 | 37 | if L := self.cfg.get('lazy_save_schedule', False): 38 | L = list(L) 39 | else: 40 | L = list(range(1000)) 41 | L += list(range(int(1e3), int(1e4), 500)) 42 | L += list(range(int(1e4), int(10e4), int(1e3))) 43 | 44 | self.lazy_list = L 45 | 46 | # For model selection hot-patching. 47 | self.externally_controlled = False 48 | self.ext_test_idx = None 49 | self.ext_pmf_idx = None 50 | 51 | @staticmethod 52 | def acquire(): 53 | raise NotImplementedError 54 | 55 | def check_save(self, off=0): 56 | if self.all_pmfs is None: 57 | return False 58 | if self.cfg.lazy_save and (self.counter - off in self.lazy_list): 59 | return True 60 | else: 61 | return False 62 | 63 | return True 64 | 65 | def sample_pmf(self, pmf): 66 | """Sample from pmf.""" 67 | 68 | if len(pmf) == 1: 69 | # Always choose last datum 70 | pmf = [1] 71 | 72 | if self.externally_controlled: 73 | idx = self.ext_pmf_idx 74 | test_idx = self.ext_test_idx 75 | 76 | else: 77 | if self.cfg['sample']: 78 | # this is one-hot over all remaining test data 79 | sample = np.random.multinomial(1, pmf) 80 | # idx in test_remaining 81 | idx = np.where(sample)[0][0] 82 | else: 83 | idx = np.argmax(pmf) 84 | 85 | # get index of chosen test datum 86 | test_idx = self.dataset.test_remaining[idx] 87 | 88 | # get value of acquisition function at that index 89 | self.weights = np.append( 90 | self.weights, pmf[idx]) 91 | 92 | if self.check_save(): 93 | self.all_pmfs.append(dict( 94 | idx=idx, 95 | test_idx=test_idx, 96 | pmf=pmf, 97 | remaining=self.dataset.test_remaining, 98 | observed=self.dataset.test_observed)) 99 | 100 | self.counter += 1 101 | return test_idx, idx 102 | 103 | @staticmethod 104 | def safe_normalise(pmf): 105 | """If loss is 0, we want to sample uniform and avoid nans.""" 106 | 107 | if (Σ := pmf.sum()) != 0: 108 | pmf /= Σ 109 | else: 110 | pmf = np.ones(len(pmf))/len(pmf) 111 | 112 | return pmf 113 | 114 | 115 | class RandomAcquisition(AcquisitionFunction): 116 | def __init__(self, cfg, dataset, *args, **kwargs): 117 | super().__init__(cfg, dataset) 118 | 119 | 
def acquire(self): 120 | n_remaining = len(self.dataset.test_remaining) 121 | pmf = np.ones(n_remaining)/n_remaining 122 | return self.sample_pmf(pmf) 123 | 124 | 125 | class TrueLossAcquisition(AcquisitionFunction): 126 | def __init__(self, cfg, dataset, true_loss_vals, *args, **kwargs): 127 | super().__init__(cfg, dataset) 128 | 129 | # make sure indexes are aligned 130 | self.true_loss = np.zeros(dataset.N) 131 | self.true_loss[self.dataset.test_idxs] = true_loss_vals 132 | 133 | def acquire(self): 134 | """Sample according to true loss dist.""" 135 | 136 | pmf = self.true_loss[self.dataset.test_remaining] 137 | 138 | pmf = self.safe_normalise(pmf) 139 | 140 | return self.sample_pmf(pmf) 141 | 142 | 143 | class DistanceBasedAcquisition(AcquisitionFunction): 144 | def __init__(self, cfg, dataset, *args, **kwargs): 145 | super().__init__(cfg, dataset) 146 | 147 | def acquire(self): 148 | """Sample according to distance to previously sampled points.""" 149 | remaining_idx = self.dataset.test_remaining 150 | observed_idx = self.dataset.test_observed 151 | 152 | # First test index sampled at random 153 | if observed_idx.size == 0: 154 | N = len(self.dataset.test_idxs) 155 | pmf = np.ones(N) / N 156 | 157 | else: 158 | # For each point in remaining 159 | # calculate distance to all points in observed 160 | remaining = self.dataset.x[remaining_idx] 161 | observed = self.dataset.x[observed_idx] 162 | 163 | # broadcasting to get all paired differences 164 | d = remaining[:, np.newaxis, :] - observed 165 | d = d**2 166 | # sum over feature dimension 167 | d = d.sum(-1) 168 | # sqrt to get distance 169 | d = np.sqrt(d) 170 | # mean over other pairs 171 | distances = d.mean(1) 172 | 173 | # Constract PDF via softmax 174 | pmf = np.exp(distances) 175 | pmf /= pmf.sum() 176 | 177 | return self.sample_pmf(pmf) 178 | 179 | 180 | # --- Acquisition Functions Based on Expected Loss 181 | 182 | class _LossAcquisitionBase(AcquisitionFunction): 183 | def __init__(self, cfg, dataset, model): 184 | super().__init__(cfg, dataset) 185 | 186 | # also save original model 187 | self.model = model 188 | 189 | def acquire(self): 190 | # predict + std for both models on all remaining test points 191 | remaining_idxs = self.dataset.test_remaining 192 | remaining_data = self.dataset.x[remaining_idxs] 193 | 194 | # build expected loss 195 | expected_loss = self.expected_loss(remaining_data, remaining_idxs) 196 | 197 | if self.cfg['sample'] and (expected_loss < 0).sum() > 0: 198 | # Log-lik can be negative. 199 | # Make all values positive. 200 | # Alternatively could set <0 values to 0. 
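# Shifting by the absolute minimum maps the most negative expected loss
# to zero while preserving the ordering; the shifted values are then
# normalised into a pmf below.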
201 | expected_loss += np.abs(expected_loss.min()) 202 | 203 | if not (expected_loss.sum() == 0): 204 | expected_loss /= expected_loss.sum() 205 | 206 | if self.cfg.get('uniform_clip', False): 207 | # clip all values less than 10 percent of uniform propability 208 | p = self.cfg['uniform_clip_val'] 209 | expected_loss = np.maximum(p * 1/expected_loss.size, expected_loss) 210 | expected_loss /= expected_loss.sum() 211 | 212 | return self.sample_pmf(expected_loss) 213 | 214 | 215 | class GPAcquisitionUncertainty(_LossAcquisitionBase): 216 | # warning = ( 217 | # 'GPAcquisitionUncertainty is currently only appropriate if ' 218 | # 'the aleatoric uncertainty is 0.') 219 | 220 | def __init__(self, cfg, dataset, model, **kwargs): 221 | 222 | super().__init__(cfg, dataset, model) 223 | 224 | def expected_loss(self, remaining_data, remaining_idxs): 225 | 226 | mu, std = self.model.predict( 227 | remaining_data, idxs=remaining_idxs, return_std=True) 228 | 229 | aleatoric = getattr(self.dataset, 'aleatoric', 0) 230 | 231 | return std**2 + aleatoric**2 232 | 233 | 234 | class BNNClassifierAcquisitionMI(_LossAcquisitionBase): 235 | # warning = ( 236 | # 'GPAcquisitionUncertainty is currently only appropriate if ' 237 | # 'the aleatoric uncertainty is 0.') 238 | 239 | def __init__(self, cfg, dataset, model, **kwargs): 240 | 241 | super().__init__(cfg, dataset, model) 242 | 243 | def expected_loss(self, remaining_data, remaining_idxs): 244 | 245 | mutual_information = self.model.predict( 246 | remaining_data, idxs=remaining_idxs, mutual_info=True) 247 | 248 | return mutual_information 249 | 250 | 251 | class _SurrogateAcquisitionBase(_LossAcquisitionBase): 252 | def __init__(self, cfg_run, dataset, model, SurrModel, surr_cfg): 253 | if surr_cfg.get('acquisition', False): 254 | # the surrogate acquisition can specialise the 255 | # acquisition configs. 
this mostly affects clipping behaviour 256 | cfg = OmegaConf.merge( 257 | OmegaConf.structured(cfg_run[0]), 258 | OmegaConf.structured(surr_cfg.acquisition)) 259 | cfg_run = [cfg, cfg_run[1]] 260 | 261 | super().__init__(cfg_run, dataset, model) 262 | 263 | self.surr_cfg = surr_cfg 264 | self.surr_class = SurrModel 265 | 266 | self.surr_model = SurrModel(surr_cfg) 267 | self.surr_model.fit(*self.dataset.total_observed) 268 | 269 | if self.surr_cfg.get('efficient', False): 270 | # make efficient predictions on remaining test data 271 | self.surr_model = make_efficient(self.surr_model, self.dataset) 272 | 273 | if surr_cfg.get('lazy', False): 274 | if (sched := surr_cfg.get('lazy_schedule', False)) is not False: 275 | retrain = list(sched) 276 | else: 277 | retrain = [5] 278 | retrain += list(range(10, 50, 10)) 279 | retrain += [50] 280 | retrain += list(range(100, 1000, 150)) 281 | retrain += list(range(1000, 10000, 2000)) 282 | retrain += list(range(int(10e3), int(100e3), int(10e3))) 283 | 284 | # always remove 0, since we train at it 0 285 | self.retrain = list(set(retrain) - {0}) 286 | self.update_surrogate = self.lazy_update_surrogate 287 | else: 288 | self.update_surrogate = self.vanilla_update_surrogate 289 | 290 | def vanilla_update_surrogate(self): 291 | # train surrogate on train data + currently observed test 292 | self.surr_model = self.surr_class(self.surr_cfg) 293 | 294 | if self.surr_cfg.get('on_train_only', False): 295 | self.surr_model.fit(*self.dataset.train_data) 296 | else: 297 | # fit on all observed data 298 | self.surr_model.fit(*self.dataset.total_observed) 299 | 300 | if self.surr_cfg.get('efficient', False): 301 | self.surr_model = make_efficient(self.surr_model, self.dataset) 302 | 303 | def lazy_update_surrogate(self): 304 | 305 | if self.counter in self.retrain: 306 | self.surr_model = self.surr_class(self.surr_cfg) 307 | 308 | if self.surr_cfg.get('on_train_only', False): 309 | self.surr_model.fit(*self.dataset.train_data) 310 | else: 311 | # fit on all observed data 312 | self.surr_model.fit(*self.dataset.total_observed) 313 | 314 | if self.surr_cfg.get('efficient', False): 315 | # make efficient predictions on remaining test data 316 | self.surr_model = make_efficient(self.surr_model, self.dataset) 317 | 318 | def acquire(self): 319 | 320 | self.update_surrogate() 321 | 322 | return super().acquire() 323 | 324 | 325 | class _SelfSurrogateAcquisitionBase(_SurrogateAcquisitionBase): 326 | 327 | def __init__(self, cfg, dataset, model, model_cfg): 328 | from activetesting.utils.maps import model as model_maps 329 | SurrModel = model_maps[model.cfg['name']] 330 | 331 | super().__init__(cfg, dataset, model, SurrModel, model_cfg) 332 | 333 | 334 | class SelfSurrogateAcquisitionEntropy( 335 | _SelfSurrogateAcquisitionBase): 336 | 337 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 338 | super().__init__(cfg, dataset, model, model_cfg) 339 | 340 | def expected_loss(self, remaining_data, remaining_idxs): 341 | return entropy_loss( 342 | remaining_data, remaining_idxs, self.model, self.surr_model) 343 | 344 | 345 | class SelfSurrogateAcquisitionAccuracy( 346 | _SelfSurrogateAcquisitionBase): 347 | 348 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 349 | super().__init__(cfg, dataset, model, model_cfg) 350 | 351 | def expected_loss(self, remaining_data, remaining_idxs): 352 | return accuracy_loss( 353 | remaining_data, remaining_idxs, self.model, self.surr_model) 354 | 355 | 356 | class 
_AnySurrogateAcquisitionBase(_SurrogateAcquisitionBase): 357 | 358 | def __init__(self, cfg, dataset, model, model_cfg): 359 | from activetesting.utils.maps import model as model_maps 360 | SurrModel = model_maps[model_cfg.name] 361 | super().__init__(cfg, dataset, model, SurrModel, model_cfg) 362 | 363 | 364 | class AnySurrogateAcquisitionEntropy( 365 | _AnySurrogateAcquisitionBase): 366 | 367 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 368 | super().__init__(cfg, dataset, model, model_cfg) 369 | 370 | def expected_loss(self, remaining_data, remaining_idxs): 371 | return entropy_loss( 372 | remaining_data, remaining_idxs, self.model, self.surr_model, 373 | cfg=self.cfg) 374 | 375 | 376 | class AnySurrogateAcquisitionAccuracy( 377 | _AnySurrogateAcquisitionBase): 378 | 379 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 380 | super().__init__(cfg, dataset, model, model_cfg) 381 | 382 | def expected_loss(self, remaining_data, remaining_idxs): 383 | return accuracy_loss( 384 | remaining_data, remaining_idxs, self.model, self.surr_model) 385 | 386 | 387 | class _GPSurrogateAcquisitionBase(_SurrogateAcquisitionBase): 388 | def __init__(self, cfg, dataset, model, model_cfg): 389 | super().__init__( 390 | cfg, dataset, model, GaussianProcessRegressor, model_cfg) 391 | 392 | 393 | class GPSurrogateAcquisitionLogLik(_GPSurrogateAcquisitionBase): 394 | warning = ( 395 | 'GPSurrogateAcquisitionLogLik only works if aleatoric noise 0.') 396 | # warnings.warn(warning) 397 | 398 | def __init__(self, cfg, dataset, model, model_cfg, **kwargs): 399 | 400 | super().__init__(cfg, dataset, model, model_cfg) 401 | 402 | def expected_loss(self, remaining_data, remaining_idxs, *args, **kwargs): 403 | 404 | std = dict(return_std=True) 405 | mu_s, std_s = self.surr_model.predict( 406 | remaining_data, idxs=remaining_idxs, **std) 407 | mu_m, std_m = self.model.predict( 408 | remaining_data, idxs=remaining_idxs, **std) 409 | 410 | # temporary fix for log lik acquisition 411 | aleatoric = 0 412 | 413 | expected_loss = ( 414 | np.log(2*np.pi*std_m**2) 415 | + 1/(2*std_m**2) * ( 416 | (mu_s - mu_m)**2 + std_s**2 + aleatoric**2) 417 | ) 418 | 419 | return expected_loss 420 | 421 | 422 | class GPSurrogateAcquisitionMSE(_GPSurrogateAcquisitionBase): 423 | warning = ( 424 | 'GPSurrogateAcquisitionMSE is currently only appropriate if ' 425 | 'the aleatoric uncertainty is 0.') 426 | # warnings.warn(warning) 427 | 428 | def __init__(self, cfg, dataset, model, model_cfg, **kwargs): 429 | 430 | super().__init__(cfg, dataset, model, model_cfg) 431 | 432 | def expected_loss(self, remaining_data, remaining_idxs): 433 | 434 | mu_s, std_s = self.surr_model.predict( 435 | remaining_data, idxs=remaining_idxs, return_std=True) 436 | mu_m = self.model.predict(remaining_data, idxs=remaining_idxs) 437 | 438 | # each model needs to have this set 439 | # TODO: should probs be a data property 440 | # (move reliance on main model out of GPDataset) 441 | aleatoric = getattr(self.dataset, 'aleatoric', 0) 442 | 443 | expected_loss = (mu_s - mu_m)**2 + std_s**2 + aleatoric**2 444 | 445 | # print('mse/var', (((mu_s - mu_m)/std_s)**2).mean(), 446 | # ((mu_s - mu_m)**2).std(), (std_s**2).std()) 447 | 448 | if self.cfg.get('clip', False): 449 | clip_val = 0.05 * np.max(expected_loss) 450 | if clip_val < 1e-10: 451 | warnings.warn('All loss values small!') 452 | 453 | expected_loss = np.maximum(clip_val, expected_loss) 454 | 455 | return expected_loss 456 | 457 | 458 | class 
GPSurrogateAcquisitionMSEDoublyUncertain(_GPSurrogateAcquisitionBase): 459 | warning = ( 460 | 'GPSurrogateAcquisitionMSEDoublyUncertain is currently only ' 461 | 'appropriate if the aleatoric uncertainty is 0.') 462 | # warnings.warn(warning) 463 | 464 | def __init__(self, cfg, dataset, model, model_cfg, **kwargs): 465 | 466 | super().__init__(cfg, dataset, model, model_cfg) 467 | self.model_cfg = model_cfg 468 | 469 | def expected_loss(self, remaining_data, remaining_idxs): 470 | 471 | mu_s, std_s = self.surr_model.predict(remaining_data, return_std=True) 472 | mu_m, std_m = self.model.predict( 473 | remaining_data, idxs=remaining_idxs, return_std=True) 474 | 475 | expected_loss = (mu_s - mu_m)**2 + std_s**2 + std_m**2 476 | 477 | return expected_loss 478 | 479 | 480 | class ClassifierAcquisitionEntropy(_LossAcquisitionBase): 481 | 482 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 483 | super().__init__(cfg, dataset, model) 484 | self.T = model_cfg.get('temperature', None) 485 | 486 | def expected_loss(self, remaining_data, remaining_idxs): 487 | return entropy_loss( 488 | remaining_data, remaining_idxs, self.model, None, T=self.T, 489 | cfg=self.cfg) 490 | 491 | 492 | class ClassifierAcquisitionAccuracy(_LossAcquisitionBase): 493 | 494 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 495 | super().__init__(cfg, dataset, model) 496 | 497 | def expected_loss(self, remaining_data, remaining_idxs): 498 | return accuracy_loss( 499 | remaining_data, remaining_idxs, self.model, None) 500 | 501 | 502 | class _RandomForestSurrogateAcquisitionBase(_SurrogateAcquisitionBase): 503 | 504 | def __init__(self, cfg, dataset, model, model_cfg): 505 | super().__init__( 506 | cfg, dataset, model, RandomForestClassifier, model_cfg) 507 | 508 | 509 | class RandomForestClassifierSurrogateAcquisitionEntropy( 510 | _RandomForestSurrogateAcquisitionBase): 511 | 512 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 513 | super().__init__(cfg, dataset, model, model_cfg) 514 | 515 | def expected_loss(self, remaining_data, remaining_idxs): 516 | return entropy_loss( 517 | remaining_data, remaining_idxs, self.model, self.surr_model, 518 | cfg=self.cfg) 519 | 520 | 521 | class _SVMClassifierSurrogateAcquisitionBase(_SurrogateAcquisitionBase): 522 | 523 | def __init__(self, cfg, dataset, model, model_cfg): 524 | super().__init__(cfg, dataset, model, SVMClassifier, model_cfg) 525 | 526 | 527 | class SVMClassifierSurrogateAcquisitionEntropy( 528 | _SVMClassifierSurrogateAcquisitionBase): 529 | 530 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 531 | super().__init__(cfg, dataset, model, model_cfg) 532 | 533 | def expected_loss(self, remaining_data, remaining_idxs): 534 | return entropy_loss( 535 | remaining_data, remaining_idxs, self.model, self.surr_model, 536 | cfg=self.cfg) 537 | 538 | 539 | class _GPClassifierSurrogateAcquisitionBase(_SurrogateAcquisitionBase): 540 | 541 | def __init__(self, cfg, dataset, model, model_cfg): 542 | super().__init__(cfg, dataset, model, GPClassifier, model_cfg) 543 | 544 | 545 | class GPClassifierSurrogateAcquisitionEntropy( 546 | _GPClassifierSurrogateAcquisitionBase): 547 | 548 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 549 | super().__init__(cfg, dataset, model, model_cfg) 550 | 551 | def expected_loss(self, remaining_data, remaining_idxs): 552 | return entropy_loss( 553 | remaining_data, remaining_idxs, self.model, self.surr_model, 554 | cfg=self.cfg 555 | ) 556 | 
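# Illustrative sketch (not part of the original code): what the surrogate-based
# entropy acquisitions above compute for a single remaining point. With a
# surrogate prediction p_surr = [0.9, 0.1] and a main-model prediction
# p_model = [0.6, 0.4], the unnormalised acquisition score is the expected
# cross-entropy
#     sum_c p_surr(y=c|x) * (-log p_model(y=c|x))
#     = -(0.9 * log(0.6) + 0.1 * log(0.4)) ~= 0.55.
# entropy_loss(), defined later in this file, vectorises this quantity over all
# remaining test points (plus clipping / temperature handling), and
# _LossAcquisitionBase.acquire() then normalises the scores into the pmf that
# sample_pmf() draws from.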
557 | 558 | class _RandomRandomForestSurrogateAcquisitionBase(_LossAcquisitionBase): 559 | """Randomize Hypers each iteration.""" 560 | 561 | def __init__(self, cfg, dataset, model, model_cfg): 562 | super().__init__(cfg, dataset, model) 563 | 564 | self.model_cfg = model_cfg 565 | self.surr_model = None 566 | self.random_init_model() 567 | 568 | def random_init_model(self): 569 | 570 | if self.model_cfg['params_from'] == 'main': 571 | if self.surr_model is not None: 572 | return True 573 | else: 574 | sk_args = self.model.model.get_params() 575 | cfg = OmegaConf.create(dict(sk_args=sk_args)) 576 | 577 | elif self.model_cfg['params_from'] == 'random': 578 | # This may be highly dependent on the data!! 579 | sk_args = dict( 580 | max_features='sqrt', 581 | criterion=str(np.random.choice(["gini", "entropy"])), 582 | max_depth=int(np.random.choice([3, 5, 10, 20])), 583 | n_estimators=int(np.random.choice([10, 50, 100, 200])), 584 | # min_samples_split=int(np.random.choice([2, 5, 10])) 585 | ) 586 | cfg = OmegaConf.create(dict(sk_args=sk_args)) 587 | else: 588 | raise ValueError 589 | 590 | if self.model_cfg['rotated']: 591 | self.surr_model = RandomDirectionRandomForestClassifier( 592 | cfg, speedup=True, dim=self.dataset.D[0] 593 | ) 594 | else: 595 | self.surr_model = RandomForestClassifier(cfg) 596 | 597 | def update_surrogate(self): 598 | 599 | self.random_init_model() 600 | 601 | self.surr_model.fit(*self.dataset.total_observed) 602 | 603 | def acquire(self): 604 | 605 | self.update_surrogate() 606 | 607 | return super().acquire() 608 | 609 | 610 | class RandomRandomForestClassifierSurrogateAcquisitionEntropy( 611 | _RandomRandomForestSurrogateAcquisitionBase): 612 | 613 | def __init__(self, cfg, dataset, model, model_cfg, *args, **kwargs): 614 | super().__init__(cfg, dataset, model, model_cfg) 615 | 616 | def expected_loss(self, remaining_data, remaining_idxs): 617 | return entropy_loss( 618 | remaining_data, remaining_idxs, self.model, self.surr_model, 619 | cfg=self.cfg) 620 | 621 | 622 | def entropy_loss( 623 | remaining_data, remaining_idxs, model, surr_model=None, 624 | eps=1e-15, T=None, cfg=None): 625 | 626 | model_pred = model.predict(remaining_data, idxs=remaining_idxs) 627 | 628 | if T is not None: 629 | model_pred = np.exp(np.log(model_pred)/T) 630 | 631 | model_pred = np.clip(model_pred, eps, 1/eps) 632 | model_pred[np.isnan(model_pred)] = 1/eps 633 | 634 | model_pred /= model_pred.sum(axis=1, keepdims=True) 635 | 636 | model_pred = np.clip(model_pred, eps, 1/eps) 637 | model_pred[np.isnan(model_pred)] = 1/eps 638 | 639 | if surr_model is not None: 640 | surr_model_pred = surr_model.predict( 641 | remaining_data, idxs=remaining_idxs) 642 | 643 | if T is not None: 644 | surr_model_pred = np.exp(np.log(surr_model_pred)/T) 645 | surr_model_pred = np.clip(surr_model_pred, eps, 1/eps) 646 | surr_model_pred[np.isnan(surr_model_pred)] = 1/eps 647 | 648 | surr_model_pred /= surr_model_pred.sum(axis=1, keepdims=True) 649 | surr_model_pred = np.clip(surr_model_pred, eps, 1/eps) 650 | surr_model_pred[np.isnan(surr_model_pred)] = 1/eps 651 | 652 | else: 653 | surr_model_pred = model_pred 654 | 655 | if T is None: 656 | model_pred = np.clip(model_pred, eps, 1 - eps) 657 | model_pred /= model_pred.sum(axis=1, keepdims=True) 658 | 659 | # Sum_{y=c} p_surr(y=c|x) log p_model(y=c|x) 660 | res = -1 * (surr_model_pred * np.log(model_pred)).sum(-1) 661 | 662 | if T is not None: 663 | res[np.isnan(res)] = np.nanmax(res) 664 | 665 | # Entropy may have zero support over some of the remaining 
items! 666 | # This is not good! Model is overconfident! Condition of estimator 667 | # do no longer hold! 668 | 669 | # clip at lowest 10 percentile of prediction (add safeguard for 0 preds) 670 | # clip_val = max(np.percentile(res, 10), 1e-3) 671 | # 1e-3 is a lot for large remaining_data, probably better as 672 | # 1/(100*len(remaining_data)) 673 | 674 | if cfg is not None and not cfg.get('uniform_clip', False): 675 | clip_val = np.percentile(res, 10) 676 | res = np.clip(res, clip_val, 1/eps) 677 | 678 | # clipping has moved to after acquisition 679 | return res 680 | 681 | def accuracy_loss( 682 | remaining_data, remaining_idxs, model, surr_model=None): 683 | # we need higher values = higher loss 684 | # so we will return 1 - accuracy 685 | 686 | model_pred = model.predict(remaining_data, idxs=remaining_idxs) 687 | 688 | if surr_model is not None: 689 | surr_model_pred = surr_model.predict( 690 | remaining_data, idxs=remaining_idxs) 691 | else: 692 | surr_model_pred = model_pred 693 | 694 | pred_classes = np.argmax(model_pred, axis=1) 695 | 696 | # instead of 0,1 loss we get p_surr(y|x) for accuracy 697 | 698 | res = 1 - surr_model_pred[np.arange(len(surr_model_pred)), pred_classes] 699 | 700 | res = np.maximum(res, np.max(res)*0.05) 701 | 702 | return res 703 | 704 | 705 | -------------------------------------------------------------------------------- /activetesting/datasets.py: -------------------------------------------------------------------------------- 1 | """Datasets for active testing.""" 2 | 3 | import logging 4 | from pathlib import Path 5 | import hydra 6 | from omegaconf import OmegaConf 7 | 8 | import numpy as np 9 | from tensorflow import keras 10 | from sklearn.model_selection import train_test_split as SKtrain_test_split 11 | 12 | 13 | class _Dataset: 14 | """Implement generic dataset. 15 | 16 | Load and preprocess data. 17 | Provide basic acess, train-test split. 18 | 19 | raise generic methods 20 | """ 21 | def __init__(self, cfg): 22 | 23 | # Set task_type and global_std if not present. 
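# Defaults if the experiment config omits them: a regression task,
# per-feature (rather than global) standardisation statistics, and an
# unspecified class count of -1.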
24 | self.cfg = OmegaConf.merge( 25 | OmegaConf.structured(cfg), 26 | dict( 27 | task_type=cfg.get('task_type', 'regression'), 28 | global_std=cfg.get('global_std', False), 29 | n_classes=cfg.get('n_classes', -1))) 30 | 31 | self.N = cfg.n_points 32 | self.x, self.y = self.generate_data() 33 | 34 | # For 1D data, ensure Nx1 shape 35 | if self.x.ndim == 1: 36 | self.x = self.x[:, np.newaxis] 37 | 38 | self.D = self.x.shape[1:] 39 | 40 | self.train_idxs, self.test_idxs = self.train_test_split(self.N) 41 | 42 | if self.cfg.standardize: 43 | self.standardize() 44 | 45 | def train_test_split(self, N, test_size=None): 46 | all_indices = np.arange(0, N) 47 | 48 | if self.cfg.get('stratify', False): 49 | stratify = self.y 50 | else: 51 | stratify = None 52 | 53 | if test_size is None: 54 | test_size = self.cfg.test_proportion 55 | 56 | train, test = SKtrain_test_split( 57 | all_indices, test_size=test_size, 58 | stratify=stratify) 59 | 60 | assert np.intersect1d(train, test).size == 0 61 | assert np.setdiff1d( 62 | np.union1d(train, test), 63 | all_indices).size == 0 64 | 65 | if p := self.cfg.get('test_unseen_proportion', False): 66 | test, test_unseen = SKtrain_test_split( 67 | np.arange(0, len(test)), test_size=p) 68 | self.test_unseen_idxs = test_unseen 69 | 70 | return train, test 71 | 72 | @property 73 | def train_data(self): 74 | return self.x[self.train_idxs], self.y[self.train_idxs] 75 | 76 | def standardize(self): 77 | """Standardize to zero mean and unit variance using train_idxs.""" 78 | 79 | ax = None if self.cfg['global_std'] else 0 80 | 81 | x_train, y_train = self.train_data 82 | 83 | x_std = self.cfg.get('x_std', True) 84 | if x_std: 85 | self.x_train_mean = x_train.mean(ax) 86 | self.x_train_std = x_train.std(ax) 87 | self.x = (self.x - self.x_train_mean) / self.x_train_std 88 | 89 | y_std = self.cfg.get('y_std', True) 90 | if (self.cfg['task_type'] == 'regression') and y_std: 91 | self.y_train_mean = y_train.mean(ax) 92 | self.y_train_std = y_train.std(ax) 93 | self.y = (self.y - self.y_train_mean) / self.y_train_std 94 | 95 | def export(self): 96 | package = dict( 97 | x=self.x, 98 | y=self.y, 99 | train_idxs=self.train_idxs, 100 | test_idxs=self.test_idxs 101 | ) 102 | return package 103 | 104 | 105 | class _ActiveTestingDataset(_Dataset): 106 | """Active Testing Dataset. 107 | 108 | Add functionality for active testing. 109 | 110 | Split test data into observed unobserved. 111 | 112 | Add Methods to keep track of unobserved/observed. 113 | Use an ordered set or sth to keep track of that. 114 | Also keep track of activation function values at time that 115 | sth was observed. 116 | """ 117 | def __init__(self, cfg): 118 | super().__init__(cfg) 119 | self.start() 120 | 121 | def start(self): 122 | self.test_observed = np.array([], dtype=np.int) 123 | self.test_remaining = self.test_idxs 124 | 125 | def restart(self): 126 | self.start() 127 | 128 | def observe(self, idx): 129 | """Observe data at idx and move from unobserved to observed. 
130 | 131 | Note: For efficiency reasons idx is index in test 132 | """ 133 | self.test_observed = np.append(self.test_observed, idx) 134 | self.test_remaining = self.test_remaining[self.test_remaining != idx] 135 | 136 | return self.x[[idx]], self.y[[idx]] 137 | 138 | @property 139 | def total_observed(self): 140 | """Return train and observed test data""" 141 | test = self.x[self.test_observed], self.y[self.test_observed] 142 | train = self.train_data 143 | # concatenate x and y separately 144 | total_observed = [ 145 | np.concatenate([test[i], train[i]], 0) 146 | for i in range(2)] 147 | 148 | return total_observed 149 | 150 | 151 | class QuadraticDatasetForLinReg(_ActiveTestingDataset): 152 | """Parabolic data for use with linear regression – proof of concept.""" 153 | def __init__(self, cfg, *args, **kwargs): 154 | super().__init__(cfg) 155 | 156 | def generate_data(self): 157 | x = np.linspace(0, 1, self.N) 158 | y = x**2 159 | y -= np.mean(y) 160 | return x, y 161 | 162 | 163 | class SinusoidalDatasetForLinReg(_ActiveTestingDataset): 164 | """Sinusoidal data for use with linear regression – proof of concept. 165 | 166 | This dataset has a high and a low-density region. 167 | A clever acquisition strategy is necessary to estimate the error correctly. 168 | 169 | """ 170 | def __init__(self, cfg, *args, **kwargs): 171 | super().__init__(cfg) 172 | 173 | def generate_data(self): 174 | def regression_function(min_x, max_x, n_samples): 175 | x = np.linspace(min_x, max_x, n_samples) 176 | y = np.sin(x * 10) + x ** 3 177 | return (x, y) 178 | 179 | def split_dataset(n_total, min_x=0, max_x=2, center=1): 180 | """Split regression function into high and low density regions.""" 181 | n_low = int(0.1 * n_total) 182 | n_high = int(0.9 * n_total) 183 | 184 | low_density_data = regression_function( 185 | min_x, center - 0.01, n_low) 186 | high_density_data = regression_function( 187 | center, max_x, n_high) 188 | 189 | x = np.concatenate([ 190 | low_density_data[0], high_density_data[0]], 0) 191 | y = np.concatenate([ 192 | low_density_data[1], high_density_data[1]], 0) 193 | 194 | n_low = len(low_density_data[0]) 195 | 196 | return x, y, n_low 197 | 198 | x, y, self.n_low = split_dataset(self.N) 199 | 200 | # TODO: add back!? 201 | # y = y - np.mean(y) 202 | 203 | return x, y 204 | 205 | def train_test_split(self, *args): 206 | """Need to overwrite train_test_split. 207 | Stratify across low and high_density regions. 208 | """ 209 | n_low = self.n_low 210 | n_high = self.N - n_low 211 | 212 | low_train, low_test = super().train_test_split(n_low, test_size=4) 213 | high_train, high_test = super().train_test_split(n_high) 214 | high_train += n_low 215 | high_test += n_low 216 | 217 | train = np.concatenate([low_train, high_train], 0) 218 | test = np.concatenate([low_test, high_test], 0) 219 | 220 | return train, test 221 | 222 | 223 | class GPDatasetForGPReg(_ActiveTestingDataset): 224 | """Sample from GP prior.""" 225 | def __init__(self, cfg, model_cfg, *args, **kwargs): 226 | self.model_cfg = model_cfg 227 | super().__init__(cfg) 228 | 229 | def generate_data(self): 230 | from activetesting.utils import maps 231 | self.model = maps.model[self.model_cfg.name](self.model_cfg) 232 | xmax = self.cfg.get('xmax', 1) 233 | x = np.linspace(0, xmax, self.N)[:, np.newaxis] 234 | y = self.model.sample_y(x, random_state=np.random.randint(0, 10000)) 235 | return x, y 236 | 237 | 238 | class MNISTDataset(_ActiveTestingDataset): 239 | """MNIST Data. 240 | 241 | TODO: Respect train/test split of MNIST. 
242 | """ 243 | def __init__(self, cfg, n_classes=10, *args, **kwargs): 244 | 245 | cfg = OmegaConf.merge( 246 | OmegaConf.structured(cfg), 247 | dict(task_type='classification', global_std=True, 248 | n_classes=n_classes)) 249 | 250 | super().__init__(cfg) 251 | 252 | def generate_data(self): 253 | 254 | data_home = Path(hydra.utils.get_original_cwd()) / 'data/MNIST' 255 | 256 | # from sklearn.datasets import fetch_openml 257 | # x, y = fetch_openml( 258 | # 'mnist_784', version=1, return_X_y=True, data_home=data_home, 259 | # cache=True) 260 | data = keras.datasets.mnist.load_data( 261 | path=data_home / 'mnist.npz' 262 | ) 263 | return self.preprocess(data) 264 | 265 | def preprocess(self, data): 266 | 267 | (x_train, y_train), (x_test, y_test) = data 268 | x = np.concatenate([x_train, x_test], 0) 269 | x = x.astype(np.float32) / 255 270 | x = x.reshape(x.shape[0], -1) 271 | y = np.concatenate([y_train, y_test], 0) 272 | y = y.astype(np.int) 273 | 274 | N = self.N 275 | 276 | if N < y.size: 277 | # get a stratified subset 278 | # note that mnist does not have equal class count 279 | idxs, _ = SKtrain_test_split( 280 | np.arange(0, y.size), train_size=N, stratify=y) 281 | x = x[idxs] 282 | y = y[idxs] 283 | 284 | return x, y 285 | 286 | def train_test_split(self, N): 287 | 288 | if self.cfg.get('respect_train_test', False): 289 | train = np.arange(0, 50000) 290 | n_test = int(self.cfg.test_proportion * 60000) 291 | test = np.random.choice( 292 | np.arange(50000, 60000), n_test, replace=False) 293 | 294 | return train, test 295 | 296 | else: 297 | train, test = super().train_test_split(N) 298 | 299 | # only keep the first n sevens in the train distribution 300 | if n7 := self.cfg.get('n_initial_7', False): 301 | # to get correct indices, need to first select from y 302 | old7 = np.where(self.y == 7)[0] 303 | # then filter to train indicees 304 | old_train7 = np.intersect1d(old7, train) 305 | # now only keep the first n7 306 | sevens_remove = old_train7[n7:] 307 | # and now remove those from the train set 308 | train = np.setdiff1d(train, sevens_remove) 309 | 310 | return train, test 311 | 312 | 313 | class TwoMoonsDataset(_ActiveTestingDataset): 314 | """TwoMoons Data.""" 315 | def __init__(self, cfg, 316 | *args, **kwargs): 317 | 318 | cfg = OmegaConf.merge( 319 | OmegaConf.structured(cfg), 320 | dict(task_type='classification', global_std=False, n_classes=2)) 321 | 322 | super().__init__(cfg) 323 | 324 | def generate_data(self): 325 | 326 | from sklearn.datasets import make_moons 327 | 328 | x, y = make_moons(n_samples=self.cfg.n_points, noise=self.cfg.noise) 329 | 330 | return x, y 331 | 332 | 333 | class FashionMNISTDataset(MNISTDataset): 334 | """FashionMNIST Data. 335 | 336 | TODO: Respect train/test split of FashionMNIST. 337 | """ 338 | def __init__(self, cfg, 339 | *args, **kwargs): 340 | 341 | super().__init__(cfg) 342 | 343 | def generate_data(self): 344 | 345 | data = keras.datasets.fashion_mnist.load_data() 346 | 347 | return self.preprocess(data) 348 | 349 | 350 | class Cifar10Dataset(MNISTDataset): 351 | """CIFAR10 Data. 352 | """ 353 | def __init__(self, cfg, 354 | *args, **kwargs): 355 | 356 | super().__init__(cfg) 357 | 358 | def generate_data(self): 359 | 360 | data = keras.datasets.cifar10.load_data() 361 | 362 | x, y = self.preprocess(data) 363 | x = x.reshape(len(x), 32, 32, 3).transpose(0, 3, 1, 2) 364 | x = x.reshape(len(x), -1) 365 | return x, y[:, 0] 366 | 367 | 368 | class Cifar100Dataset(MNISTDataset): 369 | """CIFAR100 Data. 
370 | """ 371 | def __init__(self, cfg, 372 | *args, **kwargs): 373 | 374 | super().__init__(cfg, n_classes=100) 375 | 376 | def generate_data(self): 377 | 378 | data = keras.datasets.cifar100.load_data() 379 | 380 | x, y = self.preprocess(data) 381 | x = x.reshape(len(x), 32, 32, 3).transpose(0, 3, 1, 2) 382 | x = x.reshape(len(x), -1) 383 | return x, y[:, 0] 384 | 385 | 386 | def get_CIFAR10(): 387 | """From pruning code. Only used for debugging purposes.""" 388 | import os 389 | import torch 390 | from torchvision import transforms, datasets 391 | 392 | root = "./data" 393 | 394 | input_size = 32 395 | num_classes = 10 396 | train_transform = transforms.Compose( 397 | [ 398 | transforms.RandomCrop(32, padding=4), 399 | transforms.RandomHorizontalFlip(), 400 | transforms.ToTensor(), 401 | transforms.Normalize( 402 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 403 | ] 404 | ) 405 | train_dataset = datasets.CIFAR10( 406 | root + "/CIFAR10", train=True, transform=train_transform, download=True 407 | ) 408 | 409 | test_transform = transforms.Compose( 410 | [ 411 | transforms.ToTensor(), 412 | transforms.Normalize( 413 | (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 414 | ] 415 | ) 416 | test_dataset = datasets.CIFAR10( 417 | root + "/CIFAR10", train=False, transform=test_transform, 418 | download=False 419 | ) 420 | 421 | kwargs = {"num_workers": 4, "pin_memory": True} 422 | batch_size = 128 423 | 424 | train_loader = torch.utils.data.DataLoader( 425 | train_dataset, batch_size=batch_size, shuffle=True, **kwargs 426 | ) 427 | 428 | test_loader = torch.utils.data.DataLoader( 429 | test_dataset, batch_size=1000, shuffle=False, **kwargs 430 | ) 431 | 432 | return train_loader, test_loader 433 | -------------------------------------------------------------------------------- /activetesting/experiment.py: -------------------------------------------------------------------------------- 1 | """Run experiment.""" 2 | import pandas as pd 3 | import numpy as np 4 | 5 | from activetesting.utils import maps 6 | 7 | 8 | class Experiment: 9 | """Orchestrates experiment. 10 | 11 | Main goal: Just need to call Experiment.run_experiment() 12 | and a model will get trained and tested. 13 | 14 | This trains and actively tests the models. 15 | 16 | Has a step() method. 17 | Main loop is probably externally controlled for logging purposes. 18 | Maybe not.. 19 | """ 20 | def __init__(self, run, cfg, dataset, model, acquisition, acq_config): 21 | self.cfg = cfg 22 | self.dataset = dataset 23 | self.model = model 24 | 25 | self.risk_estimators = { 26 | risk_estimator: maps.risk_estimator[risk_estimator]( 27 | self.cfg.experiment.loss, 28 | self.dataset, 29 | self.model, 30 | ) 31 | for risk_estimator in self.cfg.risk_estimators} 32 | 33 | true_loss = self.risk_estimators['TrueRiskEstimator'].true_loss_vals 34 | 35 | # TODO: model_cfg is used for surr model. 36 | # TODO: allow to pass a different cfg here, matching the aux model 37 | # TODO: this is needed when aux model and model are different! 
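# For now, fall back to the main model's config when no separate
# acquisition/surrogate config is supplied.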
38 | 39 | acq_config = model.cfg if acq_config is None else acq_config 40 | 41 | self.acquisition = ( 42 | maps.acquisition[acquisition]( 43 | [self.cfg.acquisition, run], 44 | self.dataset, 45 | true_loss_vals=true_loss, 46 | model=self.model, 47 | model_cfg=acq_config)) 48 | 49 | self.finished = False 50 | self.predictions = None 51 | 52 | def estimate_risks(self): 53 | """Estimate test risk.""" 54 | pred = self.predictions 55 | obs = self.dataset.y[self.dataset.test_observed] 56 | 57 | for risk_estimator in self.risk_estimators.values(): 58 | risk_estimator.estimate(pred, obs, self.acquisition.weights) 59 | 60 | def step(self, i): 61 | """Perform a single testing step.""" 62 | 63 | # choose index for next observation 64 | test_idx, pmf_idx = self.acquisition.acquire() 65 | 66 | self.observe_at_idx(i, test_idx) 67 | 68 | return test_idx, pmf_idx 69 | 70 | def observe_at_idx(self, i, idx): 71 | 72 | # add true pmf to logging to plot loss dist 73 | if self.acquisition.check_save(off=1): 74 | true_pmf = ( 75 | self.risk_estimators[ 76 | 'TrueRiskEstimator'].true_loss_all_idxs[ 77 | self.dataset.test_remaining]) 78 | true_pmf = ( 79 | self.acquisition.safe_normalise( 80 | true_pmf)) 81 | 82 | self.acquisition.all_pmfs[-1]['true_pmf'] = true_pmf 83 | 84 | # observe point 85 | x, _ = self.dataset.observe(idx) 86 | 87 | # predict at point 88 | y_pred = self.model.predict(x, [idx]) 89 | 90 | if self.predictions is None: 91 | self.predictions = y_pred 92 | else: 93 | self.predictions = np.concatenate([self.predictions, y_pred], 0) 94 | 95 | # estimate test risk 96 | self.estimate_risks() 97 | 98 | # print( 99 | # x, idx, self.dataset.test_observed.size, 100 | # self.dataset.test_remaining.size) 101 | 102 | if len(self.dataset.test_remaining) == 0: 103 | self.finished = True 104 | 105 | if lim := self.cfg.experiment.get('abort_test_after', False): 106 | if i > lim: 107 | self.finished = True 108 | 109 | def external_step(self, i, test_idx, pmf_idx): 110 | """Externally force experiment to acquire data at 'idx'. """ 111 | # hot-patch the forced acquisition 112 | # (would ideally make this passable to acquire() 113 | # but I can't be bothered 114 | self.acquisition.externally_controlled = True 115 | self.acquisition.ext_test_idx = test_idx 116 | self.acquisition.ext_pmf_idx = pmf_idx 117 | 118 | # need to call this s.t. 
acquisition weights are properly written 119 | self.acquisition.acquire() 120 | self.observe_at_idx(i, test_idx) 121 | 122 | # make valid for next round again 123 | self.acquisition.externally_controlled = False 124 | self.acquisition.ext_test_idx = None 125 | self.acquisition.ext_pmf_idx = None 126 | 127 | def export_data(self): 128 | """Extract data from experiment.""" 129 | if self.dataset.cfg.task_type == 'classification': 130 | preds = np.argmax(self.predictions, 1) 131 | else: 132 | preds = self.predictions 133 | 134 | result = dict( 135 | id=np.arange(0, len(self.dataset.test_observed)), 136 | idx=self.dataset.test_observed, 137 | y_preds=preds, 138 | y_true=self.dataset.y[self.dataset.test_observed] 139 | ) 140 | 141 | result.update( 142 | {risk_name: risk.risks for risk_name, risk 143 | in self.risk_estimators.items()}) 144 | 145 | result = pd.DataFrame.from_dict(result) 146 | 147 | return result, self.acquisition.all_pmfs 148 | -------------------------------------------------------------------------------- /activetesting/hoover.py: -------------------------------------------------------------------------------- 1 | """Keep track of data across runs.""" 2 | from pathlib import Path 3 | import logging 4 | import pickle 5 | from collections import defaultdict 6 | 7 | 8 | class Hoover: 9 | """Sucks up all data generated by experiment.""" 10 | 11 | def __init__(self, cfg, name=None): 12 | self.datasets = dict() 13 | self.pmfs = defaultdict(dict) 14 | self.results = None 15 | self.cfg = cfg 16 | if name is None: 17 | self.name = None 18 | else: 19 | self.name = name 20 | 21 | def add_data(self, run, export): 22 | if self.cfg.get('save_data', False): 23 | self.datasets.update({run: export}) 24 | 25 | def add_results(self, run, acquisition, export): 26 | export, all_pmfs = export 27 | export['run'] = run 28 | export['acquisition'] = acquisition 29 | 30 | if self.results is None: 31 | self.results = export 32 | else: 33 | self.results = self.results.append(export, ignore_index=True) 34 | 35 | if all_pmfs is not None: 36 | self.pmfs[run][acquisition] = all_pmfs 37 | 38 | def save(self): 39 | if self.name is None: 40 | base = Path('.') 41 | else: 42 | base = Path(f'model_{self.name}') 43 | base.mkdir(parents=True, exist_ok=True) 44 | 45 | if self.cfg.save_data: 46 | pickle.dump(self.datasets, open(base / "datasets.pkl", "wb")) 47 | pickle.dump(self.pmfs, open(base / "pmfs.pkl", "wb")) 48 | self.results.to_pickle(base / 'results.pkl') 49 | logging.info('Saving results to file.') 50 | -------------------------------------------------------------------------------- /activetesting/loss.py: -------------------------------------------------------------------------------- 1 | """Define losses.""" 2 | 3 | import numpy as np 4 | from sklearn.preprocessing import OneHotEncoder 5 | 6 | 7 | class SELoss: 8 | def __init__(self, *args, **kwargs): 9 | pass 10 | 11 | def __call__(self, pred, target, *args, **kwargs): 12 | """Does not aggregate.""" 13 | return (pred-target)**2 14 | 15 | 16 | class MSELoss: 17 | def __init__(self, *args, **kwargs): 18 | pass 19 | 20 | def __call__(self, pred, target, *args, **kwargs): 21 | """Aggregates to single digit.""" 22 | return (SELoss()(pred, target, *args, **kwargs)).mean() 23 | 24 | 25 | class RMSELoss: 26 | def __init__(self, *args, **kwargs): 27 | pass 28 | 29 | def __call__(self, pred, target, *args, **kwargs): 30 | """Aggregates to single digit.""" 31 | return np.sqrt(MSELoss()(pred, target, *args, **kwargs)) 32 | 33 | 34 | class AccuracyLoss: 35 | def 
__init__(self, *args, **kwargs): 36 | pass 37 | 38 | def __call__(self, pred, target): 39 | """Compute 1 - accuracy. 40 | 41 | Expects pred to be probabilities NxC and target to be in [1,..., C]. 42 | 43 | Currently inconsistent with Crossentropy loss. 44 | 45 | """ 46 | return 1. - (np.argmax(pred, axis=1) == target).astype(np.float) 47 | 48 | 49 | class CrossEntropyLoss: 50 | 51 | enc = None 52 | eps = 1e-15 53 | 54 | def __call__(self, pred, target): 55 | """Compute Cross-entropy loss. 56 | 57 | TODO: Numerical instabilities? 58 | pred: Predicted probabilities, NxC 59 | target: true class values in [1,..., C], N times 60 | """ 61 | 62 | # One-Hot Encode 63 | if CrossEntropyLoss.enc is None: 64 | CrossEntropyLoss.enc = OneHotEncoder(sparse=False) 65 | CrossEntropyLoss.enc.fit( 66 | np.arange(0, pred.shape[1])[..., np.newaxis]) 67 | 68 | # Clipping 69 | pred = np.clip(pred, self.eps, 1 - self.eps) 70 | # Renormalize 71 | pred /= pred.sum(axis=1)[:, np.newaxis] 72 | 73 | one_hot = CrossEntropyLoss.enc.transform(target[..., np.newaxis]) 74 | res = -1 * (one_hot * np.log(pred)).sum(axis=1) 75 | 76 | return res -------------------------------------------------------------------------------- /activetesting/models/__init__.py: -------------------------------------------------------------------------------- 1 | from activetesting.models.skmodels import * 2 | from activetesting.models.torchmodels import * 3 | from activetesting.models.efficient_models import * 4 | -------------------------------------------------------------------------------- /activetesting/models/cnn/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .vscode 3 | *.pt 4 | *.err 5 | *.out 6 | .nfs* 7 | *.png 8 | plots 9 | runs 10 | data 11 | notification.sh 12 | 13 | -------------------------------------------------------------------------------- /activetesting/models/cnn/models.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | 4 | from .resnet_custom import resnet18, resnet32_grasp 5 | from .wide_resnet import Wide_ResNet 6 | from torchvision.models import vgg 7 | 8 | 9 | class Model(nn.Module): 10 | def __init__(self, input_size, num_classes): 11 | super().__init__() 12 | 13 | self.conv1 = nn.Conv2d(1, 64, 3, padding=1) 14 | self.bn1 = nn.BatchNorm2d(64) 15 | 16 | self.conv2 = nn.Conv2d(64, 128, 3, padding=1) 17 | self.bn2 = nn.BatchNorm2d(128) 18 | 19 | self.conv3 = nn.Conv2d(128, 128, 3) 20 | self.bn3 = nn.BatchNorm2d(128) 21 | 22 | self.fc1 = nn.Linear(2 * 2 * 128, 256) 23 | self.fc2 = nn.Linear(256, num_classes) 24 | 25 | def forward(self, x): 26 | x = F.relu(self.bn1(self.conv1(x))) 27 | x = F.max_pool2d(x, 2, 2) 28 | 29 | x = F.relu(self.bn2(self.conv2(x))) 30 | x = F.max_pool2d(x, 2, 2) 31 | 32 | x = F.relu(self.bn3(self.conv3(x))) 33 | x = F.max_pool2d(x, 2, 2) 34 | 35 | x = x.flatten(1) 36 | 37 | x = F.relu(self.fc1(x)) 38 | x = self.fc2(x) 39 | 40 | x = F.log_softmax(x, dim=1) 41 | 42 | return x 43 | 44 | 45 | class DeepModel(nn.Module): 46 | def __init__(self, input_size, num_classes, config): 47 | super().__init__() 48 | 49 | # assert input_size in [32, 64], "Imagenet is not supported yet" 50 | 51 | if config == "resnet18": 52 | self.model = resnet18(num_classes=num_classes) 53 | elif config == "resnet32grasp": 54 | self.model = resnet32_grasp(num_classes=num_classes) 55 | elif config == "wideresnet": 56 | dropout_rate = 0.0 57 | # self.model = 
Wide_ResNet(28, 10, dropout_rate, num_classes) 58 | self.model = Wide_ResNet(40, 10, dropout_rate, num_classes) 59 | elif config == "vgg19": 60 | # Adapted from: 61 | # https://github.com/alecwangcq/GraSP/blob/master/models/base/vgg.py 62 | cfg = [ 63 | 64, 64 | 64, 65 | "M", 66 | 128, 67 | 128, 68 | "M", 69 | 256, 70 | 256, 71 | 256, 72 | 256, 73 | "M", 74 | 512, 75 | 512, 76 | 512, 77 | 512, 78 | "M", 79 | 512, 80 | 512, 81 | 512, 82 | 512, 83 | ] 84 | self.model = vgg.VGG( 85 | vgg.make_layers(cfg, batch_norm=True), num_classes=num_classes 86 | ) 87 | self.model.avgpool = nn.AdaptiveAvgPool2d((2, 2)) 88 | self.model.classifier = nn.Sequential( 89 | nn.Linear(512 * 2 * 2, 1024), 90 | nn.ReLU(True), 91 | nn.Linear(1024, 512), 92 | nn.ReLU(True), 93 | nn.Linear(512, num_classes), 94 | ) 95 | else: 96 | raise TypeError 97 | 98 | def forward(self, x): 99 | x = self.model(x) 100 | 101 | return x 102 | -------------------------------------------------------------------------------- /activetesting/models/cnn/resnet_custom.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | __all__ = ["ResNet", "resnet18", "resnet32_grasp", "resnet32_eigendamage"] 8 | 9 | 10 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 11 | """3x3 convolution with padding""" 12 | return nn.Conv2d( 13 | in_planes, 14 | out_planes, 15 | kernel_size=3, 16 | stride=stride, 17 | padding=dilation, 18 | groups=groups, 19 | bias=False, 20 | dilation=dilation, 21 | ) 22 | 23 | 24 | def conv1x1(in_planes, out_planes, stride=1): 25 | """1x1 convolution""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__( 33 | self, 34 | inplanes, 35 | planes, 36 | stride=1, 37 | downsample=None, 38 | groups=1, 39 | base_width=64, 40 | dilation=1, 41 | norm_layer=None, 42 | ): 43 | super(BasicBlock, self).__init__() 44 | if norm_layer is None: 45 | norm_layer = nn.BatchNorm2d 46 | if groups != 1 or base_width != 64: 47 | raise ValueError("BasicBlock only supports groups=1 and base_width=64") 48 | if dilation > 1: 49 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 50 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 51 | self.conv1 = conv3x3(inplanes, planes, stride) 52 | self.bn1 = norm_layer(planes) 53 | self.relu = nn.ReLU(inplace=True) 54 | self.conv2 = conv3x3(planes, planes) 55 | self.bn2 = norm_layer(planes) 56 | self.downsample = downsample 57 | self.stride = stride 58 | 59 | def forward(self, x): 60 | identity = x 61 | 62 | out = self.conv1(x) 63 | out = self.bn1(out) 64 | out = self.relu(out) 65 | 66 | out = self.conv2(out) 67 | out = self.bn2(out) 68 | 69 | if self.downsample is not None: 70 | identity = self.downsample(x) 71 | 72 | out += identity 73 | out = self.relu(out) 74 | 75 | return out 76 | 77 | 78 | class Bottleneck(nn.Module): 79 | # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) 80 | # while original implementation places the stride at the first 1x1 convolution(self.conv1) 81 | # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 82 | # This variant is also known as ResNet V1.5 and improves accuracy according to 83 | # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 
84 | 85 | expansion = 4 86 | 87 | def __init__( 88 | self, 89 | inplanes, 90 | planes, 91 | stride=1, 92 | downsample=None, 93 | groups=1, 94 | base_width=64, 95 | dilation=1, 96 | norm_layer=None, 97 | ): 98 | super(Bottleneck, self).__init__() 99 | if norm_layer is None: 100 | norm_layer = nn.BatchNorm2d 101 | width = int(planes * (base_width / 64.0)) * groups 102 | 103 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 104 | self.conv1 = conv1x1(inplanes, width) 105 | self.bn1 = norm_layer(width) 106 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 107 | self.bn2 = norm_layer(width) 108 | self.conv3 = conv1x1(width, planes * self.expansion) 109 | self.bn3 = norm_layer(planes * self.expansion) 110 | self.relu = nn.ReLU(inplace=True) 111 | self.downsample = downsample 112 | self.stride = stride 113 | 114 | def forward(self, x): 115 | identity = x 116 | 117 | out = self.conv1(x) 118 | out = self.bn1(out) 119 | out = self.relu(out) 120 | 121 | out = self.conv2(out) 122 | out = self.bn2(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv3(out) 126 | out = self.bn3(out) 127 | 128 | if self.downsample is not None: 129 | identity = self.downsample(x) 130 | 131 | out += identity 132 | out = self.relu(out) 133 | 134 | return out 135 | 136 | 137 | class ResNet(nn.Module): 138 | def __init__( 139 | self, 140 | block, 141 | layer_channels, 142 | channels, 143 | num_classes=1000, 144 | zero_init_residual=False, 145 | groups=1, 146 | width_per_group=64, 147 | replace_stride_with_dilation=None, 148 | norm_layer=None, 149 | ): 150 | super(ResNet, self).__init__() 151 | if norm_layer is None: 152 | norm_layer = nn.BatchNorm2d 153 | self._norm_layer = norm_layer 154 | 155 | self.zero_init_residual = zero_init_residual 156 | 157 | self.inplanes = channels[0] 158 | self.dilation = 1 159 | if replace_stride_with_dilation is None: 160 | # each element in the tuple indicates if we should replace 161 | # the 2x2 stride with a dilated convolution instead 162 | replace_stride_with_dilation = [False, False, False] 163 | if len(replace_stride_with_dilation) != 3: 164 | raise ValueError( 165 | "replace_stride_with_dilation should be None " 166 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation) 167 | ) 168 | self.groups = groups 169 | self.base_width = width_per_group 170 | 171 | # Customised for CIFAR-10, needs to be large conv for imagenet 172 | self.conv1 = nn.Conv2d( 173 | 3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False 174 | ) 175 | self.bn1 = norm_layer(self.inplanes) 176 | self.relu = nn.ReLU(inplace=True) 177 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 178 | self.layer1 = self._make_layer(block, self.inplanes, layer_channels[0]) 179 | self.layer2 = self._make_layer( 180 | block, 181 | channels[1], 182 | layer_channels[1], 183 | stride=2, 184 | dilate=replace_stride_with_dilation[0], 185 | ) 186 | self.layer3 = self._make_layer( 187 | block, 188 | channels[2], 189 | layer_channels[2], 190 | stride=2, 191 | dilate=replace_stride_with_dilation[1], 192 | ) 193 | 194 | if len(layer_channels) == 4: 195 | self.layer4 = self._make_layer( 196 | block, 197 | channels[3], 198 | layer_channels[3], 199 | stride=2, 200 | dilate=replace_stride_with_dilation[2], 201 | ) 202 | self.fc = nn.Linear(channels[3] * block.expansion, num_classes) 203 | else: 204 | # only three layers 205 | self.fc = nn.Linear(channels[2] * block.expansion, num_classes) 206 | 207 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 208 | 209 | 
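    # Note: the stem above is adapted for CIFAR-sized inputs (3x3 conv, stride 1,
    # rather than the large ImageNet-style stem); the initial maxpool is defined
    # but skipped in _forward_impl further below. The helper that follows,
    # _initialize_weights, applies Kaiming-normal init to the convolutions,
    # N(0, 0.01) to the final linear layer, and zero-initialises the last
    # BatchNorm of each residual block when zero_init_residual is set. It is not
    # invoked from __init__ in this file.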
def _initialize_weights(self): 210 | for m in self.modules(): 211 | if isinstance(m, nn.Conv2d): 212 | # kaiming_uniform_ in default conv2d 213 | nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") 214 | elif isinstance(m, nn.Linear): 215 | # better accuracy and more stable pruning? - 94.7 accuracy 216 | nn.init.normal_(m.weight, 0, 0.01) 217 | # better pruning? - 94.4 accuracy 218 | # nn.init.xavier_normal_(m.weight) 219 | nn.init.constant_(m.bias, 0) 220 | 221 | # Zero-initialize the last BN in each residual branch, 222 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 223 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 224 | if self.zero_init_residual: 225 | for m in self.modules(): 226 | if isinstance(m, Bottleneck): 227 | nn.init.constant_(m.bn3.weight, 0) 228 | elif isinstance(m, BasicBlock): 229 | nn.init.constant_(m.bn2.weight, 0) 230 | 231 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 232 | norm_layer = self._norm_layer 233 | downsample = None 234 | previous_dilation = self.dilation 235 | if dilate: 236 | self.dilation *= stride 237 | stride = 1 238 | if stride != 1 or self.inplanes != planes * block.expansion: 239 | downsample = nn.Sequential( 240 | conv1x1(self.inplanes, planes * block.expansion, stride), 241 | norm_layer(planes * block.expansion), 242 | ) 243 | name = "ds_block" 244 | else: 245 | name = "n_block" 246 | 247 | layers = OrderedDict() 248 | layers[name + "0"] = block( 249 | self.inplanes, 250 | planes, 251 | stride, 252 | downsample, 253 | self.groups, 254 | self.base_width, 255 | previous_dilation, 256 | norm_layer, 257 | ) 258 | self.inplanes = planes * block.expansion 259 | for i in range(1, blocks): 260 | layers["n_block" + str(i)] = block( 261 | self.inplanes, 262 | planes, 263 | groups=self.groups, 264 | base_width=self.base_width, 265 | dilation=self.dilation, 266 | norm_layer=norm_layer, 267 | ) 268 | 269 | return nn.Sequential(layers) 270 | 271 | def _forward_impl(self, x): 272 | # See note [TorchScript super()] 273 | x = self.conv1(x) 274 | x = self.bn1(x) 275 | x = self.relu(x) 276 | # x = self.maxpool(x) - Remove for now, back in with ImageNet 277 | 278 | x = self.layer1(x) 279 | x = self.layer2(x) 280 | x = self.layer3(x) 281 | 282 | if hasattr(self, "layer4"): 283 | x = self.layer4(x) 284 | 285 | x = self.avgpool(x) 286 | x = torch.flatten(x, 1) 287 | x = self.fc(x) 288 | 289 | return x 290 | 291 | def forward(self, x): 292 | return self._forward_impl(x) 293 | 294 | 295 | def _resnet(arch, block, layer_channels, channels, **kwargs): 296 | model = ResNet(block, layer_channels, channels, **kwargs) 297 | return model 298 | 299 | 300 | def resnet18(pretrained=False, progress=True, **kwargs): 301 | r"""ResNet-18 model from 302 | `"Deep Residual Learning for Image Recognition" `_ 303 | 304 | Args: 305 | pretrained (bool): If True, returns a model pre-trained on ImageNet 306 | progress (bool): If True, displays a progress bar of the download to stderr 307 | """ 308 | layer_channels = [64, 128, 256, 512] 309 | return _resnet("resnet18", BasicBlock, [2, 2, 2, 2], layer_channels, **kwargs) 310 | 311 | 312 | def resnet32_grasp(**kwargs): 313 | # https://github.com/alecwangcq/GraSP/blob/master/models/base/resnet.py#L55 314 | layer_channels = [32, 64, 128] 315 | 316 | depth = 32 317 | n = (depth - 2) // 6 318 | 319 | return _resnet("resnet32", BasicBlock, [n] * 3, layer_channels, **kwargs) 320 | 321 | 322 | def 
resnet32_eigendamage(**kwargs): 323 | # https://github.com/alecwangcq/EigenDamage-Pytorch/blob/master/models/resnet.py#L111 324 | 325 | layer_channels = [64, 128, 256] 326 | 327 | depth = 32 328 | n = (depth - 2) // 6 329 | 330 | return _resnet("resnet32", BasicBlock, [n] * 3, layer_channels, **kwargs) 331 | -------------------------------------------------------------------------------- /activetesting/models/cnn/wide_resnet.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.init as init 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | 10 | def conv3x3(in_planes, out_planes, stride=1): 11 | return nn.Conv2d( 12 | in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True 13 | ) 14 | 15 | 16 | def conv_init(m): 17 | classname = m.__class__.__name__ 18 | if classname.find("Conv") != -1: 19 | init.xavier_uniform_(m.weight, gain=1.414) 20 | init.constant_(m.bias, 0) 21 | elif classname.find("BatchNorm") != -1: 22 | init.constant_(m.weight, 1) 23 | init.constant_(m.bias, 0) 24 | 25 | 26 | class wide_basic(nn.Module): 27 | def __init__(self, in_planes, planes, dropout_rate, stride=1): 28 | super(wide_basic, self).__init__() 29 | self.bn1 = nn.BatchNorm2d(in_planes) 30 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True) 31 | self.dropout = nn.Dropout(p=dropout_rate) 32 | self.bn2 = nn.BatchNorm2d(planes) 33 | self.conv2 = nn.Conv2d( 34 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=True 35 | ) 36 | 37 | self.downsample = None 38 | if stride != 1 or in_planes != planes: 39 | self.downsample = nn.Sequential( 40 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True), 41 | ) 42 | 43 | def forward(self, x): 44 | out = self.dropout(self.conv1(F.relu(self.bn1(x)))) 45 | out = self.conv2(F.relu(self.bn2(out))) 46 | 47 | if self.downsample is not None: 48 | identity = self.downsample(x) 49 | else: 50 | identity = x 51 | 52 | out += identity 53 | 54 | return out 55 | 56 | 57 | class Wide_ResNet(nn.Module): 58 | def __init__(self, depth, widen_factor, dropout_rate, num_classes): 59 | super(Wide_ResNet, self).__init__() 60 | self.in_planes = 16 61 | 62 | assert (depth - 4) % 6 == 0, "Wide-resnet depth should be 6n+4" 63 | n = (depth - 4) / 6 64 | k = widen_factor 65 | 66 | nStages = [16, 16 * k, 32 * k, 64 * k] 67 | 68 | self.conv1 = conv3x3(3, nStages[0]) 69 | self.layer1 = self._wide_layer( 70 | wide_basic, nStages[1], n, dropout_rate, stride=1 71 | ) 72 | self.layer2 = self._wide_layer( 73 | wide_basic, nStages[2], n, dropout_rate, stride=2 74 | ) 75 | self.layer3 = self._wide_layer( 76 | wide_basic, nStages[3], n, dropout_rate, stride=2 77 | ) 78 | self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9) 79 | self.linear = nn.Linear(nStages[3], num_classes) 80 | 81 | def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride): 82 | strides = [stride] + [1] * (int(num_blocks) - 1) 83 | names = ["ds_block"] + ["n_block"] * (int(num_blocks) - 1) 84 | 85 | layers = OrderedDict() 86 | for i, stride in enumerate(strides): 87 | layers[names[i]] = block(self.in_planes, planes, dropout_rate, stride) 88 | self.in_planes = planes 89 | 90 | return nn.Sequential(layers) 91 | 92 | def forward(self, x): 93 | out = self.conv1(x) 94 | out = self.layer1(out) 95 | out = self.layer2(out) 96 | out = self.layer3(out) 97 | out = F.relu(self.bn1(out)) 98 | out = 
F.avg_pool2d(out, 8) 99 | out = out.view(out.size(0), -1) 100 | out = self.linear(out) 101 | 102 | return out 103 | 104 | 105 | if __name__ == "__main__": 106 | net = Wide_ResNet(28, 10, 0.3, 10) 107 | y = net(Variable(torch.randn(1, 3, 32, 32))) 108 | 109 | print(y.size()) 110 | -------------------------------------------------------------------------------- /activetesting/models/efficient_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from types import MethodType 3 | 4 | 5 | def make_efficient(model, dataset): 6 | """Model is constant over acquisition. 7 | 8 | Exploit this for efficiency gains. 9 | Predict on all unobserved test data once at the beginning of training. 10 | Then, when predict is called, just regurgitate these predictions. 11 | 12 | Currently this does not save stds from predictions. 13 | 14 | If make_efficient is called twice, the model will predict again! 15 | """ 16 | idxs = dataset.test_remaining 17 | x = dataset.x[idxs] 18 | 19 | if getattr(model, 'efficient_instance', False): 20 | if model.cfg.task_type == 'regression': 21 | out = model.real_predict(x, return_std=True) 22 | else: 23 | out = model.real_predict(x) 24 | else: 25 | if model.cfg.task_type == 'regression': 26 | out = model.predict(x, return_std=True) 27 | else: 28 | out = model.predict(x) 29 | model = EfficientModel(model) 30 | 31 | if isinstance(out, tuple): 32 | # Handle with std 33 | out = list(out) 34 | if out[0].ndim == 1: 35 | preds = np.zeros(dataset.N) 36 | stds = np.zeros(dataset.N) 37 | else: 38 | preds = np.zeros((dataset.N, out[0].shape[1])) 39 | stds = np.zeros((dataset.N, out[1].shape[1])) 40 | 41 | preds[idxs] = out[0] 42 | stds[idxs] = out[1] 43 | model.test_predictions = preds 44 | model.test_predictions_std = stds 45 | else: 46 | if out.ndim == 1: 47 | preds = np.zeros(dataset.N) 48 | else: 49 | preds = np.zeros((dataset.N, out.shape[1])) 50 | preds[idxs] = out 51 | model.test_predictions = preds 52 | model.test_predictions_std = None 53 | 54 | if getattr(model.model, 'has_mi', False): 55 | mis = np.zeros(dataset.N) 56 | mi = model.model.predict(x, mutual_info=True) 57 | mis[idxs] = mi 58 | model.test_predictions_mi = mis 59 | else: 60 | model.test_predictions_mi = None 61 | 62 | return model 63 | 64 | 65 | class EfficientModel(): 66 | 67 | def __init__(self, model): 68 | self.model = model 69 | self.cfg = self.model.cfg 70 | self.efficient_instance = True 71 | 72 | def fit(self, *args, **kwargs): 73 | if self.cfg.get('keep_constant', False): 74 | print('debug: no refitting, is efficient') 75 | pass 76 | else: 77 | return self.model.fit(self, *args, **kwargs) 78 | 79 | def real_fit(self, *args, **kwargs): 80 | return self.model.fit(*args, **kwargs) 81 | 82 | def predict(self, *args, **kwargs): 83 | return self.efficient_predict(*args, **kwargs) 84 | 85 | def real_predict(self, *args, **kwargs): 86 | return self.model.predict(*args, **kwargs) 87 | 88 | def efficient_predict( 89 | self, data, idxs, return_std=False, mutual_info=False, 90 | no_lazy=False, *args, **kwargs): 91 | 92 | if no_lazy: 93 | self.real_predict( 94 | data, *args, return_std=return_std, 95 | mutual_info=mutual_info, **kwargs) 96 | 97 | if return_std and self.test_predictions_std is not None: 98 | return (self.test_predictions[idxs], 99 | self.test_predictions_std[idxs]) 100 | 101 | elif mutual_info and self.test_predictions_mi is not None: 102 | return self.test_predictions_mi[idxs] 103 | 104 | else: 105 | return self.test_predictions[idxs] 106 | 
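# Usage sketch (illustrative only; `model` and `dataset` are assumed to follow the
# interfaces used above: dataset.x, dataset.N, dataset.test_remaining, model.cfg,
# model.predict). `make_efficient` wraps an already-fitted model so that repeated
# `predict` calls during acquisition are answered from a single cached forward
# pass over the remaining test pool:
#
#     model = make_efficient(model, dataset)        # predicts once and caches
#     idxs = dataset.test_remaining[:10]            # indices into the test pool
#     preds = model.predict(dataset.x[idxs], idxs)  # served from the cache
#
# The cache is indexed by absolute dataset indices, so `idxs` must come from the
# same index space as `dataset.test_remaining`.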
-------------------------------------------------------------------------------- /activetesting/models/radial_bnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .bnn_models import RadialBNN 2 | -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/bnn_models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines models for active learning experiments. 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from . import consistent_mc_dropout 9 | 10 | from .radial_layers.variational_bayes import ( 11 | SVI_Linear, SVIConv2D, SVIMaxPool2D) 12 | 13 | 14 | class BNN(nn.Module): 15 | def __init__(self, p, channels): 16 | super().__init__() 17 | self.p = p 18 | self.conv1 = nn.Conv2d(1, channels, kernel_size=5) 19 | self.conv2 = nn.Conv2d(channels, 2*channels, kernel_size=5) 20 | self.fc_in_dim = 32 * channels 21 | self.fc1 = nn.Linear(self.fc_in_dim, 128) 22 | self.fc2 = nn.Linear(128, 10) 23 | 24 | def forward(self, x): 25 | x = F.relu(F.max_pool2d(F.dropout2d(self.conv1(x), p=self.p, training=True), 2)) 26 | x = F.relu(F.max_pool2d(F.dropout2d(self.conv2(x), p=self.p, training=True), 2)) 27 | x = x.view(-1, self.fc_in_dim) 28 | x = F.relu(F.dropout(self.fc1(x), p=self.p, training=True)) 29 | x = self.fc2(x) 30 | return F.log_softmax(x, dim=1) 31 | 32 | 33 | class RadialBNN(nn.Module): 34 | def __init__(self, channels): 35 | super(RadialBNN, self).__init__() 36 | prior = {"name": "gaussian_prior", 37 | "sigma": 0.25, 38 | "mu": 0} 39 | initial_rho = -4 40 | self.conv1 = SVIConv2D(1, channels, [5,5], "radial", prior, initial_rho, "he") 41 | self.conv2 = SVIConv2D(channels, channels * 2, [5, 5], "radial", prior, initial_rho, "he") 42 | self.fc_in_dim = 32 * channels 43 | self.fc1 = SVI_Linear(self.fc_in_dim, 128, initial_rho, "he", "radial", prior) 44 | self.fc2 = SVI_Linear(128, 10, initial_rho, "he", "radial", prior) 45 | self.maxpool = SVIMaxPool2D((2,2)) 46 | 47 | def forward(self, x): 48 | x = F.relu(self.maxpool(self.conv1(x))) 49 | x = F.relu(self.maxpool(self.conv2(x))) 50 | variational_samples = x.shape[1] 51 | x = x.view(-1, variational_samples, self.fc_in_dim) 52 | x = F.relu(self.fc1(x)) 53 | x = self.fc2(x) 54 | return F.log_softmax(x, dim=2) 55 | 56 | 57 | class TinyRadialBNN(nn.Module): 58 | def __init__(self): 59 | super(TinyRadialBNN, self).__init__() 60 | prior = {"name": "gaussian_prior", 61 | "sigma": 0.25, 62 | "mu": 0} 63 | initial_rho = -4 64 | self.fc1 = SVI_Linear(784, 50, initial_rho, "he", "radial", prior) 65 | self.fc2 = SVI_Linear(50, 10, initial_rho, "he", "radial", prior) 66 | 67 | def forward(self, x): 68 | variational_samples = x.shape[1] 69 | x = x.view(-1, variational_samples, 784) 70 | x = F.relu(self.fc1(x)) 71 | x = self.fc2(x) 72 | return F.log_softmax(x, dim=2) 73 | 74 | class LinearRadialBNN(nn.Module): 75 | def __init__(self): 76 | super(TinyRadialBNN, self).__init__() 77 | prior = {"name": "gaussian_prior", 78 | "sigma": 0.25, 79 | "mu": 0} 80 | initial_rho = -4 81 | self.fc1 = SVI_Linear(784,10, initial_rho, "he", "radial", prior) 82 | 83 | def forward(self, x): 84 | variational_samples = x.shape[1] 85 | x = x.view(-1, variational_samples, 784) 86 | x = self.fc1(x) 87 | return F.log_softmax(x, dim=2) 88 | 89 | 90 | class ToyBNN(nn.Module): 91 | def __init__(self, p): 92 | super().__init__() 93 | self.p = p 94 | self.fc1 = nn.Linear(2, 50) 95 | 
self.fc2 = nn.Linear(50, 2) 96 | 97 | def forward(self, x): 98 | x = x.view(-1, 2) 99 | x = F.relu(F.dropout(self.fc1(x), p=self.p, training=True)) 100 | x = self.fc2(x) 101 | return F.log_softmax(x, dim=1) 102 | 103 | 104 | class ConsistentBNN(consistent_mc_dropout.BayesianModule): 105 | def __init__(self, num_classes=10): 106 | super().__init__() 107 | 108 | self.conv1 = nn.Conv2d(1, 32, kernel_size=5) 109 | self.conv1_drop = consistent_mc_dropout.ConsistentMCDropout2d() 110 | self.conv2 = nn.Conv2d(32, 64, kernel_size=5) 111 | self.conv2_drop = consistent_mc_dropout.ConsistentMCDropout2d() 112 | self.fc1 = nn.Linear(1024, 128) 113 | self.fc1_drop = consistent_mc_dropout.ConsistentMCDropout() 114 | self.fc2 = nn.Linear(128, num_classes) 115 | 116 | def mc_forward_impl(self, input: torch.Tensor): 117 | input = F.relu(F.max_pool2d(self.conv1_drop(self.conv1(input)), 2)) 118 | input = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(input)), 2)) 119 | input = input.view(-1, 1024) 120 | input = F.relu(self.fc1_drop(self.fc1(input))) 121 | input = self.fc2(input) 122 | input = F.log_softmax(input, dim=1) 123 | 124 | return input -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/consistent_mc_dropout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module 3 | 4 | class BayesianModule(Module): 5 | """A module that we can sample multiple times from given a single input batch. 6 | To be efficient, the module allows for a part of the forward pass to be deterministic. 7 | """ 8 | 9 | k = None 10 | 11 | def __init__(self): 12 | super().__init__() 13 | 14 | # Returns B x n x output 15 | def forward(self, input_B: torch.Tensor, k: int): 16 | BayesianModule.k = k 17 | 18 | mc_input_BK = BayesianModule.mc_tensor(input_B, k) 19 | mc_output_BK = self.mc_forward_impl(mc_input_BK) 20 | mc_output_B_K = BayesianModule.unflatten_tensor(mc_output_BK, k) 21 | return mc_output_B_K 22 | 23 | def mc_forward_impl(self, mc_input_BK: torch.Tensor): 24 | return mc_input_BK 25 | 26 | @staticmethod 27 | def unflatten_tensor(input: torch.Tensor, k: int): 28 | input = input.view([-1, k] + list(input.shape[1:])) 29 | return input 30 | 31 | @staticmethod 32 | def flatten_tensor(mc_input: torch.Tensor): 33 | return mc_input.flatten(0, 1) 34 | 35 | @staticmethod 36 | def mc_tensor(input: torch.tensor, k: int): 37 | mc_shape = [input.shape[0], k] + list(input.shape[1:]) 38 | return input.unsqueeze(1).expand(mc_shape).flatten(0, 1) 39 | 40 | 41 | class _ConsistentMCDropout(Module): 42 | def __init__(self, p=0.5): 43 | super().__init__() 44 | 45 | if p < 0 or p > 1: 46 | raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p)) 47 | 48 | self.p = p 49 | self.mask = None 50 | 51 | def extra_repr(self): 52 | return "p={}".format(self.p) 53 | 54 | def reset_mask(self): 55 | self.mask = None 56 | 57 | def train(self, mode=True): 58 | super().train(mode) 59 | if not mode: 60 | self.reset_mask() 61 | 62 | def _get_sample_mask_shape(self, sample_shape): 63 | return sample_shape 64 | 65 | def _create_mask(self, input, k): 66 | mask_shape = [1, k] + list(self._get_sample_mask_shape(input.shape[1:])) 67 | mask = torch.empty(mask_shape, dtype=torch.bool, device=input.device).bernoulli_(self.p) 68 | return mask 69 | 70 | def forward(self, input: torch.Tensor): 71 | if self.p == 0.0: 72 | return input 73 | 74 | k = BayesianModule.k 75 | if self.training: 76 | # Create a 
new mask on each call and for each batch element. 77 | k = input.shape[0] 78 | mask = self._create_mask(input, k) 79 | else: 80 | if self.mask is None: 81 | # print('recreating mask', self) 82 | # Recreate mask. 83 | self.mask = self._create_mask(input, k) 84 | 85 | mask = self.mask 86 | 87 | mc_input = BayesianModule.unflatten_tensor(input, k) 88 | mc_output = mc_input.masked_fill(mask, 0) / (1 - self.p) 89 | 90 | # Flatten MCDI, batch into one dimension again. 91 | return BayesianModule.flatten_tensor(mc_output) 92 | 93 | 94 | class ConsistentMCDropout(_ConsistentMCDropout): 95 | r"""Randomly zeroes some of the elements of the input 96 | tensor with probability :attr:`p` using samples from a Bernoulli 97 | distribution. The elements to zero are randomized on every forward call during training time. 98 | During eval time, a fixed mask is picked and kept until `reset_mask()` is called. 99 | This has proven to be an effective technique for regularization and 100 | preventing the co-adaptation of neurons as described in the paper 101 | `Improving neural networks by preventing co-adaptation of feature 102 | detectors`_ . 103 | Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during 104 | training. This means that during evaluation the module simply computes an 105 | identity function. 106 | Args: 107 | p: probability of an element to be zeroed. Default: 0.5 108 | inplace: If set to ``True``, will do this operation in-place. Default: ``False`` 109 | Shape: 110 | - Input: `Any`. Input can be of any shape 111 | - Output: `Same`. Output is of the same shape as input 112 | Examples:: 113 | >>> m = nn.Dropout(p=0.2) 114 | >>> input = torch.randn(20, 16) 115 | >>> output = m(input) 116 | .. _Improving neural networks by preventing co-adaptation of feature 117 | detectors: https://arxiv.org/abs/1207.0580 118 | """ 119 | pass 120 | 121 | 122 | class ConsistentMCDropout2d(_ConsistentMCDropout): 123 | r"""Randomly zeroes whole channels of the input tensor. 124 | The channels to zero-out are randomized on every forward call. 125 | During eval time, a fixed mask is picked and kept until `reset_mask()` is called. 126 | Usually the input comes from :class:`nn.Conv2d` modules. 127 | As described in the paper 128 | `Efficient Object Localization Using Convolutional Networks`_ , 129 | if adjacent pixels within feature maps are strongly correlated 130 | (as is normally the case in early convolution layers) then i.i.d. dropout 131 | will not regularize the activations and will otherwise just result 132 | in an effective learning rate decrease. 133 | In this case, :func:`nn.Dropout2d` will help promote independence between 134 | feature maps and should be used instead. 135 | Args: 136 | p (float, optional): probability of an element to be zero-ed. 137 | inplace (bool, optional): If set to ``True``, will do this operation 138 | in-place 139 | Shape: 140 | - Input: :math:`(N, C, H, W)` 141 | - Output: :math:`(N, C, H, W)` (same shape as input) 142 | Examples:: 143 | >>> m = nn.Dropout2d(p=0.2) 144 | >>> input = torch.randn(20, 16, 32, 32) 145 | >>> output = m(input) 146 | .. 
_Efficient Object Localization Using Convolutional Networks: 147 | http://arxiv.org/abs/1411.4280 148 | """ 149 | 150 | def _get_sample_mask_shape(self, sample_shape): 151 | return [sample_shape[0]] + [1] * (len(sample_shape) - 1) -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/radial_layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlko/active-testing/2807fac9ad91fb12e5814e71b9a26c55df9d50fb/activetesting/models/radial_bnn/radial_layers/__init__.py -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/radial_layers/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # Priors 4 | 5 | def gaussian_prior(name, 6 | log2pi, 7 | mu, 8 | sigma, 9 | device): 10 | """ 11 | Args: 12 | *args: {"mu": , "sigma":, "log2pi"} 13 | 14 | Returns: log_gaussian_pdf that takes a weight of arbitrary shape 15 | 16 | """ 17 | if mu == 0 and sigma == 1: 18 | # We handle this case slightly differently as it is common and can be made more efficient 19 | def log_gaussian_pdf(x): 20 | x = x.view(x.shape[0], -1) 21 | return - log2pi * x.shape[1] / 2 - torch.sum(x**2) / 2. 22 | return log_gaussian_pdf 23 | else: 24 | mu_tensor = torch.tensor(mu, requires_grad=False, dtype=torch.float32, device=device) 25 | sigma_tensor = torch.tensor(sigma, requires_grad=False, dtype=torch.float32, device=device) 26 | two_sigma_squared = 2 * (sigma_tensor ** 2) 27 | log_sigma = torch.log(sigma_tensor) 28 | 29 | def log_gaussian_pdf(x): 30 | x = x.view(x.shape[0], -1) 31 | log_pd = - log2pi * x.shape[1] / 2 32 | log_pd = log_pd - torch.sum((x - mu_tensor) ** 2, dim=1) / two_sigma_squared 33 | log_pd = log_pd - log_sigma * x.shape[1] / 2 34 | return log_pd 35 | 36 | return log_gaussian_pdf 37 | 38 | def scale_mixture_prior(name, 39 | log2pi, 40 | mus, 41 | sigmas, 42 | pi, 43 | device): 44 | first_log_gaussian_pdf = gaussian_prior(name, log2pi, mus[0], sigmas[0], device) 45 | second_log_gaussian_pdf = gaussian_prior(name, log2pi, mus[1], sigmas[1], device) 46 | 47 | def log_mixture_pdf(x): 48 | return pi * first_log_gaussian_pdf(x) + (1 - pi) * second_log_gaussian_pdf(x) 49 | 50 | return log_mixture_pdf 51 | 52 | 53 | # Sampling noise distributions 54 | 55 | def radial(size): 56 | """ 57 | Creates a distribution that is unit Gaussian along r and uniform over \theta. 58 | 59 | :param size: The size of the weight distribution to be generated. 60 | Zeroth dimension is variational samples. 61 | 1+ dimensions are the weight for each sample from the variational distribution. 62 | The same weight is applied to each example in a batch. 
63 | :return: noise distribution 64 | """ 65 | if torch.cuda.is_available(): 66 | device = 'cuda' 67 | else: 68 | device = 'cpu' 69 | 70 | # First we find a random direction (\epsilon_{\text{MFVI}} in equation (3) on page 4) 71 | epsilon_mfvi = torch.randn(size, device=device) 72 | 73 | # Then we pick a distance (r in equation (3) on page 4) 74 | distance = torch.randn((size[0]), device=device) 75 | 76 | # Then we normalize each variational sample independently 77 | if len(size) == 2: 78 | normalizing_factor = torch.norm(epsilon_mfvi.view(size[0], -1), p=2, dim=1).unsqueeze(1) 79 | distance = distance.unsqueeze(1) 80 | elif len(size) == 3: 81 | normalizing_factor = torch.norm(epsilon_mfvi.view(size[0], -1), p=2, dim=1).unsqueeze(1).unsqueeze(1) 82 | distance = distance.unsqueeze(1).unsqueeze(1) 83 | elif len(size) == 5: 84 | # Here we have a CNN with dimensions (var samples, out_channels, in_channels, kernel, kernel) 85 | normalizing_factor = torch.norm(epsilon_mfvi.view(size[0], -1), p=2, dim=1).unsqueeze(1).unsqueeze(1).unsqueeze( 86 | 1).unsqueeze(1) 87 | distance = distance.unsqueeze(1).unsqueeze(1).unsqueeze(1).unsqueeze(1) 88 | else: 89 | raise ValueError("Number of dimensions for epsilon not expected. Are you sure you wanted size {}".format(size)) 90 | 91 | direction = epsilon_mfvi / normalizing_factor 92 | epsilon_radial = direction * distance 93 | return epsilon_radial 94 | 95 | 96 | def gaussian(size): 97 | """ 98 | Returns a tensor of random epsilon using the default gaussian unit distribution 99 | :param size: shape of tensor to return (tuple) 100 | :return: FloatTensor of Size 101 | """ 102 | if torch.cuda.is_available(): 103 | device = 'cuda' 104 | else: 105 | device = 'cpu' 106 | 107 | epsilon_mfvi = torch.randn(size, 108 | device=device) 109 | return epsilon_mfvi -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/radial_layers/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | class Binary_cross_entropy(): 6 | def compute_loss(self, y_input, y_target): 7 | return F.binary_cross_entropy_with_logits(y_input, y_target.type(torch.cuda.FloatTensor)) 8 | 9 | def set_model(self, blank1, blank2): 10 | # Placeholder for compatibility with Elbo 11 | return 12 | 13 | 14 | class Nll_Loss(): 15 | 16 | def __init__(self): 17 | return 18 | 19 | def compute_loss(self, y_input, y_target): 20 | return 0, 0, F.nll_loss(y_input, y_target.squeeze()) 21 | 22 | 23 | class Mse_Loss(): 24 | 25 | def __init__(self): 26 | return 27 | 28 | def compute_loss(self, y_input, y_target): 29 | return F.mse_loss(y_input, y_target) 30 | 31 | 32 | class Elbo(): 33 | 34 | def __init__(self, 35 | binary, 36 | regression): 37 | # In the case of binary classification we make different assumption about the input dimension 38 | self.binary = binary 39 | self.regression = regression 40 | self.writer = None 41 | return 42 | 43 | def set_model(self, model, batch_size): 44 | self.model = model 45 | self.batch_size = batch_size 46 | 47 | def set_num_batches(self, num_batches): 48 | self.num_batches = num_batches 49 | 50 | def set_writer(self, writer): 51 | self.writer = writer 52 | 53 | def compute_loss(self, y_predicted, y_target): 54 | """ 55 | Estimates the variational free energy loss function ELBO. Note that the KL divergence terms are computed 56 | within the variational mean-field layers, and we loop over them here. 
57 | :param y_predicted: output of the forward pass 58 | :param y_target: target from training/test knowledge 59 | :return: 60 | """ 61 | # The overall loss is 62 | # - ELBO = entropy_sum - cross_entropy_sum - negative_log_likelihood_sum 63 | # We calculate each separately. The first two depend on weights, the last on data. 64 | 65 | # This term accumulates the cross-entropy between the posterior and prior 66 | # L_\text{cross-entropy} in the paper 67 | # \int q(w) log[p(w)] dw 68 | # This is estimated using MC integration. 69 | cross_entropy_sum = 0 70 | 71 | # This term accumulates the entropy of the variational posterior 72 | # L_\text{entropy} in the paper 73 | # \int q(w) log[q(w)] dw 74 | # This is found analytically up to a constant and is shown in the paper to be 75 | # -\sum_i log[\sigma_i] + c 76 | # Where i sums over the weights. Which is up to a constant the same 77 | # as when w is distributed with a multivariate Gaussian 78 | entropy_sum = 0 79 | 80 | for module in self.model.modules(): 81 | # Iterating over all radial_layers, including one representing the module as a whole. 82 | # So check if it supports the loss. 83 | if hasattr(module, "cross_entropy"): 84 | cross_entropy_sum += module.cross_entropy() 85 | if hasattr(module, "entropy"): 86 | entropy_sum += module.entropy() 87 | 88 | # Estimate the log likelihood of the data given the parameters 89 | # log(P(D|*w*)) 90 | # Note that there is log softmax inside the model and that NLL loss performs elementwise mean by default 91 | # Instead, we want to mean over samples from the variational distirbution and sum over examples 92 | # y_input: Tensor predictions [examples, samples, classes] 93 | # target: Tensor of targets [examples] 94 | 95 | # First we add a samples dimension 96 | epoch_variational_samples = y_predicted.shape[1] 97 | y_target = y_target.unsqueeze(1) 98 | y_target = y_target.expand((-1, epoch_variational_samples)) 99 | 100 | # In the case of regression we must estimate the target noise 101 | if self.regression: 102 | y_target = y_target.mean(dim=1) 103 | variance = y_target.var(dim=0) 104 | nll_tensor = (y_target - y_predicted.squeeze(dim=2).mean(dim=1)) ** 2 / (2 * variance) 105 | else: 106 | if self.binary: 107 | nll_tensor = F.binary_cross_entropy_with_logits( 108 | y_predicted.squeeze(dim=2), 109 | y_target.type(torch.cuda.FloatTensor), 110 | reduction='none') 111 | else: 112 | nll_tensor = F.nll_loss( 113 | y_predicted.permute(0, 2, 1), 114 | y_target, 115 | reduction="none") 116 | 117 | if len(nll_tensor.shape) > 1: 118 | # This should be taking the expectation over epsilon. 
119 | # We squeeze this in the binary case, so need to be careful 120 | nll_tensor = torch.mean(nll_tensor, dim=1) 121 | 122 | nll_sum = torch.sum(nll_tensor) 123 | 124 | if self.writer is not None: 125 | self.writer.add_scalar('cross-entropy', cross_entropy_sum/self.num_batches) 126 | self.writer.add_scalar('entropy', entropy_sum/self.num_batches) 127 | self.writer.add_scalar('nll_loss', nll_sum) 128 | kl_divergence_estimated_over_batch = (cross_entropy_sum - entropy_sum) / self.num_batches 129 | 130 | return nll_tensor / self.batch_size, kl_divergence_estimated_over_batch / self.batch_size 131 | -------------------------------------------------------------------------------- /activetesting/models/radial_bnn/radial_layers/variational_bayes.py: -------------------------------------------------------------------------------- 1 | import math 2 | import copy 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import numpy as np 7 | 8 | from torch.autograd import Variable 9 | from . import distributions 10 | 11 | class SVI_Base(nn.Module): 12 | """ 13 | Base class for Stochastic Variational Inference. Called by layers. Currently implemented: 14 | SVI_Linear 15 | SVI_Conv2D 16 | SVIMaxPool2D 17 | SVIGlobalMaxPool2D 18 | SVIAverageMaxPool2D 19 | """ 20 | 21 | def __init__(self, 22 | weight_shape, 23 | bias_shape, 24 | variational_distribution, 25 | prior, 26 | use_bias): 27 | super(SVI_Base, self).__init__() 28 | 29 | self.data_type = torch.float32 30 | 31 | self.weight_rhos = nn.Parameter(torch.empty(weight_shape, dtype=self.data_type)) 32 | self.weight_mus = nn.Parameter(torch.empty(weight_shape, dtype=self.data_type)) 33 | self.weight = Variable(torch.empty(weight_shape, dtype=self.data_type)) 34 | self.use_bias = use_bias 35 | if use_bias: 36 | self.bias_rhos = nn.Parameter(torch.empty(bias_shape, dtype=self.data_type)) 37 | self.bias_mus = nn.Parameter(torch.empty(bias_shape, dtype=self.data_type)) 38 | self.bias = Variable(torch.empty(bias_shape, dtype=self.data_type)) 39 | else: 40 | self.register_parameter('bias_rhos', None) 41 | self.register_parameter('bias_mus', None) 42 | self.register_parameter('bias', None) 43 | 44 | # The prior log probability density function is any function that takes takes a Tensor of weight and returns 45 | # a Tensor of the same shape with the log probability density of those points. 
46 | # gaussian_prior is implemented 47 | assert hasattr(distributions, prior['name']), "The prior named in config is not defined in utils.distributions" 48 | prior_args = copy.deepcopy(prior) 49 | prior_args["log2pi"] = torch.log(Variable(torch.from_numpy(np.array(2.0 * np.pi)).type(self.data_type), 50 | requires_grad=False)) 51 | prior_args["device"] = 'cpu' 52 | if torch.cuda.is_available(): 53 | prior_args["log2pi"] = prior_args["log2pi"].cuda() 54 | prior_args["device"] = 'cuda' 55 | self.prior_log_pdf = getattr(distributions, prior_args['name'])(**prior_args) 56 | 57 | # The variational distribution must take a size and return a sample from the noise distribution of the same size 58 | assert hasattr(distributions, variational_distribution), "The variational distribution is not defined in util.distributions" 59 | self.noise_distribution = getattr(distributions, variational_distribution) 60 | 61 | # The pretraining flag is controlled in the training loop 62 | # In the configuration file, set trainer["pretrain_epochs"] to a non-zero integer 63 | # While pretraining, no noise is sampled and only the means are optimized with an NLL loss 64 | # This helps stabilize training and is especially important for standard 65 | # MFVI with multivariate Gaussians. 66 | self.pretraining = False 67 | 68 | 69 | def _rho_to_sigma(self, rho): 70 | """ 71 | We actually parameterize sigma with rho. 72 | Sigma is softplus rho, which ensures that we have positive standard deviation. 73 | :param rho: tensor of rhos 74 | :return: tensor of sigmas 75 | """ 76 | return torch.log(1 + torch.exp(rho)) 77 | 78 | def entropy(self): 79 | """ 80 | Calculates the entropy: 81 | -\int q(w) log q(w) 82 | of the variational posterior up to a constant. 83 | For both the radial and multivariate Gaussian approximating distributions, this is: 84 | \sum_i log \sigma_i + c 85 | where i indexes over the weights. 86 | Returns: entropy of the approximate posterior up to a constant. 87 | """ 88 | if not self.pretraining: 89 | entropy = torch.sum(torch.log(self._rho_to_sigma(self.weight_rhos))) 90 | if self.use_bias: 91 | entropy += torch.sum(torch.log(self._rho_to_sigma(self.bias_rhos))) 92 | return entropy 93 | else: 94 | return 0 95 | 96 | def cross_entropy(self): 97 | """ 98 | Estimates the cross entropy between the variational posterior and prior 99 | - \int q(w) log(p(w)) dw 100 | using Monte Carlo integration. 101 | We find that this is a fairly low-variance estimator. 102 | Returns: cross-entropy 103 | 104 | """ 105 | if not self.pretraining: 106 | weight_log_prior_mean_over_epsilon = torch.mean(self.prior_log_pdf(self.weight), dim=0) 107 | cross_entropy = -torch.sum(weight_log_prior_mean_over_epsilon) 108 | if self.use_bias: 109 | bias_log_prior_mean_over_epsilon = torch.mean(self.prior_log_pdf(self.bias), dim=0) 110 | cross_entropy -= torch.sum(bias_log_prior_mean_over_epsilon) 111 | return cross_entropy 112 | else: 113 | return 0 114 | 115 | def is_pretraining(self, pretraining_on): 116 | self.pretraining = pretraining_on 117 | return 1 118 | 119 | 120 | class SVI_Linear(SVI_Base): 121 | """Models an independent Gaussian/mean-field approximation neural network. Based on 122 | pytorch module for nn.Linear""" 123 | 124 | def __init__(self, 125 | in_features, 126 | out_features, 127 | initial_rho, 128 | initial_mu, 129 | variational_distribution, 130 | prior, 131 | use_bias=True): 132 | """ 133 | Initializes weights and biases of a linear layer with stochastic variational inference over the weights. 
134 | :param in_features: Number of inputs features to the layer 135 | :param out_features: Number of outputs from the leayer 136 | :param initial_rho: controls starting variance of layer (sigma = log(1+exp(rho)) 137 | :param initial_mu: initial variance of mu as a zero-mean Gaussian or "he" uses Kaiming He initialization 138 | :param use_bias: flag for use of bias term default True 139 | """ 140 | super(SVI_Linear, self).__init__((out_features, in_features), 141 | (out_features), 142 | variational_distribution, 143 | prior, 144 | use_bias) 145 | self.reset_parameters(initial_rho, initial_mu) 146 | 147 | 148 | def reset_parameters(self, initial_rho, mu_std): 149 | """Randomly populates mus by Gaussian distribution around zero 150 | and sets all rhos to preset value""" 151 | if mu_std == 'he': 152 | # He Kaiming for mus, assuming Leaky ReLUs with gradient -0.2 153 | fan_in = self.weight_mus.shape[1] 154 | std = math.sqrt(1.92 / fan_in) 155 | elif isinstance(mu_std, (int, float)): 156 | std = mu_std 157 | else: 158 | ValueError("Standard deviation of mu was {}. Expected 'he' or an int/float") 159 | self.weight_rhos.data.normal_(initial_rho, std=0.5) 160 | self.weight_mus.data.normal_(std=std) 161 | if self.bias_mus is not None: 162 | self.bias_rhos.data.normal_(initial_rho, std=0.5) 163 | self.bias_mus.data.normal_(std=std) 164 | 165 | def forward(self, x): 166 | """ 167 | Computes the weights using reparameterization trick and then does a forward pass 168 | :param x: tensor of examples [examples, samples, features] 169 | :return: tensor of features to next layer 170 | """ 171 | if not self.pretraining: 172 | # We transform our parameterisation in rho into sigma 173 | weight_sigma = self._rho_to_sigma(self.weight_rhos) # [in_features, out_features] 174 | if self.use_bias: 175 | bias_sigma = self._rho_to_sigma(self.bias_rhos) # [out_features] 176 | # Now we compute the random noise 177 | # We deduce the number of training samples from the second dimension of the input data 178 | train_samples = x.size()[1] 179 | # torch.Size() has base class tuple, so we add a singleton for the new size 180 | # This gives weight_epsilon size (training_samples, in_features, out_features) 181 | weight_epsilon = Variable(self.noise_distribution((train_samples,) + self.weight_mus.size())) 182 | if self.use_bias: 183 | # bias_epsilon [training_samples, out_features] 184 | bias_epsilon = Variable(self.noise_distribution((train_samples,) + self.bias_mus.size())) 185 | # And determine the parameters *w* 186 | 187 | self.weight = torch.addcmul(self.weight_mus, weight_sigma, weight_epsilon) 188 | output = torch.einsum('ijk,jlk->ijl', [x, self.weight]) 189 | if self.use_bias: 190 | self.bias = torch.addcmul(self.bias_mus, bias_sigma, bias_epsilon) 191 | output = output + self.bias 192 | else: 193 | output = torch.einsum('ijk,lk->ijl', [x, self.weight_mus]) 194 | if self.use_bias: 195 | output = output + self.bias_mus 196 | return output 197 | 198 | 199 | class _SVIConvNd(SVI_Base): 200 | 201 | __constants__ = ['stride', 'padding', 'dilation', 'groups', 'bias'] 202 | 203 | def __init__(self, 204 | in_channels, 205 | out_channels, 206 | kernel_size, 207 | stride, 208 | padding, 209 | dilation, 210 | transposed, 211 | output_padding, 212 | groups, 213 | use_bias, 214 | variational_distribution, 215 | prior, 216 | initial_rho, 217 | initial_mu_std): 218 | 219 | if in_channels % groups != 0: 220 | raise ValueError('in_channels must be divisible by groups') 221 | if out_channels % groups != 0: 222 | raise 
ValueError('out_channels must be divisible by groups') 223 | if transposed: 224 | weight_shape = (in_channels, out_channels // groups, *kernel_size) 225 | else: 226 | weight_shape = (out_channels, in_channels // groups, *kernel_size) 227 | bias_shape = (out_channels) 228 | super(_SVIConvNd, self).__init__(weight_shape, bias_shape, variational_distribution, prior, use_bias) 229 | self.in_channels = in_channels 230 | self.out_channels = out_channels 231 | self.kernel_size = kernel_size 232 | self.stride = stride 233 | self.padding = padding 234 | self.dilation = dilation 235 | self.transposed = transposed 236 | self.output_padding = output_padding 237 | self.groups = groups 238 | 239 | self.reset_parameters(initial_rho, initial_mu_std) 240 | 241 | def reset_parameters(self, initial_rho, mu_std): 242 | self.weight_rhos.data.normal_(initial_rho, std=0.5) 243 | if mu_std == 'he': 244 | # Using pytorch's recommendation for Leaky Relu :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}` 245 | torch.nn.init.kaiming_uniform_(self.weight_mus, math.sqrt(1.92)) 246 | elif isinstance(mu_std, (int, float)): 247 | self.weight_mus.data.normal_(std=mu_std) 248 | else: 249 | ValueError("Standard deviation of mu was {}. Expected 'he' or an int/float") 250 | if self.bias_mus is not None: 251 | self.bias_rhos.data.normal_(initial_rho, std=0.5) 252 | fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight_mus) 253 | bound = 1 / math.sqrt(fan_in) 254 | torch.nn.init.uniform_(self.bias_mus, -bound, bound) 255 | 256 | def extra_repr(self): 257 | s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}' 258 | ', stride={stride}') 259 | if self.padding != 0: 260 | s += ', padding={padding}' 261 | if self.bias is None: 262 | s += ', bias=False' 263 | return s.format(**self.__dict__) 264 | 265 | 266 | class SVIConv2D(_SVIConvNd): 267 | """Models an independent Gaussian/mean-field approximation neural network. 
Based on 268 | pytorch module for nnConv2d.""" 269 | 270 | def __init__(self, in_channels, out_channels, kernel_size, variational_distribution, prior, initial_rho, mu_std, 271 | stride=(1,1), padding=0, dilation=1, groups=1, bias=True): 272 | if dilation != 1: 273 | raise NotImplementedError 274 | if groups != 1: 275 | raise NotImplementedError 276 | if padding < 0: 277 | raise ValueError("Padding for SVIConv2D must be 0 or greater.") 278 | if stride[0] < 1 or stride[1] < 1: 279 | raise ValueError("Padding for SVIConv2D must be 1 or greater.") 280 | super(SVIConv2D, self).__init__( 281 | in_channels, out_channels, kernel_size, stride, padding, dilation, 282 | False, 0, groups, bias, variational_distribution, prior, initial_rho, mu_std) 283 | 284 | def forward(self, x): 285 | """ 286 | Computes the weights using reparameterization trick and then does a forwards pass 287 | :param x: tensor of examples [examples, samples, in_channels, H, W] 288 | :return: tensor of features to next layer 289 | """ 290 | if not self.pretraining: 291 | # We transform our parameterisation in rho into sigma 292 | weight_sigma = self._rho_to_sigma(self.weight_rhos) # [out_channels, in_channels, H, W] 293 | if self.use_bias: 294 | bias_sigma = self._rho_to_sigma(self.bias_rhos) # [out_channels] 295 | 296 | # We deduce the number of variational training samples from the second dimension of the input data 297 | train_samples = x.shape[1] 298 | 299 | # torch.Size() has base class tuple, so we add a singleton for the new size 300 | # This gives weight_epsilon size (training_samples, out, in, *kernel_size) 301 | weight_epsilon = Variable(self.noise_distribution((train_samples,) + self.weight_mus.size())) 302 | self.weight = torch.addcmul(self.weight_mus, weight_sigma, weight_epsilon) 303 | if self.use_bias: 304 | # bias_epsilon [training_samples, out_channels] 305 | bias_epsilon = Variable(self.noise_distribution((train_samples,) + self.bias_mus.size())) 306 | self.bias = torch.addcmul(self.bias_mus, bias_sigma, bias_epsilon) # [samples, out_channels] 307 | else: 308 | self.weight = self.weight_mus.unsqueeze(0) 309 | if self.use_bias: 310 | self.bias = self.bias_mus.unsqueeze(0) 311 | # Add padding 312 | if self.padding != 0: 313 | x = torch.nn.functional.pad(x, [self.padding, self.padding, self.padding, self.padding]) 314 | # We unfold into our kernel areas 315 | x = x.unfold(3, self.kernel_size[0], self.stride[0]) # Over W 316 | x = x.unfold(4, self.kernel_size[1], self.stride[1]) # Over H giving [N, samples, in_channels, H_fields, W_fields, H_kernel, W_kernel] 317 | # Then we multiply in our weights which are [samples, out_channels, in_samples, H_kernel, W_kernel] 318 | # This gives [N, samples, out_channels, H, W] 319 | x = torch.einsum('ijklmno,jpkno->ijplm',[x, self.weight]) 320 | x = x + self.bias.unsqueeze(0).unsqueeze(3).unsqueeze(4) 321 | return x 322 | 323 | 324 | class SVIMaxPool2D(nn.Module): 325 | """ 326 | Expects 327 | :param x: [examples, samples, channels, H, W] 328 | :param kernel_size: [H, W] 329 | :param stride: [H, W] 330 | :param padding: Not implemented 331 | :param dilation: 332 | :return: 333 | """ 334 | def __init__(self, kernel_size, stride=None, padding=0, dilation=1): 335 | super(SVIMaxPool2D, self).__init__() 336 | self.kernel_size = kernel_size 337 | self.padding = padding 338 | if stride == None: 339 | self.stride = kernel_size 340 | else: 341 | self.stride = stride 342 | if dilation != 1: 343 | raise NotImplementedError 344 | 345 | def forward(self, x): 346 | x = F.pad(x, (self.padding, 
self.padding, self.padding, self.padding)) 347 | x = x.unfold(3, self.kernel_size[0], self.stride[0]) 348 | x = x.unfold(4, self.kernel_size[1], self.stride[1]) # Now this is [examples, samples, channels, pooled_H, pooled_W, size_H, size_W) 349 | x = x.max(6)[0].max(5)[0] 350 | return x 351 | 352 | def extra_repr(self): 353 | s = ('pool_size={kernel_size}, stride={stride}, padding={padding}') 354 | return s.format(**self.__dict__) 355 | 356 | class SVIGlobalMaxPool2D(nn.Module): 357 | """ 358 | Expects 359 | :param x: [examples, samples, channels, H, W] 360 | :return: [examples, samples, channels] 361 | """ 362 | 363 | def __init__(self): 364 | super(SVIGlobalMaxPool2D, self).__init__() 365 | 366 | 367 | def forward(self, x): 368 | x = x.max(4)[0].max(3)[0] 369 | return x 370 | 371 | 372 | class SVIGlobalMeanPool2D(nn.Module): 373 | """ 374 | Expects 375 | :param x: [examples, samples, channels, H, W] 376 | :return: [examples, samples, channels] 377 | """ 378 | 379 | def __init__(self): 380 | super(SVIGlobalMeanPool2D, self).__init__() 381 | 382 | def forward(self, x): 383 | x = x.mean(4).mean(3) 384 | return x 385 | -------------------------------------------------------------------------------- /activetesting/models/sk2torch.py: -------------------------------------------------------------------------------- 1 | """Make PyTorch models work with SKLearn interface.""" 2 | import logging 3 | from pathlib import Path 4 | import copy 5 | import hydra 6 | from omegaconf import OmegaConf 7 | 8 | import math 9 | import numpy as np 10 | import torch 11 | import torch.nn.functional as F 12 | from torchvision import transforms 13 | from sklearn.model_selection import train_test_split 14 | 15 | from .radial_bnn.radial_layers.loss import Elbo 16 | from .skmodels import BaseModel 17 | 18 | 19 | # ---- Interface between SKLearn and Pytorch ---- 20 | # Make Pytorch model behave as SKLearn model on the outside. 
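# The wrapper below exposes sklearn-style fit(x, y) / predict(x) on numpy arrays,
# while internally building DataLoaders, splitting off a validation set, and
# training with early stopping. A minimal usage sketch (illustrative; the channel
# count and the cfg contents are assumptions, e.g. a config along the lines of
# conf/paper/*BNN.yaml):
#
#     clf = SK2TorchBNN(RadialBNN(channels=16), cfg)
#     clf.fit(x_train, y_train)     # trains to convergence with early stopping
#     probs = clf.predict(x_test)   # returns an N x C numpy array of probabilities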
21 | class TransformDataset(torch.utils.data.Dataset): 22 | def __init__(self, data, targets, chw, transform): 23 | self.data = data 24 | self.targets = targets 25 | self.transform = transform 26 | self.chw = chw 27 | 28 | def __getitem__(self, index): 29 | x = self.data[index] 30 | y = self.targets[index] 31 | 32 | if self.transform: 33 | x = x.reshape(self.chw).transpose(1, 2, 0) 34 | x = self.transform(x) 35 | x = x.reshape(-1) 36 | 37 | return x, y 38 | 39 | def __len__(self): 40 | return len(self.data) 41 | 42 | 43 | class SK2TorchBNN(BaseModel): 44 | """Interface for Pytorch Models and SKlearn Methods.""" 45 | def __init__(self, model, cfg): 46 | self.device = 'cuda:0' if torch.cuda.is_available() else 'cpu' 47 | cfg = OmegaConf.merge( 48 | OmegaConf.structured(cfg), 49 | dict(task_type='classification',)) 50 | 51 | self.cfg = cfg 52 | self.t_cfg = cfg['training_cfg'] 53 | self.model = model.to(device=self.device).type( 54 | torch.float32) 55 | 56 | self.needs_reinit = True 57 | 58 | def predict(self, x, for_acquisition=True, mutual_info=False, *args, 59 | **kwargs): 60 | 61 | n_samples = self.t_cfg.get('variational_samples', None) 62 | 63 | if len(x) > self.t_cfg['batch_size']: 64 | loader = self.make_loader([x], train=False) 65 | else: 66 | loader = [[torch.from_numpy(x)]] 67 | 68 | if for_acquisition: 69 | self.model.eval() 70 | 71 | preds = [] 72 | with torch.no_grad(): 73 | for (data, ) in loader: 74 | data = data.to(self.device) 75 | if not mutual_info: 76 | pred = self.model(data, n_samples=n_samples) 77 | # model outputs log probabilities, our code does not expect 78 | # this the additional exp_logging hopefully does not 79 | # introduce too much error 80 | pred = torch.exp(pred) 81 | 82 | else: 83 | # N x Samples x Classes 84 | out = self.model( 85 | data, n_samples=n_samples, log_sum_exp=False) 86 | 87 | mean_samples = torch.logsumexp(out, dim=1) - math.log( 88 | n_samples) 89 | 90 | entropy_average = -torch.sum( 91 | mean_samples.exp() * mean_samples, dim=1) 92 | 93 | average_entropy = -torch.sum( 94 | out.exp() * out, dim=2).mean(1) 95 | 96 | mi = entropy_average - average_entropy 97 | 98 | pred = mi 99 | 100 | preds.append(pred) 101 | 102 | preds = torch.cat(preds, 0) 103 | return preds.detach().cpu().numpy() 104 | 105 | def fit(self, x, y): 106 | 107 | p = self.cfg['skip_fit_debug'] 108 | 109 | if self.cfg.get('skip_fit_debug_relative', False): 110 | base = Path('.') 111 | else: 112 | base = Path(hydra.utils.get_original_cwd()) 113 | 114 | if p and (loc := (base / p)).exists(): 115 | logging.info(f'Loading model from {p}.') 116 | self.model.load_state_dict( 117 | torch.load(loc, map_location=self.device)) 118 | else: 119 | if p and not (loc := (base / p)).exists(): 120 | logging.info(f'Tried to load model, but {p} does not exist.') 121 | 122 | train_loader, val_loader = self.val_train_loaders(x, y) 123 | # from activetesting.datasets import get_CIFAR10 124 | # train_loader, val_loader = get_CIFAR10() 125 | self.model = self.train_to_convergence( 126 | self.model, train_loader, val_loader) 127 | 128 | path = Path(self.cfg.get('save_path', 'model.pth')) 129 | 130 | if not path.exists(): 131 | path.parent.mkdir(parents=True, exist_ok=True) 132 | torch.save(self.model.state_dict(), path) 133 | 134 | def val_train_loaders(self, x, y): 135 | 136 | # generate random splits for train and val 137 | val_size = int(self.t_cfg['validation_set_size']) 138 | if self.t_cfg.get('stratify_val', False): 139 | strata = y 140 | else: 141 | strata = None 142 | 143 | x_train, x_val, y_train, 
y_val = train_test_split(
144 |             x, y, test_size=val_size, stratify=strata)
145 | 
146 |         train_loader = self.make_loader([x_train, y_train])
147 |         val_loader = self.make_loader([x_val, y_val], train=False)
148 | 
149 |         return train_loader, val_loader
150 | 
151 |     def make_loader(self, arrs, train=True):
152 | 
153 |         if train and (self.t_cfg.get('transforms', False) == 'cifar'):
154 |             transform = transforms.Compose([
155 |                 transforms.ToTensor(),
156 |                 transforms.RandomCrop(self.cfg.data_CHW[-1], padding=4),
157 |                 transforms.RandomHorizontalFlip(),
158 |             ])
159 | 
160 |             dataset = TransformDataset(
161 |                 arrs[0], arrs[1], list(self.cfg.data_CHW), transform)
162 |         else:
163 |             arrs = [torch.from_numpy(arr) for arr in arrs]
164 |             dataset = torch.utils.data.TensorDataset(*arrs)
165 | 
166 |         bs = self.cfg.get('testing_cfg', dict()).get('batch_size', False)  # optional larger eval batch size
167 |         if bs and not train:
168 |             batch_size = bs
169 | 
170 |         else:
171 |             batch_size = self.t_cfg['batch_size']
172 | 
173 |         data_loader = torch.utils.data.DataLoader(
174 |             dataset,
175 |             shuffle=train,
176 |             batch_size=batch_size,
177 |             num_workers=self.t_cfg['num_workers'],
178 |             pin_memory=self.t_cfg['pin_memory'])
179 | 
180 |         return data_loader
181 | 
182 |     # def make_loader(self, arrs, train=True):
183 | 
184 |     #     if train and (self.t_cfg.get('transforms', False) == 'cifar'):
185 |     #         transform = transforms.Compose([
186 |     #             transforms.ToTensor(),
187 |     #             transforms.RandomCrop(32, padding=4),
188 |     #             transforms.RandomHorizontalFlip(),
189 |     #         ])
190 | 
191 |     #         dataset = TransformDataset(
192 |     #             arrs[0], arrs[1], list(self.cfg.data_CHW), transform)
193 |     #     else:
194 |     #         arrs = [torch.from_numpy(arr) for arr in arrs]
195 |     #         dataset = torch.utils.data.TensorDataset(*arrs)
196 | 
197 |     #     data_loader = torch.utils.data.DataLoader(
198 |     #         dataset,
199 |     #         shuffle=train,
200 |     #         batch_size=self.t_cfg['batch_size'],
201 |     #         num_workers=self.t_cfg['num_workers'],
202 |     #         pin_memory=self.t_cfg['pin_memory'])
203 | 
204 |     #     return data_loader
205 | 
206 |     def train_to_convergence(self, model, train_loader, val_loader):
207 |         logging.info(
208 |             f'Beginning training with {len(train_loader.dataset)} training '
209 |             f'points and {len(val_loader.dataset)} validation points.'
210 |         )
211 |         m = self.t_cfg['max_epochs']
212 |         log_every = int(.02 * m) if m > 100 else 1
213 |         best = np.inf
214 |         best_model = model
215 |         patience = 0
216 |         optimizer = self.get_optimizer()
217 |         scheduler = self.get_scheduler(optimizer)
218 | 
219 |         for epoch in range(self.t_cfg['max_epochs']):
220 | 
221 |             self.train(model, train_loader, optimizer)
222 | 
223 |             val_nll, val_accuracy = self.evaluate(model, val_loader)
224 | 
225 |             if epoch % log_every == 0:
226 |                 logging.info(
227 |                     f'Epoch {epoch:0>3d} eval: Val nll: {val_nll:.4f}, '
228 |                     f'Val Accuracy: {val_accuracy}')
229 | 
230 |             if val_nll < best:
231 |                 best = val_nll
232 |                 best_model = copy.deepcopy(model)
233 |                 patience = 0
234 |             else:
235 |                 patience += 1
236 | 
237 |             if patience >= self.t_cfg['early_stopping_epochs']:
238 |                 logging.info(
239 |                     f'Patience reached - stopping training. 
' 240 | f'Best was {best}') 241 | break 242 | 243 | if scheduler is not None: 244 | scheduler.step() 245 | 246 | logging.info('Completed training for acquisition.') 247 | 248 | return best_model 249 | 250 | def train(self, model, train_loader, optimizer): 251 | n_samples = self.t_cfg['variational_samples'] 252 | model.train() 253 | 254 | if self.t_cfg.get('model', False) == "radial_bnn": 255 | loss_object = Elbo(binary=False, regression=False) 256 | loss_object.set_model(model, train_loader.batch_size) 257 | loss_object.set_num_batches(len(train_loader)) 258 | 259 | def loss_helper(pred, target): 260 | nll_loss, kl_loss = loss_object.compute_loss(pred, target) 261 | return (nll_loss + kl_loss / 10).mean(dim=0) 262 | 263 | raw_loss = loss_helper 264 | 265 | else: 266 | # raw_loss = F.cross_entropy 267 | raw_loss = F.nll_loss 268 | 269 | for idx, (data, target) in enumerate(train_loader): 270 | 271 | data = data.to(self.device) 272 | target = target.to(self.device) 273 | 274 | optimizer.zero_grad() 275 | 276 | prediction = model(data, n_samples, log_sum_exp=False) 277 | 278 | loss = raw_loss(prediction, target) 279 | 280 | loss.backward() 281 | 282 | optimizer.step() 283 | 284 | def evaluate(self, model, eval_loader): 285 | self.model.train() 286 | # We actually only want eval mode on when we're doing acquisition 287 | # because of how consistent dropout works. 288 | n_samples = self.t_cfg['variational_samples'] 289 | nll = correct = 0 290 | 291 | with torch.no_grad(): 292 | for data, target_N in eval_loader: 293 | 294 | data = data.to(self.device) 295 | target_N = target_N.to(self.device) 296 | 297 | prediction_N = model(data, n_samples=n_samples) 298 | 299 | # if self.cfg.get('model', False) == "radial_bnn": 300 | # raw_loss = F.nll_loss 301 | # else: 302 | # raw_loss = F.cross_entropy 303 | raw_loss = F.nll_loss 304 | 305 | raw_nll_N = raw_loss( 306 | prediction_N, target_N, reduction="none") 307 | 308 | nll += torch.sum(raw_nll_N) 309 | 310 | # get the index of the max log-probability 311 | class_prediction = prediction_N.max(1, keepdim=True)[1] 312 | correct += class_prediction.eq( 313 | target_N.view_as(class_prediction)).sum().item() 314 | 315 | nll /= len(eval_loader.dataset) 316 | 317 | percentage_correct = 100.0 * correct / len(eval_loader.dataset) 318 | 319 | return nll.item(), percentage_correct 320 | 321 | def get_optimizer(self): 322 | c = self.t_cfg.get('optimizer', False) 323 | 324 | if not c: 325 | optimizer = torch.optim.Adam( 326 | self.model.parameters(), 327 | lr=self.t_cfg['learning_rate'], 328 | weight_decay=self.t_cfg['weight_decay'],) 329 | 330 | elif c == 'cifar': 331 | optimizer = torch.optim.SGD( 332 | self.model.parameters(), 333 | lr=self.t_cfg['learning_rate'], 334 | momentum=0.9, weight_decay=self.t_cfg['weight_decay']) 335 | else: 336 | raise ValueError 337 | 338 | return optimizer 339 | 340 | def get_scheduler(self, optimizer): 341 | c = self.t_cfg.get('scheduler', False) 342 | epochs = self.t_cfg['max_epochs'] 343 | 344 | if not c: 345 | scheduler = None 346 | 347 | elif c == 'cifar': 348 | milestones = [int(epochs * 0.5), int(epochs * 0.75)] 349 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 350 | optimizer, milestones=milestones, gamma=0.1) 351 | 352 | elif c == 'cosine': 353 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( 354 | optimizer, T_max=epochs) 355 | 356 | elif c == 'devries': 357 | # https://arxiv.org/abs/1708.04552v2 358 | assert epochs == 200 359 | milestones = [60, 120, 160] 360 | scheduler = 
torch.optim.lr_scheduler.MultiStepLR( 361 | optimizer, milestones=milestones, gamma=0.2) 362 | 363 | return scheduler 364 | # ---- Make Radial BNN Sane ---- 365 | # There is some unexpected behaviour in terms of the data shapes 366 | # expected as input and given as output from Radial BNN. 367 | # Make them behave more similar to any other PyTorch model. 368 | 369 | 370 | 371 | # If I ever want to build sane MCDO, use this to modify forward 372 | # if model_arch == "consistent_mcdo": 373 | # prediction = torch.logsumexp( 374 | # model(data_N_C_H_W, variational_samples), dim=1 375 | # ) - math.log(variational_samples) -------------------------------------------------------------------------------- /activetesting/models/skmodels.py: -------------------------------------------------------------------------------- 1 | """Models for active testing.""" 2 | 3 | import logging 4 | from omegaconf import OmegaConf 5 | import numpy as np 6 | from scipy.stats import special_ortho_group 7 | 8 | from activetesting.loss import RMSELoss, AccuracyLoss, CrossEntropyLoss 9 | 10 | 11 | class BaseModel: 12 | """Base class for models.""" 13 | def __init__(self, cfg, model): 14 | # Set task_type and global_std if not present. 15 | self.cfg = OmegaConf.merge( 16 | OmegaConf.structured(cfg), 17 | dict(task_type=cfg.get('task_type', 'regression'),)) 18 | 19 | self.model = model 20 | 21 | def fit(self, x, y): 22 | raise NotImplementedError 23 | 24 | def predict(self, x, **kwargs): 25 | raise NotImplementedError 26 | 27 | def performance(self, x, y, task_type): 28 | pred = self.predict(x) 29 | 30 | if task_type == 'regression': 31 | logging.info(f'RMSE: {RMSELoss()(pred, y)}') 32 | elif task_type == 'classification': 33 | logging.info(f'Accuracy: {AccuracyLoss()(pred, y).mean()}%.') 34 | logging.info(f'CrossEntropy: {CrossEntropyLoss()(pred, y).mean()}.') 35 | else: 36 | raise ValueError 37 | 38 | 39 | class SKLearnModel(BaseModel): 40 | """SKLearn derived models.""" 41 | def __init__(self, cfg, model): 42 | super().__init__(cfg, model) 43 | 44 | def fit(self, x, y): 45 | if x.ndim == 1: 46 | # Sklearn expects x to be NxD 47 | x = x[..., np.newaxis] 48 | 49 | self.model = self.model.fit(x, y) 50 | 51 | def predict(self, x, idxs=None, *args, **kwargs): 52 | # Sklearn expects x to be NxD 53 | predict_proba = self.cfg['task_type'] == 'classification' 54 | 55 | return self.predict_sk(x, predict_proba=predict_proba, **kwargs) 56 | 57 | def predict_sk(self, x, predict_proba, **kwargs): 58 | 59 | if predict_proba: 60 | y = self.model.predict_proba(x, **kwargs) 61 | else: 62 | y = self.model.predict(x, **kwargs) 63 | 64 | return y 65 | 66 | 67 | class LinearRegressionModel(SKLearnModel): 68 | """Simple linear regression.""" 69 | def __init__(self, cfg): 70 | from sklearn.linear_model import LinearRegression 71 | model = LinearRegression() 72 | super().__init__(cfg, model) 73 | 74 | def predict(self, x, idxs=None, *args, **kwargs): 75 | return self.model.predict(x) 76 | 77 | 78 | class GaussianProcessRegressor(SKLearnModel): 79 | """Gaussian Process regression.""" 80 | def __init__(self, cfg): 81 | from sklearn.gaussian_process import GaussianProcessRegressor 82 | from sklearn.gaussian_process.kernels import Matern, WhiteKernel 83 | 84 | k = Matern(length_scale=cfg.get('length_scale', 1.)) 85 | 86 | if σ := cfg.get('with_noise', False): 87 | k += WhiteKernel(noise_level=σ**2) 88 | 89 | model = GaussianProcessRegressor(kernel=k, optimizer=None) 90 | 91 | super().__init__(cfg, model) 92 | 93 | def sample_y(self, x, **kwargs): 
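        # sklearn's sample_y returns an array of shape (n_points, n_samples);
        # keep only the first drawn function.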
94 |         return self.model.sample_y(x, **kwargs)[:, 0]
95 | 
96 | 
97 | class RandomForestClassifier(SKLearnModel):
98 |     """Random forest classifier."""
99 |     def __init__(self, cfg):
100 |         from sklearn.ensemble import RandomForestClassifier as SKForest
101 |         cfg = OmegaConf.merge(
102 |             OmegaConf.structured(cfg),
103 |             dict(task_type='classification',))
104 | 
105 |         sk_args = cfg.get('sk_args', dict())
106 |         model = SKForest(**sk_args)
107 | 
108 |         super().__init__(cfg, model)
109 | 
110 | 
111 | class RandomDirectionRandomForestClassifier(SKLearnModel):
112 |     """Random forest classifier fit on randomly rotated inputs."""
113 |     def __init__(self, cfg, speedup=True, dim=None):
114 |         from sklearn.ensemble import RandomForestClassifier as SKForest
115 |         cfg = OmegaConf.merge(
116 |             OmegaConf.structured(cfg),
117 |             dict(task_type='classification',))
118 | 
119 |         sk_args = cfg.get('sk_args', dict())
120 |         model = SKForest(**sk_args)
121 |         super().__init__(cfg, model)
122 | 
123 |         self.speedup = speedup
124 |         if self.speedup:
125 |             # Only sample from ortho group once
126 |             self.n_rots = 40
127 |             self._rotations = special_ortho_group.rvs(dim, size=self.n_rots)
128 | 
129 |     def fit(self, x, y):
130 |         self.set_rotation(x)
131 |         rotated = np.dot(x, self.rot)
132 |         return super().fit(rotated, y)
133 | 
134 |     def set_rotation(self, x):
135 |         if not self.speedup:
136 |             self.rot = special_ortho_group.rvs(x.shape[1])
137 |         else:
138 |             self.rot = self._rotations[np.random.randint(0, self.n_rots)]
139 | 
140 |     def predict(self, x, *args, **kwargs):
141 |         rotated = np.dot(x, self.rot)
142 |         return super().predict(rotated, *args, **kwargs)
143 | 
144 | 
145 | class SVMClassifier(SKLearnModel):
146 |     """Support vector machine classifier."""
147 |     def __init__(self, cfg):
148 |         from sklearn.svm import SVC
149 | 
150 |         cfg = OmegaConf.merge(
151 |             OmegaConf.structured(cfg),
152 |             dict(task_type='classification',))
153 | 
154 |         sk_args = cfg.get('sk_args', dict())
155 |         model = SVC(probability=True, **sk_args)
156 | 
157 |         super().__init__(cfg, model)
158 | 
159 | 
160 | class GPClassifier(SKLearnModel):
161 |     """Gaussian process classifier."""
162 |     def __init__(self, cfg):
163 |         from sklearn.gaussian_process import GaussianProcessClassifier
164 |         from sklearn.gaussian_process.kernels import Matern, WhiteKernel
165 | 
166 |         kernel = Matern(length_scale=1)
167 | 
168 |         if σ := cfg.get('with_noise', False):
169 |             kernel += WhiteKernel(noise_level=σ**2)
170 | 
171 |         model = GaussianProcessClassifier(
172 |             kernel=kernel, optimizer=cfg.get('optimizer', None))
173 | 
174 |         cfg = OmegaConf.merge(
175 |             OmegaConf.structured(cfg),
176 |             dict(task_type='classification',))
177 | 
178 |         super().__init__(cfg, model)
179 | 
-------------------------------------------------------------------------------- /activetesting/models/torchmodels.py: --------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import torch
4 | 
5 | from .radial_bnn import bnn_models
6 | from .sk2torch import SK2TorchBNN
7 | from .cnn.models import DeepModel
8 | from .skmodels import BaseModel
9 | 
10 | # ---- Final Pytorch Wrappers ----
11 | # Import these models. From the outside, they can be used like sklearn models.
12 | 
13 | 
14 | def modify_bnn(model, data_CHW, *args, **kwargs):
15 | 
16 |     class Sanify(model):
17 |         """Change default behaviour of Radial BNN.
18 | 
19 |         In particular, hide sampling behaviour and special input/output
20 |         formatting, s.t. the wrapped model behaves as a normal Pytorch model.
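        Flat inputs of shape (N, D) are reshaped to (N, n_samples, C, H, W)
        before the forward pass, and per-sample outputs are averaged in log
        space (logsumexp minus log n_samples) unless log_sum_exp=False.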
21 | """ 22 | def __init__(self, *args, **kwargs): 23 | self.data_CHW = data_CHW 24 | super().__init__(*args, **kwargs) 25 | 26 | def forward(self, data, n_samples, log_sum_exp=True): 27 | data = self.radial_bnn_forward_reshape(data, n_samples) 28 | out = super().forward(data) 29 | if log_sum_exp: 30 | out = torch.logsumexp(out, dim=1) - math.log(n_samples) 31 | return out 32 | 33 | def radial_bnn_forward_reshape(self, data_N_HW, n_samples): 34 | # expects empty channel dimension after batch dim 35 | data_N_C_HW = torch.unsqueeze(data_N_HW, 1) 36 | 37 | if self.data_CHW is None: 38 | data_N_C_H_W = data_N_C_HW 39 | else: 40 | # Radial BNN and RGB Data actually does not work yet 41 | data_N_C_H_W = data_N_C_HW.reshape( 42 | list(data_N_C_HW.shape[:-1]) + list(self.data_CHW[1:])) 43 | 44 | # assert len(data_N_C_H_W.shape) == 4 45 | data_N_V_C_H_W = torch.unsqueeze(data_N_C_H_W, 1) 46 | data_N_V_C_H_W = data_N_V_C_H_W.expand( 47 | -1, n_samples, -1, -1, -1 48 | ) 49 | return data_N_V_C_H_W 50 | 51 | return Sanify(*args, **kwargs) 52 | 53 | 54 | class RadialBNN(SK2TorchBNN): 55 | def __init__(self, cfg): 56 | data_CHW = cfg.get('data_CHW', None) 57 | kwargs = dict(channels=cfg['channels']) 58 | model = modify_bnn(bnn_models.RadialBNN, data_CHW, **kwargs) 59 | self.has_mi = True 60 | super().__init__(model, cfg) 61 | 62 | 63 | class TinyRadialBNN(SK2TorchBNN): 64 | def __init__(self, cfg): 65 | data_CHW = cfg.get('data_CHW', None) 66 | model = modify_bnn(bnn_models.TinyRadialBNN, data_CHW) 67 | super().__init__(model, cfg) 68 | self.has_mi = True 69 | 70 | 71 | def modify_cnns(model, data_CHW, debug_mnist): 72 | 73 | class Sanify(model): 74 | """Change default behaviour of Deterministic CNNs. 75 | 76 | Make them ignore args, kwargs in forward pass. 77 | """ 78 | def __init__(self, *args, **kwargs): 79 | self.data_CHW = list(data_CHW) 80 | self.debug_mnist = debug_mnist 81 | super().__init__(*args, **kwargs) 82 | 83 | # original model uses Crossentropy loss 84 | # we use NLL loss --> need to add logsoftmax layer 85 | self.log_softmax = torch.nn.LogSoftmax(dim=1) 86 | 87 | def forward(self, data, *args, **kwargs): 88 | N = data.shape[0] 89 | data = data.reshape([N]+self.data_CHW) 90 | if self.debug_mnist: 91 | data = data.repeat(1, 3, 1, 1) 92 | 93 | out = super().forward(data) 94 | out = self.log_softmax(out) 95 | 96 | return out 97 | 98 | return Sanify 99 | 100 | 101 | class ResNet18(SK2TorchBNN): 102 | def __init__(self, cfg): 103 | model = modify_cnns(DeepModel, cfg['data_CHW'], cfg['debug_mnist'])( 104 | cfg['data_CHW'][-1], cfg['num_classes'], 'resnet18') 105 | super().__init__(model, cfg) 106 | 107 | 108 | class WideResNet(SK2TorchBNN): 109 | def __init__(self, cfg): 110 | model = modify_cnns(DeepModel, cfg['data_CHW'], cfg['debug_mnist'])( 111 | cfg['data_CHW'][-1], cfg['num_classes'], 'wideresnet') 112 | super().__init__(model, cfg) 113 | 114 | 115 | class TorchEnsemble(BaseModel): 116 | def __init__(self, cfg, TorchModel): 117 | from omegaconf import OmegaConf 118 | n_models = cfg['n_models'] 119 | self.models = [] 120 | for i in range(n_models): 121 | # update model save path 122 | if cfg.get('skip_fit_debug', False): 123 | cfg_i = OmegaConf.merge( 124 | OmegaConf.structured(cfg), 125 | dict( 126 | save_path=cfg.save_path.format(i), 127 | skip_fit_debug=cfg.skip_fit_debug.format(i), 128 | ), 129 | ) 130 | else: 131 | cfg_i = cfg 132 | 133 | model = TorchModel(cfg_i) 134 | self.models.append(model) 135 | 136 | super().__init__(cfg, None) 137 | 138 | def predict(self, *args, **kwargs): 
139 | preds = [] 140 | for model in self.models: 141 | pred = model.predict(*args, **kwargs) 142 | preds.append(pred) 143 | 144 | preds = np.stack(preds, 0) 145 | mean_preds = np.mean(preds, 0) 146 | return mean_preds 147 | 148 | def fit(self, *args, **kwargs): 149 | 150 | for model in self.models: 151 | model.fit(*args, **kwargs) 152 | 153 | 154 | class ResNet18Ensemble(TorchEnsemble): 155 | def __init__(self, cfg): 156 | super().__init__(cfg, ResNet18) 157 | 158 | 159 | class WideResNetEnsemble(TorchEnsemble): 160 | def __init__(self, cfg): 161 | super().__init__(cfg, WideResNet) 162 | -------------------------------------------------------------------------------- /activetesting/plotting/__init__.py: -------------------------------------------------------------------------------- 1 | from .paths import * 2 | from .utils import * 3 | -------------------------------------------------------------------------------- /activetesting/plotting/paths.py: -------------------------------------------------------------------------------- 1 | 2 | def plus_base(base, end): 3 | return [base+e for e in end] 4 | 5 | 6 | class ReproduciblePaths: 7 | """Store paths for experiment results.""" 8 | base = 'outputs/final/' 9 | figure123 = [ 10 | 'SyntheticGPGP', 11 | 'SyntheticQuadraticLinear', 12 | 'SyntheticTwoMoonsRF'] 13 | figure123 = plus_base(base, figure123) 14 | 15 | figure4 = [ 16 | 'SmallMNISTBNN', 17 | 'SmallFMNISTResNet'] 18 | 19 | figure4names = [ 20 | 'BNN MNIST', 21 | 'ResNet-18 Fashion-MNIST'] 22 | 23 | figure5 = base + 'LargeCIFAR100ResNet' 24 | figure5name = 'ResNet CIFAR100' 25 | 26 | figure6 = [ 27 | 'LargeFMNISTResNet', 28 | 'LargeCIFAR10ResNet', 29 | 'LargeCIFAR100WideResNet', 30 | 'LargeCIFAR10ResNetAccuracy'] 31 | figure6 = plus_base(base, figure6) 32 | figure6names = [ 33 | 'ResNet Fashion-MNIST', 34 | 'ResNet CIFAR-10', 35 | 'WideResNet CIFAR-100', 36 | 'Resnet CIFAR-10 Accuracy'] 37 | 38 | figure7 = base + 'LargeFMNISTBNN' 39 | 40 | old_figure4 = 'outputs/final/LargeMNISTBNN' 41 | 42 | 43 | class LegacyPaths: 44 | """Legacy paths from before reproducible.""" 45 | 46 | base = 'outputs/legacy/' 47 | figure123 = [ 48 | '2020-12-31-12-00-30', 49 | '2020-12-30-19-18-12', 50 | '2021-01-06-20-27-51'] 51 | figure123 = plus_base(base, figure123) 52 | 53 | figure4 = [ 54 | [ 55 | base+'2021-01-20-09-26-51', 56 | base+'2021-01-20-09-27-40', 57 | base+'2021-01-20-09-28-06', 58 | base+'2021-01-20-11-48-23', 59 | ], 60 | [ 61 | base + '2021-01-20-11-40-35', 62 | base + '2021-01-24-14-57-58' 63 | ] 64 | ] 65 | figure4names = [ 66 | 'BNN MNIST', 67 | 'ResNet-18 Fashion-MNIST'] 68 | 69 | figure5 = base + '2021-01-20-16-33-33' 70 | figure5name = 'ResNet CIFAR100' 71 | 72 | figure6 = [ 73 | [base + '2021-01-21-08-52-31'], 74 | [base + '2021-01-20-16-27-51'], 75 | [base + '2021-01-26-15-22-32'], 76 | [base + '2021-01-20-16-29-06'] 77 | ] 78 | figure6names = [ 79 | 'ResNet Fashion-MNIST', 80 | 'ResNet CIFAR-10', 81 | 'WideResNet CIFAR-100', 82 | 'Resnet CIFAR-10 Accuracy'] 83 | 84 | figure7 = [ 85 | '2021-01-20-10-12-35', 86 | '2021-01-20-10-17-31', 87 | '2021-01-22-08-41-17', 88 | '2021-01-22-08-43-13'] 89 | figure7 = plus_base(base, figure7) 90 | 91 | old_figure4 = [ 92 | '2021-01-20-09-53-03', 93 | '2021-01-20-10-10-36', 94 | '2021-01-22-07-32-21', 95 | '2021-01-22-07-33-23'] 96 | old_figure4 = plus_base(base, old_figure4) 97 | 98 | base = 'outputs/legacy/' 99 | figureA1 = [ 100 | '2020-12-31-12-00-30', 101 | '2020-12-30-22-34-23', 102 | '2020-12-31-11-00-13', 103 | '2020-12-30-19-18-12', 104 | 
'2021-01-06-20-27-51'] 105 | 106 | figureA1 = plus_base(base, figureA1) 107 | 108 | 109 | class OldLegacyPaths: 110 | """OldLegacy paths from before reproducible.""" 111 | figure123 = [ 112 | 'outputs/keep/GPExperiment/2020-12-31-12-00-30', 113 | 'outputs/keep/GPExperiment/2020-12-30-19-18-12', 114 | 'outputs/keep/TwoMoonsExperiment/2021-01-06-20-27-51'] 115 | 116 | base = 'outputs/outputs-azure/outputs/keep/MNISTExperiment/' 117 | figure4 = [ 118 | [ 119 | base+'2021-01-20-09-26-51', 120 | base+'2021-01-20-09-27-40', 121 | base+'2021-01-20-09-28-06', 122 | base+'2021-01-20-11-48-23', 123 | ], 124 | [ 125 | 'outputs/paper/2021-01-20-11-40-35', 126 | 'outputs/paper/2021-01-24-14-57-58' 127 | ] 128 | ] 129 | 130 | figure4names = [ 131 | 'BNN MNIST', 132 | 'ResNet-18 Fashion-MNIST'] 133 | 134 | figure5 = 'outputs/paper/2021-01-20-16-33-33' 135 | figure5name = 'ResNet CIFAR100' 136 | 137 | figure6 = [ 138 | ['outputs/paper/2021-01-21-08-52-31'], 139 | ['outputs/paper/2021-01-20-16-27-51'], 140 | ['outputs/paper/2021-01-26-15-22-32'], 141 | ['outputs/paper/2021-01-20-16-29-06'] 142 | ] 143 | figure6names = [ 144 | 'ResNet Fashion-MNIST', 145 | 'ResNet CIFAR-10', 146 | 'WideResNet CIFAR-100', 147 | 'Resnet CIFAR-10 Accuracy'] 148 | 149 | base = 'outputs/outputs-azure/outputs/keep/MNISTExperiment/' 150 | figure7 = [ 151 | '2021-01-20-10-12-35', 152 | '2021-01-20-10-17-31', 153 | '2021-01-22-08-41-17', 154 | '2021-01-22-08-43-13'] 155 | figure7 = plus_base(base, figure7) 156 | 157 | base = 'outputs/outputs-azure/outputs/keep/MNISTExperiment/' 158 | old_figure4 = [ 159 | '2021-01-20-09-53-03', 160 | '2021-01-20-10-10-36', 161 | '2021-01-22-07-32-21', 162 | '2021-01-22-07-33-23'] 163 | old_figure4 = plus_base(base, old_figure4) 164 | -------------------------------------------------------------------------------- /activetesting/risk_estimators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from activetesting.utils import maps 4 | 5 | DEBUG_WEIGHTS = False 6 | 7 | 8 | class RiskEstimator: 9 | def __init__(self, loss): 10 | self.loss = maps.loss[loss]() 11 | self.risks = np.array([[]]) 12 | 13 | def return_and_save(self, loss): 14 | self.risks = np.append(self.risks, loss) 15 | return loss 16 | 17 | 18 | class TrueRiskEstimator(RiskEstimator): 19 | """Used for performance evaluation.""" 20 | def __init__(self, loss, dataset, model): 21 | super().__init__(loss) 22 | 23 | idxs = dataset.test_idxs 24 | y_true = dataset.y[idxs] 25 | y_pred = model.predict(dataset.x[idxs], idxs=idxs) 26 | self.true_loss_vals = self.loss(y_pred, y_true) 27 | self.true_loss = self.true_loss_vals.mean() 28 | 29 | self.true_loss_all_idxs = np.zeros(dataset.N) 30 | self.true_loss_all_idxs[idxs] = self.true_loss_vals 31 | # print('true loss debug', self.true_loss) 32 | 33 | def estimate(self, *args): 34 | return self.return_and_save(self.true_loss) 35 | 36 | 37 | class TrueUnseenRiskEstimator(RiskEstimator): 38 | """Used for performance evaluation.""" 39 | def __init__(self, loss, dataset, model): 40 | super().__init__(loss) 41 | 42 | # not compatible with lazy prediction 43 | idxs = dataset.test_unseen_idxs 44 | y_true = dataset.y[idxs] 45 | y_pred = model.predict(dataset.x[idxs], idxs=idxs) 46 | self.true_loss_vals = self.loss(y_pred, y_true) 47 | self.true_loss = self.true_loss_vals.mean() 48 | 49 | self.true_loss_all_idxs = np.zeros(dataset.N) 50 | self.true_loss_all_idxs[idxs] = self.true_loss_vals 51 | # print('true loss debug', self.true_loss) 52 | 53 | 
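    # The ground-truth risk is computed once in __init__; estimate() simply
    # returns (and logs) that fixed value.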
def estimate(self, *args): 54 | return self.return_and_save(self.true_loss) 55 | 56 | 57 | class BiasedRiskEstimator(RiskEstimator): 58 | def __init__(self, loss, *args): 59 | super().__init__(loss) 60 | 61 | def estimate(self, predictions, observed, *args): 62 | l_i = self.loss(predictions, observed).mean() 63 | # print('debug', l_i) 64 | return self.return_and_save(l_i) 65 | 66 | 67 | class ImportanceWeightedRiskEstimator(RiskEstimator): 68 | def __init__(self, loss, dataset, *args): 69 | super().__init__(loss) 70 | self.N = len(dataset.test_idxs) 71 | 72 | def estimate(self, predictions, observed, acq_weights): 73 | 74 | l_i = self.loss(predictions, observed) 75 | M = len(predictions) 76 | 77 | R = 1/M * (1/acq_weights * l_i).sum() 78 | 79 | return self.return_and_save(R) 80 | 81 | 82 | class NaiveUnbiasedRiskEstimator(RiskEstimator): 83 | def __init__(self, loss, dataset, *args): 84 | super().__init__(loss) 85 | self.N = len(dataset.test_idxs) 86 | 87 | def estimate(self, predictions, observed, acq_weights): 88 | 89 | l_i = self.loss(predictions, observed) 90 | N = self.N 91 | M = len(predictions) 92 | m = np.arange(1, M+1) 93 | 94 | v = 1/(N * acq_weights) + (M-m) / N 95 | 96 | R = 1/M * (v * l_i).sum() 97 | 98 | return self.return_and_save(R) 99 | 100 | 101 | class FancyUnbiasedRiskEstimator(RiskEstimator): 102 | def __init__(self, loss, dataset, *args): 103 | super().__init__(loss) 104 | self.N = len(dataset.test_idxs) 105 | 106 | def estimate(self, predictions, observed, acq_weights): 107 | 108 | l_i = self.loss(predictions, observed) 109 | N = self.N 110 | M = len(predictions) 111 | 112 | if M < N: 113 | m = np.arange(1, M+1) 114 | v = ( 115 | 1 116 | + (N-M)/(N-m) * ( 117 | 1 / ((N-m+1) * acq_weights) 118 | - 1 119 | ) 120 | ) 121 | else: 122 | v = 1 123 | 124 | R = 1/M * (v * l_i).sum() 125 | 126 | if DEBUG_WEIGHTS: 127 | if isinstance(v, int): 128 | v = [v] 129 | with open('weights.csv', 'a') as f: 130 | data = str(list(v)).replace('[', '').replace(']', '') 131 | f.write(f'{len(v)}, {data}\n') 132 | 133 | return self.return_and_save(R) 134 | 135 | 136 | class FancyUnbiasedRiskEstimatorCut(RiskEstimator): 137 | def __init__(self, loss, dataset, *args): 138 | super().__init__(loss) 139 | self.N = len(dataset.test_idxs) 140 | 141 | def estimate(self, predictions, observed, acq_weights): 142 | 143 | l_i = self.loss(predictions, observed) 144 | N = self.N 145 | M = len(predictions) 146 | 147 | if M < N: 148 | m = np.arange(1, M+1) 149 | v = ( 150 | 1 151 | + (N-M)/(N-m) * ( 152 | 1 / ((N-m+1) * acq_weights) 153 | - 1 154 | ) 155 | ) 156 | 157 | # no single weight should be more than 25 percent of all weights 158 | v_sum = v.sum() 159 | cut = 0.05 160 | # select those weights and cut them 161 | v[v > cut * v_sum] = cut * v_sum 162 | else: 163 | v = 1 164 | 165 | R = 1/M * (v * l_i).sum() 166 | 167 | if DEBUG_WEIGHTS: 168 | if isinstance(v, int): 169 | v = [v] 170 | with open('weights_cut25.csv', 'a') as f: 171 | data = str(list(v)).replace('[', '').replace(']', '') 172 | f.write(f'{len(v)}, {data}\n') 173 | 174 | return self.return_and_save(R) 175 | 176 | 177 | class FancyUnbiasedRiskEstimatorCut1(RiskEstimator): 178 | def __init__(self, loss, dataset, *args): 179 | super().__init__(loss) 180 | self.N = len(dataset.test_idxs) 181 | 182 | def estimate(self, predictions, observed, acq_weights): 183 | 184 | l_i = self.loss(predictions, observed) 185 | N = self.N 186 | M = len(predictions) 187 | 188 | if M < N: 189 | m = np.arange(1, M+1) 190 | v = ( 191 | 1 192 | + (N-M)/(N-m) * ( 193 | 
1 / ((N-m+1) * acq_weights) 194 | - 1 195 | ) 196 | ) 197 | 198 | # no single weight should be more than 10 percent of all weights 199 | v_sum = v.sum() 200 | cut = 0.1 201 | # select those weights and cut them 202 | v[v > cut * v_sum] = 0 203 | else: 204 | v = 1 205 | 206 | R = 1/M * (v * l_i).sum() 207 | 208 | if DEBUG_WEIGHTS: 209 | if isinstance(v, int): 210 | v = [v] 211 | with open('weights_cut10.csv', 'a') as f: 212 | data = str(list(v)).replace('[', '').replace(']', '') 213 | f.write(f'{len(v)}, {data}\n') 214 | 215 | return self.return_and_save(R) 216 | 217 | 218 | class FancyUnbiasedRiskEstimatorCut2(RiskEstimator): 219 | def __init__(self, loss, dataset, *args): 220 | super().__init__(loss) 221 | self.N = len(dataset.test_idxs) 222 | 223 | def estimate(self, predictions, observed, acq_weights): 224 | 225 | l_i = self.loss(predictions, observed) 226 | N = self.N 227 | M = len(predictions) 228 | 229 | if M < N: 230 | m = np.arange(1, M+1) 231 | v = ( 232 | 1 233 | + (N-M)/(N-m) * ( 234 | 1 / ((N-m+1) * acq_weights) 235 | - 1 236 | ) 237 | ) 238 | 239 | # no single weight should be more than 10 percent of all weights 240 | v_sum = v.sum() 241 | cut = 0.5 242 | # select those weights and cut them 243 | v[v > cut * v_sum] = 0 244 | else: 245 | v = 1 246 | 247 | R = 1/M * (v * l_i).sum() 248 | 249 | if DEBUG_WEIGHTS: 250 | if isinstance(v, int): 251 | v = [v] 252 | with open('weights_cut40.csv', 'a') as f: 253 | data = str(list(v)).replace('[', '').replace(']', '') 254 | f.write(f'{len(v)}, {data}\n') 255 | 256 | return self.return_and_save(R) 257 | 258 | 259 | class FancyUnbiasedRiskEstimatorCut3(RiskEstimator): 260 | def __init__(self, loss, dataset, *args): 261 | super().__init__(loss) 262 | self.N = len(dataset.test_idxs) 263 | 264 | def estimate(self, predictions, observed, acq_weights): 265 | 266 | l_i = self.loss(predictions, observed) 267 | N = self.N 268 | M = len(predictions) 269 | 270 | if M < N: 271 | m = np.arange(1, M+1) 272 | v = ( 273 | 1 274 | + (N-M)/(N-m) * ( 275 | 1 / ((N-m+1) * acq_weights) 276 | - 1 277 | ) 278 | ) 279 | 280 | # no single weight should be more than 10 percent of all weights 281 | v_sum = v.sum() 282 | cut = 0.3 283 | # select those weights and cut them 284 | v[v > cut * v_sum] = 0 285 | else: 286 | v = 1 287 | 288 | R = 1/M * (v * l_i).sum() 289 | 290 | if DEBUG_WEIGHTS: 291 | if isinstance(v, int): 292 | v = [v] 293 | with open('weights_cut30.csv', 'a') as f: 294 | data = str(list(v)).replace('[', '').replace(']', '') 295 | f.write(f'{len(v)}, {data}\n') 296 | 297 | return self.return_and_save(R) 298 | -------------------------------------------------------------------------------- /activetesting/utils/maps.py: -------------------------------------------------------------------------------- 1 | """Map strings to classes.""" 2 | from activetesting.models import ( 3 | LinearRegressionModel, GaussianProcessRegressor, RandomForestClassifier, 4 | SVMClassifier, GPClassifier, RadialBNN, TinyRadialBNN, ResNet18, 5 | WideResNet, ResNet18Ensemble, WideResNetEnsemble) 6 | from activetesting.datasets import ( 7 | QuadraticDatasetForLinReg, SinusoidalDatasetForLinReg, 8 | GPDatasetForGPReg, MNISTDataset, TwoMoonsDataset, FashionMNISTDataset, 9 | Cifar10Dataset, Cifar100Dataset) 10 | from activetesting.acquisition import ( 11 | RandomAcquisition, TrueLossAcquisition, DistanceBasedAcquisition, 12 | GPAcquisitionUncertainty, 13 | GPSurrogateAcquisitionLogLik, GPSurrogateAcquisitionMSE, 14 | ClassifierAcquisitionEntropy, 15 | 
RandomForestClassifierSurrogateAcquisitionEntropy, 16 | SVMClassifierSurrogateAcquisitionEntropy, 17 | GPClassifierSurrogateAcquisitionEntropy, 18 | RandomRandomForestClassifierSurrogateAcquisitionEntropy, 19 | GPSurrogateAcquisitionMSEDoublyUncertain, 20 | SelfSurrogateAcquisitionEntropy, 21 | BNNClassifierAcquisitionMI, 22 | AnySurrogateAcquisitionEntropy, 23 | ClassifierAcquisitionAccuracy, 24 | SelfSurrogateAcquisitionAccuracy, 25 | AnySurrogateAcquisitionAccuracy 26 | ) 27 | from activetesting.loss import ( 28 | SELoss, MSELoss, RMSELoss, CrossEntropyLoss, AccuracyLoss) 29 | 30 | from activetesting.risk_estimators import ( 31 | BiasedRiskEstimator, NaiveUnbiasedRiskEstimator, 32 | FancyUnbiasedRiskEstimator, TrueRiskEstimator, 33 | ImportanceWeightedRiskEstimator, TrueUnseenRiskEstimator, 34 | FancyUnbiasedRiskEstimatorCut, FancyUnbiasedRiskEstimatorCut1, 35 | FancyUnbiasedRiskEstimatorCut2, 36 | FancyUnbiasedRiskEstimatorCut3 37 | ) 38 | 39 | model = dict( 40 | LinearRegressionModel=LinearRegressionModel, 41 | GaussianProcessRegressor=GaussianProcessRegressor, 42 | RandomForestClassifier=RandomForestClassifier, 43 | SVMClassifier=SVMClassifier, 44 | GPClassifier=GPClassifier, 45 | RadialBNN=RadialBNN, 46 | TinyRadialBNN=TinyRadialBNN, 47 | ResNet18=ResNet18, 48 | WideResNet=WideResNet, 49 | ResNet18Ensemble=ResNet18Ensemble, 50 | WideResNetEnsemble=WideResNetEnsemble, 51 | ) 52 | 53 | dataset = dict( 54 | QuadraticDatasetForLinReg=QuadraticDatasetForLinReg, 55 | SinusoidalDatasetForLinReg=SinusoidalDatasetForLinReg, 56 | GPDatasetForGPReg=GPDatasetForGPReg, 57 | MNISTDataset=MNISTDataset, 58 | TwoMoonsDataset=TwoMoonsDataset, 59 | FashionMNISTDataset=FashionMNISTDataset, 60 | Cifar10Dataset=Cifar10Dataset, 61 | Cifar100Dataset=Cifar100Dataset, 62 | ) 63 | 64 | acquisition = dict( 65 | RandomAcquisition=RandomAcquisition, 66 | TrueLossAcquisition=TrueLossAcquisition, 67 | DistanceBasedAcquisition=DistanceBasedAcquisition, 68 | GPAcquisitionUncertainty=GPAcquisitionUncertainty, 69 | GPSurrogateAcquisitionLogLik=GPSurrogateAcquisitionLogLik, 70 | GPSurrogateAcquisitionMSE=GPSurrogateAcquisitionMSE, 71 | ClassifierAcquisitionEntropy=ClassifierAcquisitionEntropy, 72 | RandomForestClassifierSurrogateAcquisitionEntropy=( 73 | RandomForestClassifierSurrogateAcquisitionEntropy), 74 | SVMClassifierSurrogateAcquisitionEntropy=( 75 | SVMClassifierSurrogateAcquisitionEntropy), 76 | GPClassifierSurrogateAcquisitionEntropy=( 77 | GPClassifierSurrogateAcquisitionEntropy), 78 | RandomRandomForestClassifierSurrogateAcquisitionEntropy=( 79 | RandomRandomForestClassifierSurrogateAcquisitionEntropy), 80 | GPSurrogateAcquisitionMSEDoublyUncertain=( 81 | GPSurrogateAcquisitionMSEDoublyUncertain), 82 | SelfSurrogateAcquisitionEntropy=SelfSurrogateAcquisitionEntropy, 83 | BNNClassifierAcquisitionMI=BNNClassifierAcquisitionMI, 84 | AnySurrogateAcquisitionEntropy=AnySurrogateAcquisitionEntropy, 85 | ClassifierAcquisitionAccuracy=ClassifierAcquisitionAccuracy, 86 | SelfSurrogateAcquisitionAccuracy=SelfSurrogateAcquisitionAccuracy, 87 | AnySurrogateAcquisitionAccuracy=AnySurrogateAcquisitionAccuracy, 88 | ) 89 | 90 | loss = dict( 91 | SELoss=SELoss, 92 | MSELoss=MSELoss, 93 | RMSELoss=RMSELoss, 94 | CrossEntropyLoss=CrossEntropyLoss, 95 | AccuracyLoss=AccuracyLoss, 96 | ) 97 | 98 | risk_estimator = dict( 99 | TrueRiskEstimator=TrueRiskEstimator, 100 | BiasedRiskEstimator=BiasedRiskEstimator, 101 | NaiveUnbiasedRiskEstimator=NaiveUnbiasedRiskEstimator, 102 | FancyUnbiasedRiskEstimator=FancyUnbiasedRiskEstimator, 
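    # The FancyUnbiasedRiskEstimatorCut* variants below clip or drop unusually
    # large importance weights (see risk_estimators.py).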
103 | ImportanceWeightedRiskEstimator=ImportanceWeightedRiskEstimator, 104 | TrueUnseenRiskEstimator=TrueUnseenRiskEstimator, 105 | FancyUnbiasedRiskEstimatorCut=FancyUnbiasedRiskEstimatorCut, 106 | FancyUnbiasedRiskEstimatorCut1=FancyUnbiasedRiskEstimatorCut1, 107 | FancyUnbiasedRiskEstimatorCut2=FancyUnbiasedRiskEstimatorCut2, 108 | FancyUnbiasedRiskEstimatorCut3=FancyUnbiasedRiskEstimatorCut3 109 | ) 110 | -------------------------------------------------------------------------------- /conf/config.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - hydra: default 3 | 4 | hoover: 5 | save_data: True 6 | 7 | dataset: 8 | test_proportion: 0.5 9 | n_points: 100 10 | standardize: False 11 | 12 | experiment: 13 | n_runs: 5000 14 | random_seed: -1 15 | debug: False 16 | save_data_until: 10 17 | loss: SELoss 18 | log_every: 100 19 | save_every: 1000 20 | 21 | risk_estimators: 22 | - TrueRiskEstimator 23 | - BiasedRiskEstimator 24 | - FancyUnbiasedRiskEstimator 25 | # - NaiveUnbiasedRiskEstimator 26 | # - ImportanceWeightedRiskEstimator 27 | 28 | acquisition_functions: 29 | - TrueLossAcquisition: 30 | - RandomAcquisition: 31 | - DistanceBasedAcquisition: 32 | 33 | acquisition: 34 | sample: True 35 | animate: True 36 | animate_until: 10 37 | lazy_save: True 38 | uniform_clip: True 39 | uniform_clip_val: 0.5 40 | -------------------------------------------------------------------------------- /conf/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | run: 3 | dir: ./outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} -------------------------------------------------------------------------------- /conf/paper/LargeCIFAR100ResNet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeCIFAR100ResNet 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 10 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.0166666666 19 | name: Cifar100Dataset 20 | standardize: True # we use torchvision transforms for cifar10 21 | # standardize: False # we use torchvision transforms for cifar10 22 | respect_train_test: True # (subsample from original test set) 23 | 24 | acquisition: 25 | lazy_save_schedule: [0, 100, 300, 500, 700] 26 | uniform_clip: False 27 | 28 | model: 29 | keep_constant: True # no retrain in main, also disable fit method after single use! 
30 | name: ResNet18 31 | # name: WideResNet 32 | efficient: True 33 | skip_fit_debug: False 34 | data_CHW: [3, 32, 32] 35 | num_classes: 100 36 | debug_mnist: False 37 | training_cfg: 38 | validation_set_size: 2000 # probably often change this 39 | stratify_val: True 40 | # max_epochs: 160 # set 41 | max_epochs: 30 42 | learning_rate: 0.1 # set 43 | batch_size: 128 44 | num_workers: 4 45 | pin_memory: True 46 | early_stopping_epochs: 5 47 | weight_decay: 5e-4 48 | optimizer: cifar 49 | scheduler: cosine 50 | # scheduler: cifar 51 | transforms: cifar 52 | testing_cfg: 53 | batch_size: 1000 54 | 55 | acquisition_functions: 56 | - TrueLossAcquisition: 57 | - RandomAcquisition: 58 | - ClassifierAcquisitionEntropy: 59 | # - SelfSurrogateAcquisitionEntropy: LazySurr 60 | - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 61 | 62 | acquisition_configs: 63 | LazySurr: 64 | keep_constant: True 65 | name: ResNet18 66 | # name: WideResNet 67 | efficient: True 68 | save_path: single_aux/model.pth 69 | skip_fit_debug: single_aux/model.pth # load instead of retraining 70 | skip_fit_debug_relative: True 71 | data_CHW: [3, 32, 32] 72 | num_classes: 100 73 | debug_mnist: False 74 | lazy: True 75 | lazy_schedule: [] # never retrain 76 | training_cfg: 77 | validation_set_size: 2000 # probably often change this 78 | stratify_val: True 79 | # max_epochs: 160 # set 80 | max_epochs: 30 81 | learning_rate: 0.1 # set 82 | batch_size: 128 83 | num_workers: 4 84 | pin_memory: True 85 | early_stopping_epochs: 5 86 | weight_decay: 5e-4 87 | optimizer: cifar 88 | scheduler: cosine 89 | # scheduler: cifar 90 | transforms: cifar 91 | testing_cfg: 92 | batch_size: 1000 93 | LazySurrEnsemble: 94 | keep_constant: True 95 | name: ResNet18Ensemble 96 | # name: WideResNet 97 | n_models: 15 98 | # name: WideResNet 99 | efficient: True 100 | save_path: ensembles/model_{}.pth 101 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 102 | skip_fit_debug_relative: True 103 | data_CHW: [3, 32, 32] 104 | num_classes: 100 105 | debug_mnist: False 106 | lazy: True 107 | lazy_schedule: [] 108 | training_cfg: 109 | validation_set_size: 2000 # probably often change this 110 | stratify_val: True 111 | # max_epochs: 160 # set 112 | max_epochs: 30 113 | learning_rate: 0.1 # set 114 | batch_size: 128 115 | num_workers: 4 116 | pin_memory: True 117 | early_stopping_epochs: 5 118 | weight_decay: 5e-4 119 | optimizer: cifar 120 | scheduler: cosine 121 | # scheduler: cifar 122 | transforms: cifar 123 | testing_cfg: 124 | batch_size: 1000 -------------------------------------------------------------------------------- /conf/paper/LargeCIFAR100WideResNet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeCIFAR100WideResNet 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 10 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.0166666666 19 | name: Cifar100Dataset 20 | standardize: True # we use torchvision transforms for cifar10 21 | # standardize: False # we use torchvision transforms for cifar10 22 | respect_train_test: True # (subsample from original test set) 23 | 24 | acquisition: 25 | lazy_save_schedule: [0, 100, 300, 500, 700] 26 | uniform_clip: False 27 | 28 | model: 29 | keep_constant: True # no retrain in main, also disable fit method after single use! 
30 | name: WideResNet 31 | efficient: True 32 | skip_fit_debug: False 33 | data_CHW: [3, 32, 32] 34 | num_classes: 100 35 | debug_mnist: False 36 | training_cfg: 37 | validation_set_size: 2000 # probably often change this 38 | stratify_val: True 39 | max_epochs: 200 40 | learning_rate: 0.1 # set 41 | batch_size: 128 42 | num_workers: 4 43 | pin_memory: True 44 | early_stopping_epochs: 200 45 | weight_decay: 5e-4 46 | optimizer: cifar 47 | scheduler: devries 48 | # scheduler: cifar 49 | transforms: cifar 50 | testing_cfg: 51 | batch_size: 1000 52 | 53 | acquisition_functions: 54 | - TrueLossAcquisition: 55 | - RandomAcquisition: 56 | # - ClassifierAcquisitionEntropy: 57 | # - SelfSurrogateAcquisitionEntropy: LazySurr 58 | - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 59 | 60 | # TODO ACTIVATE ENSEMBLE, MAKE ALL EPOCHS THE SAME 61 | acquisition_configs: 62 | LazySurrEnsemble: 63 | keep_constant: True 64 | name: WideResNetEnsemble 65 | # name: WideResNet 66 | n_models: 10 67 | # name: WideResNet 68 | efficient: True 69 | save_path: ensembles/model_{}.pth 70 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 71 | skip_fit_debug_relative: True 72 | data_CHW: [3, 32, 32] 73 | num_classes: 100 74 | debug_mnist: False 75 | lazy: True 76 | lazy_schedule: [] 77 | training_cfg: 78 | validation_set_size: 2000 # probably often change this 79 | stratify_val: True 80 | max_epochs: 200 81 | learning_rate: 0.1 # set 82 | batch_size: 128 83 | num_workers: 4 84 | pin_memory: True 85 | early_stopping_epochs: 200 86 | weight_decay: 5e-4 87 | optimizer: cifar 88 | scheduler: devries 89 | # scheduler: cifar 90 | transforms: cifar 91 | testing_cfg: 92 | batch_size: 1000 -------------------------------------------------------------------------------- /conf/paper/LargeCIFAR10ResNet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeCIFAR10ResNet 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 5 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.0166666666 19 | name: Cifar10Dataset 20 | standardize: True 21 | stratify: True 22 | respect_train_test: True # (subsample from original test set) 23 | 24 | acquisition: 25 | lazy_save_schedule: [0, 100, 300, 500, 700] 26 | uniform_clip: False 27 | 28 | 29 | model: 30 | keep_constant: True # no retrain in main, also disable fit method after single use! 
31 | name: ResNet18 32 | # name: WideResNet 33 | efficient: True 34 | skip_fit_debug: False 35 | data_CHW: [3, 32, 32] 36 | num_classes: 10 37 | debug_mnist: False 38 | training_cfg: 39 | validation_set_size: 2000 # probably often change this 40 | stratify_val: True 41 | # max_epochs: 160 # set 42 | max_epochs: 30 43 | learning_rate: 0.1 # set 44 | batch_size: 128 45 | num_workers: 4 46 | pin_memory: True 47 | early_stopping_epochs: 5 48 | weight_decay: 5e-4 49 | optimizer: cifar 50 | scheduler: cosine 51 | # scheduler: cifar 52 | transforms: cifar 53 | testing_cfg: 54 | batch_size: 1000 55 | 56 | acquisition_functions: 57 | - TrueLossAcquisition: 58 | - RandomAcquisition: 59 | # - ClassifierAcquisitionEntropy: 60 | # - SelfSurrogateAcquisitionEntropy: LazySurr 61 | - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 62 | 63 | acquisition_configs: 64 | LazySurr: 65 | keep_constant: True 66 | name: ResNet18 67 | # name: WideResNet 68 | efficient: True 69 | save_path: single_aux/model.pth 70 | skip_fit_debug: single_aux/model.pth # load instead of retraining 71 | skip_fit_debug_relative: True 72 | data_CHW: [3, 32, 32] 73 | num_classes: 10 74 | debug_mnist: False 75 | lazy: True 76 | lazy_schedule: [] 77 | training_cfg: 78 | validation_set_size: 2000 # probably often change this 79 | stratify_val: True 80 | # max_epochs: 160 # set 81 | max_epochs: 30 82 | learning_rate: 0.1 # set 83 | batch_size: 128 84 | num_workers: 4 85 | pin_memory: True 86 | early_stopping_epochs: 5 87 | weight_decay: 5e-4 88 | optimizer: cifar 89 | scheduler: cosine 90 | # scheduler: cifar 91 | transforms: cifar 92 | testing_cfg: 93 | batch_size: 1000 94 | LazySurrEnsemble: 95 | keep_constant: True 96 | name: ResNet18Ensemble 97 | # name: WideResNet 98 | n_models: 5 99 | # name: WideResNet 100 | efficient: True 101 | save_path: ensembles/model_{}.pth 102 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 103 | skip_fit_debug_relative: True 104 | data_CHW: [3, 32, 32] 105 | num_classes: 10 106 | debug_mnist: False 107 | lazy: True 108 | lazy_schedule: [] 109 | training_cfg: 110 | validation_set_size: 2000 # probably often change this 111 | stratify_val: True 112 | # max_epochs: 160 # set 113 | max_epochs: 30 114 | learning_rate: 0.1 # set 115 | batch_size: 128 116 | num_workers: 4 117 | pin_memory: True 118 | early_stopping_epochs: 5 119 | weight_decay: 5e-4 120 | optimizer: cifar 121 | scheduler: cosine 122 | # scheduler: cifar 123 | transforms: cifar 124 | testing_cfg: 125 | batch_size: 1000 -------------------------------------------------------------------------------- /conf/paper/LargeCIFAR10ResNetAccuracy.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeCIFAR10ResNetAccuracy 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 3000 11 | loss: AccuracyLoss 12 | save_every: 10 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.0166666666 19 | name: Cifar10Dataset 20 | standardize: True # we use torchvision transforms for cifar10 21 | # standardize: False # we use torchvision transforms for cifar10 22 | respect_train_test: True # (subsample from original test set) 23 | 24 | acquisition: 25 | lazy_save_schedule: [0, 100, 300, 500, 700] 26 | uniform_clip: False 27 | 28 | model: 29 | keep_constant: True # no retrain in main, also disable fit method after single use! 
30 | name: ResNet18 31 | # name: WideResNet 32 | efficient: True 33 | skip_fit_debug: False 34 | data_CHW: [3, 32, 32] 35 | num_classes: 10 36 | debug_mnist: False 37 | training_cfg: 38 | validation_set_size: 2000 # probably often change this 39 | stratify_val: True 40 | # max_epochs: 160 # set 41 | max_epochs: 30 42 | learning_rate: 0.1 # set 43 | batch_size: 128 44 | num_workers: 4 45 | pin_memory: True 46 | early_stopping_epochs: 5 47 | weight_decay: 5e-4 48 | optimizer: cifar 49 | scheduler: cosine 50 | # scheduler: cifar 51 | transforms: cifar 52 | testing_cfg: 53 | batch_size: 1000 54 | 55 | acquisition_functions: 56 | - TrueLossAcquisition: 57 | - RandomAcquisition: 58 | - ClassifierAcquisitionAccuracy: 59 | - SelfSurrogateAcquisitionAccuracy: LazySurr 60 | - AnySurrogateAcquisitionAccuracy: LazySurrEnsemble 61 | - ClassifierAcquisitionEntropy: 62 | - SelfSurrogateAcquisitionEntropy: LazySurr 63 | - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 64 | 65 | # TODO ACTIVATE ENSEMBLE, MAKE ALL EPOCHS THE SAME 66 | 67 | acquisition_configs: 68 | LazySurr: 69 | keep_constant: True 70 | name: ResNet18 71 | # name: WideResNet 72 | efficient: True 73 | save_path: single_aux/model.pth 74 | skip_fit_debug: single_aux/model.pth # load instead of retraining 75 | skip_fit_debug_relative: True 76 | data_CHW: [3, 32, 32] 77 | num_classes: 10 78 | debug_mnist: False 79 | lazy: True 80 | lazy_schedule: [] 81 | training_cfg: 82 | validation_set_size: 2000 # probably often change this 83 | stratify_val: True 84 | # max_epochs: 160 # set 85 | max_epochs: 30 86 | learning_rate: 0.1 # set 87 | batch_size: 128 88 | num_workers: 4 89 | pin_memory: True 90 | early_stopping_epochs: 5 91 | weight_decay: 5e-4 92 | optimizer: cifar 93 | scheduler: cosine 94 | # scheduler: cifar 95 | transforms: cifar 96 | testing_cfg: 97 | batch_size: 1000 98 | LazySurrEnsemble: 99 | keep_constant: True 100 | name: ResNet18Ensemble 101 | # name: WideResNet 102 | n_models: 5 103 | # name: WideResNet 104 | efficient: True 105 | save_path: ensembles/model_{}.pth 106 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 107 | skip_fit_debug_relative: True 108 | data_CHW: [3, 32, 32] 109 | num_classes: 10 110 | debug_mnist: False 111 | lazy: True 112 | lazy_schedule: [] 113 | training_cfg: 114 | validation_set_size: 2000 # probably often change this 115 | stratify_val: True 116 | # max_epochs: 160 # set 117 | max_epochs: 30 118 | learning_rate: 0.1 # set 119 | batch_size: 128 120 | num_workers: 4 121 | pin_memory: True 122 | early_stopping_epochs: 5 123 | weight_decay: 5e-4 124 | optimizer: cifar 125 | scheduler: cosine 126 | # scheduler: cifar 127 | transforms: cifar 128 | testing_cfg: 129 | batch_size: 1000 -------------------------------------------------------------------------------- /conf/paper/LargeFMNISTBNN.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeFMNISTBNN 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 5 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.16666666666 19 | name: FashionMNISTDataset 20 | standardize: True 21 | stratify: True 22 | 23 | acquisition: 24 | lazy_save_schedule: [0, 100, 300, 500, 700] 25 | uniform_clip: False 26 | 27 | model: 28 | name: RadialBNN 29 | channels: 16 30 | skip_fit_debug: False 31 | data_CHW: [1, 28, 28] 32 | efficient: 
True # affects main model 33 | training_cfg: 34 | validation_set_size: 1280 # probably often change this 35 | max_epochs: 50 36 | learning_rate: 1e-4 37 | batch_size: 64 38 | variational_samples: 8 39 | num_workers: 4 40 | pin_memory: True 41 | early_stopping_epochs: 5 42 | padding_epochs: none 43 | num_repetitions: 1 44 | weight_decay: 1e-4 45 | model: radial_bnn 46 | channels: 16 47 | checkpoints_frequency: 3 48 | data_noise_proportion: None 49 | testing_cfg: 50 | variational_samples: 100 51 | 52 | acquisition_functions: 53 | - TrueLossAcquisition: 54 | - RandomAcquisition: 55 | - ClassifierAcquisitionEntropy: 56 | - BNNClassifierAcquisitionMI: 57 | # - SelfSurrogateAcquisitionEntropy: LazySurr 58 | 59 | acquisition_configs: 60 | LazySurr: 61 | name: RadialBNN 62 | channels: 16 63 | skip_fit_debug: False 64 | data_CHW: [1, 28, 28] 65 | efficient: True # affects main model 66 | lazy: True 67 | lazy_schedule: [] 68 | training_cfg: 69 | validation_set_size: 1280 # probably often change this 70 | max_epochs: 50 71 | learning_rate: 1e-4 72 | batch_size: 64 73 | variational_samples: 8 74 | num_workers: 4 75 | pin_memory: True 76 | early_stopping_epochs: 5 77 | padding_epochs: none 78 | num_repetitions: 1 79 | weight_decay: 1e-4 80 | model: radial_bnn 81 | channels: 16 82 | checkpoints_frequency: 3 83 | data_noise_proportion: None 84 | testing_cfg: 85 | variational_samples: 100 -------------------------------------------------------------------------------- /conf/paper/LargeFMNISTResNet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeFMNISTResNet 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 5 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.0166666666 19 | name: FashionMNISTDataset 20 | standardize: True # we use torchvision transforms for cifar10 21 | # standardize: False # we use torchvision transforms for cifar10 22 | stratify: True 23 | respect_train_test: True # (subsample from original test set) 24 | 25 | acquisition: 26 | lazy_save_schedule: [0, 100, 300, 500, 700] 27 | uniform_clip: False 28 | 29 | model: 30 | keep_constant: True # no retrain in main, also disable fit method after single use! 
31 | name: ResNet18 32 | # name: WideResNet 33 | efficient: True 34 | skip_fit_debug: False 35 | data_CHW: [1, 28, 28] 36 | num_classes: 10 37 | debug_mnist: True 38 | training_cfg: 39 | validation_set_size: 2560 # probably often change this 40 | stratify_val: True 41 | # max_epochs: 160 # set 42 | max_epochs: 30 43 | learning_rate: 0.1 # set 44 | batch_size: 128 45 | num_workers: 4 46 | pin_memory: True 47 | early_stopping_epochs: 5 48 | weight_decay: 5e-4 49 | optimizer: cifar 50 | scheduler: cosine 51 | # scheduler: cosine 52 | transforms: cifar 53 | testing_cfg: 54 | batch_size: 1000 55 | 56 | acquisition_functions: 57 | - TrueLossAcquisition: 58 | - RandomAcquisition: 59 | - ClassifierAcquisitionEntropy: 60 | # - SelfSurrogateAcquisitionEntropy: LazySurr 61 | # - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 62 | - AnySurrogateAcquisitionEntropy: LazySurrEnsembleLarge 63 | 64 | 65 | acquisition_configs: 66 | LazySurr: 67 | keep_constant: True 68 | name: ResNet18 69 | # name: WideResNet 70 | efficient: True 71 | save_path: single_aux/model.pth 72 | skip_fit_debug: single_aux/model.pth # load instead of retraining 73 | skip_fit_debug_relative: True 74 | data_CHW: [1, 28, 28] 75 | num_classes: 10 76 | debug_mnist: True 77 | lazy: True 78 | lazy_schedule: [] 79 | training_cfg: 80 | validation_set_size: 2560 # probably often change this 81 | stratify_val: True 82 | # max_epochs: 160 # set 83 | max_epochs: 30 84 | learning_rate: 0.1 # set 85 | batch_size: 128 86 | num_workers: 4 87 | pin_memory: True 88 | early_stopping_epochs: 5 89 | weight_decay: 5e-4 90 | optimizer: cifar 91 | scheduler: cosine 92 | # scheduler: cosine 93 | transforms: cifar 94 | testing_cfg: 95 | batch_size: 1000 96 | LazySurrEnsemble: 97 | keep_constant: True 98 | name: ResNet18Ensemble 99 | # name: WideResNet 100 | n_models: 5 101 | # name: WideResNet 102 | efficient: True 103 | save_path: ensembles/model_{}.pth 104 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 105 | skip_fit_debug_relative: True 106 | data_CHW: [1, 28, 28] 107 | num_classes: 10 108 | debug_mnist: True 109 | lazy: True 110 | lazy_schedule: [] 111 | training_cfg: 112 | validation_set_size: 2560 # probably often change this 113 | stratify_val: True 114 | # max_epochs: 160 # set 115 | max_epochs: 30 116 | learning_rate: 0.1 # set 117 | batch_size: 128 118 | num_workers: 4 119 | pin_memory: True 120 | early_stopping_epochs: 5 121 | weight_decay: 5e-4 122 | optimizer: cifar 123 | scheduler: cosine 124 | # scheduler: cosine 125 | transforms: cifar 126 | testing_cfg: 127 | batch_size: 1000 128 | LazySurrEnsembleLarge: 129 | keep_constant: True 130 | name: ResNet18Ensemble 131 | # name: WideResNet 132 | n_models: 10 133 | # name: WideResNet 134 | efficient: True 135 | save_path: ensembles/model_{}.pth 136 | skip_fit_debug: ensembles/model_{}.pth # load instead of retraining 137 | skip_fit_debug_relative: True 138 | data_CHW: [1, 28, 28] 139 | num_classes: 10 140 | debug_mnist: True 141 | lazy: True 142 | lazy_schedule: [] 143 | training_cfg: 144 | validation_set_size: 2560 # probably often change this 145 | stratify_val: True 146 | # max_epochs: 160 # set 147 | max_epochs: 30 148 | learning_rate: 0.1 # set 149 | batch_size: 128 150 | num_workers: 4 151 | pin_memory: True 152 | early_stopping_epochs: 5 153 | weight_decay: 5e-4 154 | optimizer: cifar 155 | scheduler: cosine 156 | # scheduler: cosine 157 | transforms: cifar 158 | testing_cfg: 159 | batch_size: 1000 160 | 
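The paper configs above all follow the same pattern: a `model` block resolved by name through `activetesting.utils.maps`, plus `acquisition_functions` and, where needed, `acquisition_configs` for surrogate models. As a rough sketch only — the actual wiring lives in `activetesting/experiment.py` and `main.py`, and the helper name below is made up for illustration — instantiating the main model from such a config could look like this:

```
# Illustrative sketch, not the repository's actual experiment code.
# Assumes `cfg` is the merged Hydra config for one of the conf/paper/*.yaml files.
from activetesting.utils import maps

def build_main_model(cfg, x_train, y_train):
    ModelClass = maps.model[cfg.model.name]   # e.g. 'ResNet18' -> ResNet18
    model = ModelClass(cfg.model)             # model classes take their config block
    model.fit(x_train, y_train)               # trains, or loads the skip_fit_debug checkpoint
    return model
```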
-------------------------------------------------------------------------------- /conf/paper/LargeMNISTBNN.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/LargeMNISTBNN 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 992 11 | loss: CrossEntropyLoss 12 | save_every: 5 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 60000 18 | test_proportion: 0.16666666666 19 | name: MNISTDataset 20 | standardize: True 21 | stratify: True 22 | 23 | acquisition: 24 | lazy_save_schedule: [0, 100, 300, 500, 700] 25 | uniform_clip: False 26 | 27 | model: 28 | name: RadialBNN 29 | channels: 16 30 | skip_fit_debug: False 31 | data_CHW: [1, 28, 28] 32 | efficient: True # affects main model 33 | training_cfg: 34 | validation_set_size: 1280 # probably often change this 35 | max_epochs: 50 36 | learning_rate: 1e-4 37 | batch_size: 64 38 | variational_samples: 8 39 | num_workers: 4 40 | pin_memory: True 41 | early_stopping_epochs: 5 42 | padding_epochs: none 43 | num_repetitions: 1 44 | weight_decay: 1e-4 45 | model: radial_bnn 46 | channels: 16 47 | checkpoints_frequency: 3 48 | data_noise_proportion: None 49 | testing_cfg: 50 | variational_samples: 100 51 | 52 | acquisition_functions: 53 | - TrueLossAcquisition: 54 | - RandomAcquisition: 55 | - ClassifierAcquisitionEntropy: 56 | - BNNClassifierAcquisitionMI: 57 | # - SelfSurrogateAcquisitionEntropy: LazySurr 58 | 59 | acquisition_configs: 60 | LazySurr: 61 | name: RadialBNN 62 | channels: 16 63 | skip_fit_debug: False 64 | data_CHW: [1, 28, 28] 65 | efficient: True # affects main model 66 | lazy: True 67 | lazy_schedule: [] 68 | training_cfg: 69 | validation_set_size: 1280 # probably often change this 70 | max_epochs: 50 71 | learning_rate: 1e-4 72 | batch_size: 64 73 | variational_samples: 8 74 | num_workers: 4 75 | pin_memory: True 76 | early_stopping_epochs: 5 77 | padding_epochs: none 78 | num_repetitions: 1 79 | weight_decay: 1e-4 80 | model: radial_bnn 81 | channels: 16 82 | checkpoints_frequency: 3 83 | data_noise_proportion: None 84 | testing_cfg: 85 | variational_samples: 100 -------------------------------------------------------------------------------- /conf/paper/SmallFMNISTResNet.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/SmallFMNISTResNet 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 1 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 5250 # 5000 test, 250 train 18 | test_proportion: 0.9523809524 19 | name: FashionMNISTDataset 20 | standardize: True 21 | stratify: True 22 | 23 | acquisition: 24 | lazy_save_schedule: [0, 100, 300, 500, 700] 25 | uniform_clip: False 26 | 27 | model: 28 | name: ResNet18 29 | # name: WideResNet 30 | efficient: True 31 | lazy: True 32 | skip_fit_debug: False 33 | data_CHW: [1, 28, 28] 34 | num_classes: 10 35 | debug_mnist: True 36 | training_cfg: 37 | validation_set_size: 50 # probably often change this 38 | stratify_val: True 39 | max_epochs: 160 # set 40 | # max_epochs: 5 41 | learning_rate: 0.1 # set 42 | batch_size: 128 43 | num_workers: 4 44 | pin_memory: True 45 | early_stopping_epochs: 20 46 | weight_decay: 5e-4 47 | optimizer: cifar 48 | scheduler: cosine 49 | # scheduler: cifar 50 | # transforms: cifar 51 | testing_cfg: 52 | batch_size: 1000 
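# Each entry below is `AcquisitionStrategy: optional-key`; when a key is given
# (e.g. RFInfDepth, LazySurr), it selects the matching surrogate settings from
# `acquisition_configs` at the bottom of this file.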
53 | 54 | acquisition_functions: 55 | - TrueLossAcquisition: 56 | - RandomAcquisition: 57 | - ClassifierAcquisitionEntropy: 58 | - RandomForestClassifierSurrogateAcquisitionEntropy: RFInfDepth 59 | - SelfSurrogateAcquisitionEntropy: 60 | - SelfSurrogateAcquisitionEntropy: LazySurr 61 | - AnySurrogateAcquisitionEntropy: LazySurrEnsemble 62 | - RandomForestClassifierSurrogateAcquisitionEntropy: RFInfDepthTrain # retrain on train only 63 | - SelfSurrogateAcquisitionEntropy: LazyTrain # retrain on train only 64 | 65 | acquisition_configs: 66 | RFInfDepth: 67 | lazy: True 68 | efficient: True 69 | sk_args: 70 | n_estimators: 100 71 | criterion: entropy 72 | max_features: sqrt 73 | n_jobs: -1 74 | LazySurr: 75 | name: ResNet18 76 | # name: WideResNet 77 | efficient: True 78 | lazy: True 79 | lazy_schedule: [0] 80 | skip_fit_debug: False 81 | data_CHW: [1, 28, 28] 82 | num_classes: 10 83 | debug_mnist: True 84 | training_cfg: 85 | validation_set_size: 50 # probably often change this 86 | stratify_val: True 87 | max_epochs: 160 # set 88 | # max_epochs: 5 89 | learning_rate: 0.1 # set 90 | batch_size: 128 91 | num_workers: 4 92 | pin_memory: True 93 | early_stopping_epochs: 20 94 | weight_decay: 5e-4 95 | optimizer: cifar 96 | scheduler: cosine 97 | # scheduler: cifar 98 | # transforms: cifar 99 | testing_cfg: 100 | batch_size: 1000 101 | LazySurrEnsemble: 102 | name: ResNet18Ensemble 103 | # name: WideResNet 104 | efficient: True 105 | n_models: 5 106 | lazy: True 107 | lazy_schedule: [0] 108 | skip_fit_debug: False 109 | data_CHW: [1, 28, 28] 110 | num_classes: 10 111 | debug_mnist: True 112 | training_cfg: 113 | validation_set_size: 50 # probably often change this 114 | stratify_val: True 115 | max_epochs: 160 # set 116 | # max_epochs: 5 117 | learning_rate: 0.1 # set 118 | batch_size: 128 119 | num_workers: 4 120 | pin_memory: True 121 | early_stopping_epochs: 20 122 | weight_decay: 5e-4 123 | optimizer: cifar 124 | scheduler: cosine 125 | # scheduler: cifar 126 | # transforms: cifar 127 | testing_cfg: 128 | batch_size: 1000 129 | RFInfDepthTrain: 130 | lazy: True 131 | efficient: True 132 | on_train_only: True # only supported for lazy 133 | sk_args: 134 | n_estimators: 100 135 | criterion: entropy 136 | max_features: sqrt 137 | n_jobs: -1 138 | LazyTrain: 139 | name: ResNet18 140 | # name: WideResNet 141 | efficient: True 142 | lazy: True 143 | on_train_only: True 144 | skip_fit_debug: False 145 | data_CHW: [1, 28, 28] 146 | num_classes: 10 147 | debug_mnist: True 148 | training_cfg: 149 | validation_set_size: 50 # probably often change this 150 | stratify_val: True 151 | max_epochs: 160 # set 152 | # max_epochs: 5 153 | learning_rate: 0.1 # set 154 | batch_size: 128 155 | num_workers: 4 156 | pin_memory: True 157 | early_stopping_epochs: 20 158 | weight_decay: 5e-4 159 | optimizer: cifar 160 | scheduler: cosine 161 | # scheduler: cifar 162 | # transforms: cifar 163 | testing_cfg: 164 | batch_size: 1000 -------------------------------------------------------------------------------- /conf/paper/SmallMNISTBNN.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/SmallMNISTBNN 5 | 6 | hoover: 7 | save_data: False 8 | 9 | experiment: 10 | n_runs: 1000 11 | loss: CrossEntropyLoss 12 | save_every: 5 13 | log_every: 1 14 | abort_test_after: 1000 15 | 16 | dataset: 17 | n_points: 5250 # 5000 test, 250 train 18 | test_proportion: 0.9523809524 19 | name: MNISTDataset 20 | standardize: True 21 | 
stratify: True 22 | 23 | acquisition: 24 | lazy_save_schedule: [0, 100, 300, 500, 700] 25 | uniform_clip: False 26 | 27 | model: 28 | # name: TinyRadialBNN 29 | name: RadialBNN 30 | channels: 16 31 | skip_fit_debug: False 32 | data_CHW: [1, 28, 28] 33 | lazy: True # affects acquisition 34 | efficient: True # affects main model 35 | training_cfg: 36 | validation_set_size: 50 37 | # stratify_val: False 38 | stratify_val: True 39 | max_epochs: 500 40 | learning_rate: 1e-4 41 | batch_size: 64 42 | variational_samples: 8 43 | num_workers: 4 44 | pin_memory: True 45 | early_stopping_epochs: 5 46 | padding_epochs: none 47 | num_repetitions: 1 48 | weight_decay: 1e-4 49 | model: radial_bnn 50 | channels: 16 51 | checkpoints_frequency: 3 52 | data_noise_proportion: None 53 | testing_cfg: 54 | variational_samples: 100 55 | 56 | acquisition_functions: 57 | - TrueLossAcquisition: 58 | - RandomAcquisition: 59 | - ClassifierAcquisitionEntropy: 60 | - RandomForestClassifierSurrogateAcquisitionEntropy: RFInfDepth 61 | - SelfSurrogateAcquisitionEntropy: 62 | 63 | acquisition_configs: 64 | RFInfDepth: 65 | lazy: True 66 | efficient: True 67 | sk_args: 68 | n_estimators: 100 69 | criterion: entropy 70 | max_features: sqrt 71 | n_jobs: -1 -------------------------------------------------------------------------------- /conf/paper/SyntheticGPGP.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/SyntheticGPGP 5 | 6 | experiment: 7 | n_runs: 5000 8 | 9 | dataset: 10 | test_proportion: 0.9 11 | n_points: 50 12 | name: GPDatasetForGPReg 13 | standardize: False 14 | 15 | model: 16 | name: GaussianProcessRegressor 17 | task: regression 18 | efficient: True 19 | 20 | acquisition_functions: 21 | - TrueLossAcquisition: 22 | - RandomAcquisition: 23 | - DistanceBasedAcquisition: 24 | - GPSurrogateAcquisitionMSE: 25 | - GPAcquisitionUncertainty: 26 | 27 | acquisition: 28 | uniform_clip: False 29 | -------------------------------------------------------------------------------- /conf/paper/SyntheticQuadraticLinear.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/SyntheticQuadraticLinear 5 | 6 | experiment: 7 | n_runs: 5000 8 | 9 | dataset: 10 | test_proportion: 0.9 11 | n_points: 50 12 | name: QuadraticDatasetForLinReg 13 | standardize: False 14 | 15 | model: 16 | name: LinearRegressionModel 17 | task: regression 18 | efficient: True 19 | 20 | acquisition_functions: 21 | - TrueLossAcquisition: 22 | - RandomAcquisition: 23 | - DistanceBasedAcquisition: 24 | - GPSurrogateAcquisitionMSE: 25 | 26 | acquisition: 27 | uniform_clip: False 28 | -------------------------------------------------------------------------------- /conf/paper/SyntheticTwoMoonsRF.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | hydra: 3 | run: 4 | dir: ./outputs/final/SyntheticTwoMoonsRF 5 | 6 | experiment: 7 | n_runs: 2500 8 | loss: CrossEntropyLoss 9 | random_seed: 0 10 | 11 | dataset: 12 | test_proportion: 0.1 13 | n_points: 500 14 | name: TwoMoonsDataset 15 | standardize: False 16 | noise: 0.01 17 | log_every: 50 18 | 19 | model: 20 | name: RandomForestClassifier 21 | 22 | acquisition_functions: 23 | - TrueLossAcquisition: 24 | - RandomAcquisition: 25 | # - ClassifierAcquisitionEntropy: 26 | - RandomForestClassifierSurrogateAcquisitionEntropy: 27 | 28 | acquisition: 29 | 
uniform_clip: False -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """Main active testing loop.""" 2 | import os 3 | import logging 4 | import hydra 5 | import warnings 6 | 7 | import numpy as np 8 | import torch 9 | 10 | from activetesting.experiment import Experiment 11 | from activetesting.utils import maps 12 | from activetesting.hoover import Hoover 13 | from activetesting.models import make_efficient 14 | from omegaconf import OmegaConf 15 | 16 | 17 | @hydra.main(config_path='conf', config_name='config') 18 | def main(cfg): 19 | """Run main experiment loop. 20 | 21 | Repeat active testing across multiple data splits and acquisition 22 | functions for all risk estimators. 23 | """ 24 | 25 | rng = cfg.experiment.random_seed 26 | if rng == -1: 27 | rng = np.random.randint(0, 1000) 28 | 29 | if rng is not False: 30 | np.random.seed(rng) 31 | torch.manual_seed(rng) 32 | logging.info(f'Setting random seed to {rng}.') 33 | 34 | hoover = Hoover(cfg.hoover) 35 | logging.info(f'Logging to {os.getcwd()}.') 36 | 37 | model = None 38 | 39 | # Right now this averages over both train and testing! 40 | for run in range(cfg.experiment.n_runs): 41 | if run % cfg.experiment.log_every == 0 or cfg.experiment.debug: 42 | logging.info(f'Run {run} in {os.getcwd()}.') 43 | if cuda := torch.cuda.is_available(): 44 | logging.info(f'Still using cuda: {cuda}.') 45 | else: 46 | os.system('touch cuda_failure.txt') 47 | 48 | dataset = maps.dataset[cfg.dataset.name](cfg.dataset, cfg.model) 49 | 50 | # Train model on training data. 51 | if (not cfg.model.get('keep_constant', False)) or (model is None): 52 | # default case 53 | model = maps.model[cfg.model.name](cfg.model) 54 | model.fit(*dataset.train_data) 55 | 56 | # Always predict on test data again 57 | # TODO: need to fix this for efficient prediction 58 | if cfg.model.get('efficient', False): 59 | logging.info('Efficient prediction on test set.') 60 | model = make_efficient(model, dataset) 61 | 62 | # if cfg.experiment.debug: 63 | # Report train error 64 | # logging.info('Model train error:') 65 | # model.performance( 66 | # *dataset.train_data, dataset.cfg.task_type) 67 | 68 | # if not check_valid(model, dataset): 69 | # continue 70 | 71 | if run < cfg.experiment.save_data_until: 72 | hoover.add_data(run, dataset.export()) 73 | 74 | for acq_dict in cfg.acquisition_functions: 75 | # Slightly unclean, but could not figure out how to make 76 | # this work with Hydra otherwise 77 | acquisition = list(acq_dict.keys())[0] 78 | acq_cfg_name = list(acq_dict.values())[0] 79 | 80 | if cfg.experiment.debug: 81 | logging.info(f'\t Acquisition: {acquisition}') 82 | 83 | if (n := acq_cfg_name) is not None: 84 | acq_config = cfg['acquisition_configs'][n] 85 | else: 86 | acq_config = None 87 | 88 | experiment = Experiment( 89 | run, cfg, dataset, model, acquisition, acq_config) 90 | 91 | i = 0 92 | while not experiment.finished: 93 | i += 1 94 | if cfg.experiment.debug: 95 | logging.info( 96 | f'\t Acquisition: {acquisition} – \t Step {i}.') 97 | 98 | experiment.step(i) 99 | 100 | # Add config to name for logging. 101 | if (n := acq_cfg_name) is not None: 102 | acquisition = f'{acquisition}_{n}' 103 | 104 | # Extract results from acquisition experiment 105 | hoover.add_results(run, acquisition, experiment.export_data()) 106 | 107 | # Reset selected test_indices.
108 | dataset.restart() 109 | 110 | if run % cfg.experiment.get('save_every', 1e19) == 0: 111 | logging.info('Intermediate save.') 112 | hoover.save() 113 | 114 | logging.info('Completed all runs.') 115 | hoover.save() 116 | 117 | 118 | def check_valid(model, dataset): 119 | """For classification with small number of points and unstratified.""" 120 | if hasattr(model.model, 'n_classes_'): 121 | if (nc := model.model.n_classes_) != dataset.cfg.n_classes: 122 | warnings.warn( 123 | f'Not all classes present in train data. ' 124 | f'Skipping run.') 125 | return False 126 | return True 127 | 128 | 129 | if __name__ == '__main__': 130 | import os 131 | os.environ['HYDRA_FULL_ERROR'] = '1' 132 | 133 | def get_base_dir(): 134 | return os.getenv('BASE_DIR', default='.') 135 | 136 | OmegaConf.register_resolver('BASE_DIR', get_base_dir) 137 | 138 | main() -------------------------------------------------------------------------------- /notebooks/explore_experiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "os.chdir('..')" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Evaluating Experiments -- The Visualiser" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "This notebook guides you through the evaluation of a custom experiment.\n", 25 | "\n", 26 | "The `Visualiser` is the main class that will help us achieve this." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "from activetesting.visualize import Visualiser\n", 36 | "\n", 37 | "%load_ext autoreload\n", 38 | "%autoreload 2" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Let's say you have run the experiment\n", 46 | "```\n", 47 | "python main.py +paper=SyntheticGPGP\n", 48 | "```\n", 49 | "which logs to\n", 50 | "```\n", 51 | "outputs/final/SyntheticGPGP\n", 52 | "```\n", 53 | "\n", 54 | "You can load the results for this experiment with" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "vis = Visualiser('outputs/final/SyntheticGPGP')" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "The visualiser then has loads of things for you to look at.\n", 71 | "\n", 72 | "General information about the run:" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "vis.config()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "vis.n_runs" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "vis.acquisitions" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "vis.risks" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "(Note that $\\hat{R}_{\\text{LURE}} = $ `FancyUnbiasedRiskEstimator` and that $\\hat{R}_{\\text{iid}} = $ `BiasedRiskEstimator`.)" 116 | ] 117 | }, 118 | {
119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "You can also have a look at the data for the first few runs." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "vis.plot_data(0)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "If you want to look at the convergence of active testing, there are a couple of tools at your disposal.\n", 139 | "\n", 140 | "First, select combinations of acquisition strategy and risk estimator." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "acq_risks = [\n", 150 | " ['RandomAcquisition', 'BiasedRiskEstimator'],\n", 151 | " ['GPSurrogateAcquisitionMSE', 'FancyUnbiasedRiskEstimator'],\n", 152 | " ['TrueLossAcquisition', 'FancyUnbiasedRiskEstimator'],\n", 153 | " ]" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "vis.plot_risks_select_combinations(acq_risks)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "fig, ax = vis.plot_log_convergence(acq_risks[:-1])\n", 172 | "ax.set_ylim(1e-7, 1e-2)\n", 173 | "ax.set_xscale('linear')\n", 174 | "ax.set_xlim(0, 30)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "You can also investigate the behaviour of the loss distributions (equivalent to Figure 7 in the paper)." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "acquisition = 'GPSurrogateAcquisitionMSE'\n", 191 | "fig, ax = vis.loss_dist(acquisition, run=0, step=0);" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "And create animated gifs:" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "vis.animate_acquisition(acquisition, run=0);\n", 208 | "# vis.animate_loss_dist(acquisition, run=0); # does not show data" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "You can also plot individual runs with" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "vis.acquisitions" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "vis.plot_all_runs('GPSurrogateAcquisitionMSE', 'FancyUnbiasedRiskEstimator', break_after=100)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "This should have introduced the vast majority of functionality. Please see the the Visualiser class itself for further methods and feel free to reach out with any questions! 
:) " 241 | ] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python 3", 247 | "language": "python", 248 | "name": "python3" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.8.3" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 4 265 | } 266 | -------------------------------------------------------------------------------- /outputs/animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jlko/active-testing/2807fac9ad91fb12e5814e71b9a26c55df9d50fb/outputs/animation.gif -------------------------------------------------------------------------------- /reproduce/experiments/figure-123.sh: -------------------------------------------------------------------------------- 1 | python main.py +paper=SyntheticGPGP 2 | python main.py +paper=SyntheticQuadraticLinear 3 | python main.py +paper=SyntheticTwoMoonsRF 4 | -------------------------------------------------------------------------------- /reproduce/experiments/figure-4.sh: -------------------------------------------------------------------------------- 1 | python main.py +paper=SmallMNISTBNN 2 | python main.py +paper=SmallFMNISTResNet -------------------------------------------------------------------------------- /reproduce/experiments/figure-5.sh: -------------------------------------------------------------------------------- 1 | python main.py +paper=LargeCIFAR100ResNet -------------------------------------------------------------------------------- /reproduce/experiments/figure-6.sh: -------------------------------------------------------------------------------- 1 | python main.py +paper=LargeFMNISTResNet 2 | python main.py +paper=LargeCIFAR10ResNet 3 | python main.py +paper=LargeCIFAR100WideResNet 4 | python main.py +paper=LargeCIFAR10ResNetAccuracy -------------------------------------------------------------------------------- /reproduce/experiments/figure-7.sh: -------------------------------------------------------------------------------- 1 | python main.py +paper=LargeFMNISTBNN -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # conda create -n isactive python=3.8 2 | # conda activate isactive 3 | # pip install -r requirements.txt 4 | torch==1.7.0 5 | tensorflow 6 | numpy 7 | hydra-core 8 | omegaconf 9 | scikit-learn 10 | pandas 11 | torchvision 12 | jupyterlab 13 | notebook 14 | matplotlib 15 | seaborn 16 | celluloid --------------------------------------------------------------------------------
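For a quick end-to-end check, the sketch below ties the reproduction scripts and the `Visualiser` together. It assumes you have already run `python main.py +paper=SyntheticGPGP` (so results live in `outputs/final/SyntheticGPGP`), it only uses `Visualiser` calls demonstrated in `notebooks/explore_experiment.ipynb`, and the output filename under `notebooks/plots/` is an illustrative choice, not a path the code expects.

```python
# Sketch: load a finished SyntheticGPGP run and compare risk-estimator
# convergence, mirroring notebooks/explore_experiment.ipynb.
import os

from activetesting.visualize import Visualiser

# Point the Visualiser at the Hydra run directory of the experiment.
vis = Visualiser('outputs/final/SyntheticGPGP')

# Pairs of [acquisition strategy, risk estimator] to compare.
acq_risks = [
    ['RandomAcquisition', 'BiasedRiskEstimator'],
    ['GPSurrogateAcquisitionMSE', 'FancyUnbiasedRiskEstimator'],
    ['TrueLossAcquisition', 'FancyUnbiasedRiskEstimator'],
]

# Risk estimates for each combination as more test points are acquired.
vis.plot_risks_select_combinations(acq_risks)

# Log-scale convergence, dropping the oracle TrueLossAcquisition as in the
# notebook; saving the figure is an arbitrary choice for this sketch.
fig, ax = vis.plot_log_convergence(acq_risks[:-1])
os.makedirs('notebooks/plots', exist_ok=True)
fig.savefig('notebooks/plots/SyntheticGPGP-convergence.png', dpi=200)
```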