├── .gitignore
├── LICENSE.mit
├── README.md
├── images
│ ├── lml.png
│ └── polytope.png
├── lml.py
├── neural-motifs
│ ├── .gitignore
│ ├── LICENSE
│ ├── Makefile
│ ├── README.md
│ ├── config.py
│ ├── dataloaders
│ │ ├── __init__.py
│ │ ├── blob.py
│ │ ├── image_transforms.py
│ │ ├── mscoco.py
│ │ └── visual_genome.py
│ ├── docs
│ │ ├── LICENSE.md
│ │ ├── _config.yaml
│ │ ├── _includes
│ │ │ └── image.html
│ │ ├── _layouts
│ │ │ └── default.html
│ │ ├── index.md
│ │ ├── teaser.png
│ │ └── upload.sh
│ ├── lib
│ │ ├── __init__.py
│ │ ├── draw_rectangles
│ │ │ ├── draw_rectangles.c
│ │ │ ├── draw_rectangles.pyx
│ │ │ └── setup.py
│ │ ├── evaluation
│ │ │ ├── __init__.py
│ │ │ ├── sg_eval.py
│ │ │ ├── sg_eval_all_rel_cates.py
│ │ │ ├── sg_eval_slow.py
│ │ │ └── test_sg_eval.py
│ │ ├── fpn
│ │ │ ├── anchor_targets.py
│ │ │ ├── box_intersections_cpu
│ │ │ │ ├── bbox.c
│ │ │ │ ├── bbox.pyx
│ │ │ │ └── setup.py
│ │ │ ├── box_utils.py
│ │ │ ├── generate_anchors.py
│ │ │ ├── make.sh
│ │ │ ├── nms
│ │ │ │ ├── Makefile
│ │ │ │ ├── build.py
│ │ │ │ ├── functions
│ │ │ │ │ └── nms.py
│ │ │ │ └── src
│ │ │ │   ├── cuda
│ │ │ │   │ ├── Makefile
│ │ │ │   │ ├── nms_kernel.cu
│ │ │ │   │ └── nms_kernel.h
│ │ │ │   ├── nms_cuda.c
│ │ │ │   └── nms_cuda.h
│ │ │ ├── proposal_assignments
│ │ │ │ ├── proposal_assignments_det.py
│ │ │ │ ├── proposal_assignments_gtbox.py
│ │ │ │ ├── proposal_assignments_postnms.py
│ │ │ │ ├── proposal_assignments_rel.py
│ │ │ │ └── rel_assignments.py
│ │ │ └── roi_align
│ │ │   ├── Makefile
│ │ │   ├── __init__.py
│ │ │   ├── _ext
│ │ │   │ ├── __init__.py
│ │ │   │ └── roi_align
│ │ │   │   └── __init__.py
│ │ │   ├── build.py
│ │ │   ├── functions
│ │ │   │ ├── __init__.py
│ │ │   │ └── roi_align.py
│ │ │   ├── modules
│ │ │   │ ├── __init__.py
│ │ │   │ └── roi_align.py
│ │ │   └── src
│ │ │     ├── cuda
│ │ │     │ ├── Makefile
│ │ │     │ ├── roi_align_kernel.cu
│ │ │     │ └── roi_align_kernel.h
│ │ │     ├── roi_align_cuda.c
│ │ │     └── roi_align_cuda.h
│ │ ├── get_dataset_counts.py
│ │ ├── get_union_boxes.py
│ │ ├── lstm
│ │ │ ├── __init__.py
│ │ │ ├── decoder_rnn.py
│ │ │ └── highway_lstm_cuda
│ │ │   ├── __init__.py
│ │ │   ├── _ext
│ │ │   │ ├── __init__.py
│ │ │   │ └── highway_lstm_layer
│ │ │   │   └── __init__.py
│ │ │   ├── alternating_highway_lstm.py
│ │ │   ├── build.py
│ │ │   ├── make.sh
│ │ │   └── src
│ │ │     ├── highway_lstm_cuda.c
│ │ │     ├── highway_lstm_cuda.h
│ │ │     ├── highway_lstm_kernel.cu
│ │ │     └── highway_lstm_kernel.h
│ │ ├── object_detector.py
│ │ ├── pytorch_misc.py
│ │ ├── rel_model.py
│ │ ├── rel_model_stanford.py
│ │ ├── resnet.py
│ │ ├── sparse_targets.py
│ │ ├── surgery.py
│ │ └── word_vectors.py
│ ├── misc
│ │ ├── __init__.py
│ │ └── motifs.py
│ ├── models
│ │ ├── _visualize.py
│ │ ├── eval_rel_count.py
│ │ ├── eval_rels.py
│ │ ├── eval_vis.py
│ │ ├── train_detector.py
│ │ └── train_rels.py
│ └── scripts
│   ├── eval_models_sgcls.sh
│   ├── eval_models_sgdet.sh
│   ├── pretrain_detector.sh
│   ├── refine_for_detection.sh
│   ├── train_models_sgcls.sh
│   ├── train_motifnet.sh
│   ├── train_predcls.sh
│   └── train_stanford.sh
├── setup.py
└── smooth-topk
  ├── .gitignore
  ├── LICENSE
  ├── README.md
  ├── requirements.txt
  └── src
    ├── cli.py
    ├── data
    │ ├── __init__.py
    │ ├── main.py
    │ └── utils.py
    ├── epoch.py
    ├── losses
    │ ├── __init__.py
    │ ├── entr.py
    │ ├── functional.py
    │ ├── lml_loss.py
    │ ├── logarithm.py
    │ ├── main.py
    │ ├── ml.py
    │ ├── polynomial
    │ │ ├── __init__.py
    │ │ ├── divide_conquer.py
    │ │ ├── grad.py
    │ │ ├── multiplication.py
    │ │ └── sp.py
    │ ├── svm.py
    │ └── utils.py
    ├── main.py
    ├── models
    │ ├── __init__.py
    │ ├── cifar.py
    │ ├── densenet.py
    │ ├── main.py
    │ └── parser.py
    ├── scripts
    │ ├── cifar100_noise_ce.sh
    │ ├── cifar100_noise_entr.sh
    │ ├── cifar100_noise_lml.sh
    │ ├── cifar100_noise_ml.sh
    │ ├── cifar100_noise_svm.sh
    │ ├── eval.py
    │ ├── imagenet_split.py
    │ ├── imagenet_subsets.sh
    │ ├── imagenet_subsets_ce.sh
    │ ├── imagenet_subsets_entr.sh
    │ ├── imagenet_subsets_lml.sh
    │ ├── imagenet_subsets_svm.sh
    │ ├── perf-all.sh
    │ └── perf.py
    ├── tests
    │ ├── __init__.py
    │ ├── py_ref.py
    │ ├── test_log.py
    │ ├── test_losses.py
    │ ├── test_sum_product.py
    │ ├── th_ref.py
    │ └── utils.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoints
2 | data*
3 |
4 | _ext
5 |
6 | *.o
7 | *.lprof
8 | *.pkl
9 | *.egg-info
10 | dist
11 | build
12 |
13 | neural-motifs/motifnet_*
--------------------------------------------------------------------------------
/LICENSE.mit:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright 2019 Intel AI, CMU, Bosch AI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # The Limited Multi-Label Projection Layer
2 |
3 | This repository is by
4 | [Brandon Amos](http://bamos.github.io),
5 | [Vladlen Koltun](http://vladlen.info/),
6 | and
7 | [J. Zico Kolter](http://zicokolter.com/) and
8 | contains the PyTorch library and source code to reproduce the
9 | experiments in our tech report on
10 | [The Limited Multi-Label Projection Layer](https://arxiv.org/abs/1906.08707).
11 |
12 | ![](./images/polytope.png)
13 | ![](./images/lml.png)
14 |
15 | ---
16 |
17 | We provide the LML layer as a PyTorch module in `lml.py`.
18 | You can install it with:
19 |
20 | ```
21 | pip install git+git://github.com/locuslab/lml.git
22 | ```
23 |
24 | A simple usage example to project a 5-dimensional vector
25 | onto the LML polytope with two active elements is:
26 |
27 | ```python
28 | import torch
29 | from lml import LML
30 |
31 | x = 10.*torch.randn(5) # tensor([ -4.0695, 10.8666, 13.0867, -7.1431, -14.7220])
32 | y = LML(N=2)(x) # tensor([5.8745e-04, 9.9945e-01, 9.9994e-01, 2.7187e-05, 1.3897e-08])
33 | ```
34 |
35 | # Top-k Image Classification
36 | In the `smooth-topk` directory, we have connected the LML layer to the
37 | PyTorch experiments in the
38 | [oval-group/smooth-topk](https://github.com/oval-group/smooth-topk)
39 | repository.
40 | We ran these experiments with PyTorch 1.0.
41 |
42 | A single LML training run can be done from the `smooth-topk/src` directory with
43 |
44 | ```
45 | ./main.py --dataset cifar100 --model densenet40-40 --out-name /tmp/lml-cifar --loss lml --noise 0.0 --seed 0 --no-visdom
46 | ```
47 |
48 | Coordinating all of the CIFAR-100 experiments can be done with
49 | the `./scripts/cifar100_noise_*.sh` scripts.
50 |
51 | We have also added an option to use the
52 | [NVIDIA/DALI](https://github.com/NVIDIA/DALI)
53 | library for pre-processing ImageNet images on the GPU,
54 | but [DALI currently has known memory leaks](https://github.com/NVIDIA/DALI/issues/344)
55 | that cause the experiments to crash and
56 | run out of memory.
57 |
58 | # Neural Motifs: Scene Graph Generation
59 |
60 | In the `neural-motifs` directory, we have connected the LML layer to the
61 | PyTorch experiments in the
62 | [rowanz/neural-motifs](https://github.com/rowanz/neural-motifs)
63 | repository.
64 | The `README` in this directory provides more details about
65 | setting up and running the experiments.
66 | The original code has not been updated to the latest version of
67 | PyTorch and these experiments should be run with PyTorch 0.3.
68 |
69 | A single LML training run can be done from the `neural-motifs` directory with
70 |
71 | ```
72 | python3 models/train_rels.py -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar -save_dir /tmp/lml-nm -nepoch 50 -use_bias --lml_topk 20
73 | ```
74 |
75 | Coordinating all of the experiments can be done with
76 | `./scripts/train_predcls.sh`.
77 |
78 | # Licensing and Citations
79 |
80 | Our LML layer in `lml.py` is licensed under the MIT license.
81 | All other code in this repository remains under the
82 | original licensing.
83 |
84 | If you find this repository helpful in your publications,
85 | please consider citing our paper.
86 |
87 | ```
88 | @article{amos2019limited,
89 | title={{The Limited Multi-Label Projection Layer}},
90 | author={Brandon Amos and Vladlen Koltun and J. Zico Kolter},
91 | journal={arXiv preprint arXiv:1906.08707},
92 | year={2019}
93 | }
94 | ```
95 |
--------------------------------------------------------------------------------
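To make the README's usage example concrete: `LML(N=k)` projects a score vector onto the LML polytope, the set of points in [0,1]^n whose coordinates sum to k, so each output coordinate acts as a smoothed indicator of membership in the top-k set. Below is a minimal sketch of the resulting top-k classification loss, assuming `LML` broadcasts over a batch of score vectors; the reference implementation used in the experiments is `smooth-topk/src/losses/lml_loss.py`.

```python
import torch
from lml import LML

def lml_topk_loss(scores, targets, k=5, eps=1e-8):
    """Sketch of a smooth top-k classification loss built on the LML projection.

    scores:  [batch, num_classes] raw model scores
    targets: [batch] integer class labels
    """
    # p[i, c] is a smoothed indicator that class c is in the top-k set for example i.
    p = LML(N=k)(scores)
    # Maximize the (smoothed) likelihood that the true class lands in the top-k set.
    true_p = p[torch.arange(scores.size(0)), targets]
    return -torch.log(true_p + eps).mean()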
/images/lml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/images/lml.png
--------------------------------------------------------------------------------
/images/polytope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/images/polytope.png
--------------------------------------------------------------------------------
/neural-motifs/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # Jupyter Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # SageMath parsed files
79 | *.sage.py
80 |
81 | # dotenv
82 | .env
83 |
84 | # virtualenv
85 | .venv
86 | venv/
87 | ENV/
88 |
89 | # Spyder project settings
90 | .spyderproject
91 | .spyproject
92 |
93 | # Rope project settings
94 | .ropeproject
95 |
96 | # mkdocs documentation
97 | /site
98 |
99 | # mypy
100 | .mypy_cache/
101 |
--------------------------------------------------------------------------------
/neural-motifs/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Rowan Zellers
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/neural-motifs/Makefile:
--------------------------------------------------------------------------------
1 | export PATH := /usr/local/cuda-9.1/bin:$(PATH)
2 |
3 | all: draw_rectangles box_intersections nms roi_align lstm
4 |
5 | draw_rectangles:
6 | cd lib/draw_rectangles; python setup.py build_ext --inplace
7 | box_intersections:
8 | cd lib/fpn/box_intersections_cpu; python setup.py build_ext --inplace
9 | nms:
10 | cd lib/fpn/nms; make
11 | roi_align:
12 | cd lib/fpn/roi_align; make
13 | lstm:
14 | cd lib/lstm/highway_lstm_cuda; ./make.sh
15 |
--------------------------------------------------------------------------------
/neural-motifs/README.md:
--------------------------------------------------------------------------------
1 | This directory and README are from the
2 | [rowanz/neural-motifs](https://github.com/rowanz/neural-motifs)
3 | repository.
4 | Our modifications remain under the same license.
5 |
6 |
7 | ---
8 |
9 | # neural-motifs
10 | Code for Neural Motifs: Scene Graph Parsing with Global Context (CVPR 2018)
11 |
12 | This repository contains data and code for the paper [Neural Motifs: Scene Graph Parsing with Global Context](https://arxiv.org/abs/1711.06640v2), now updated for the CVPR camera ready. This should be the same as what will be available via the CVPR site when it appears, except that the supplemental section is rolled into the same PDF. For the project page (as well as links to the baseline checkpoints), check out [rowanzellers.com/neuralmotifs](https://rowanzellers.com/neuralmotifs). If the paper significantly inspires you, we request that you cite our work:
13 |
14 | ### Bibtex
15 |
16 | ```
17 | @inproceedings{zellers2018scenegraphs,
18 | title={Neural Motifs: Scene Graph Parsing with Global Context},
19 | author={Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin},
20 | booktitle = "Conference on Computer Vision and Pattern Recognition",
21 | year={2018}
22 | }
23 | ```
24 | # Setup
25 |
26 |
27 | 0. Install Python 3.6 and PyTorch 0.3. I recommend the [Anaconda distribution](https://repo.continuum.io/archive/). To install PyTorch if you haven't already, use
28 | ```conda install pytorch=0.3.0 torchvision=0.2.0 cuda90 -c pytorch```.
29 |
30 | 1. Update the config file with the dataset paths. Specifically:
31 | - Visual Genome (the VG_100K folder, image_data.json, VG-SGG.h5, and VG-SGG-dicts.json). See data/stanford_filtered/README.md for the steps I used to download these.
32 | - You'll also need to fix your PYTHONPATH: ```export PYTHONPATH=/home/rowan/code/scene-graph```
33 |
34 | 2. Compile everything. Run ```make``` in the main directory: this compiles the Bilinear Interpolation operation for the RoIs as well as the Highway LSTM.
35 |
36 | 3. Pretrain VG detection. The old version involved pretraining on COCO as well, but we got rid of that for simplicity. Run ./scripts/pretrain_detector.sh.
37 | Note: you might have to modify the learning rate and batch size, particularly if you don't have 3 Titan X GPUs (which is what I used). [You can also download the pretrained detector checkpoint here.](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX)
38 |
39 | 4. Train VG scene graph classification: run ./scripts/train_models_sgcls.sh 2 (will run on GPU 2). OR, download the MotifNet-cls checkpoint here: [Motifnet-SGCls/PredCls](https://drive.google.com/open?id=12qziGKYjFD3LAnoy4zDT3bcg5QLC0qN6).
40 | 5. Refine for detection: run ./scripts/refine_for_detection.sh 2 or download the [Motifnet-SGDet](https://drive.google.com/open?id=1thd_5uSamJQaXAPVGVOUZGAOfGCYZYmb) checkpoint.
41 | 6. Evaluate: Refer to the scripts ./scripts/eval_models_sg[cls/det].sh.
42 |
43 | # help
44 |
45 | Feel free to open an issue if you encounter trouble getting it to work!
46 |
--------------------------------------------------------------------------------
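For setup step 1 above, here is a hedged sketch of the kind of path configuration it describes. The variable names below are illustrative; the authoritative names and defaults are defined in `neural-motifs/config.py`.

```python
# Illustrative Visual Genome paths for setup step 1; consult
# neural-motifs/config.py for the variable names it actually defines.
VG_IMAGES = '/data/visual_genome/VG_100K'                 # folder of VG images
IM_DATA_FN = '/data/visual_genome/image_data.json'        # image metadata
VG_SGG_FN = '/data/visual_genome/VG-SGG.h5'               # scene graph annotations
VG_SGG_DICT_FN = '/data/visual_genome/VG-SGG-dicts.json'  # label dictionaries
```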
/neural-motifs/dataloaders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/dataloaders/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/dataloaders/image_transforms.py:
--------------------------------------------------------------------------------
1 | # Some image transforms
2 |
3 | from PIL import Image, ImageOps, ImageFilter, ImageEnhance
4 | import numpy as np
5 | from random import randint
6 | # All of these need to be called on PIL images
7 |
8 | class SquarePad(object):
9 | def __call__(self, img):
10 | w, h = img.size
11 | img_padded = ImageOps.expand(img, border=(0, 0, max(h - w, 0), max(w - h, 0)),
12 | fill=(int(0.485 * 256), int(0.456 * 256), int(0.406 * 256)))
13 | return img_padded
14 |
15 |
16 | class Grayscale(object):
17 | """
18 | Randomly desaturates the image (sometimes all the way to grayscale).
19 | """
20 | def __call__(self, img):
21 | factor = np.sqrt(np.sqrt(np.random.rand(1)))
22 | # print("gray {}".format(factor))
23 | enhancer = ImageEnhance.Color(img)
24 | return enhancer.enhance(factor)
25 |
26 |
27 | class Brightness(object):
28 | """
29 | Randomly perturbs the brightness.
30 | """
31 | def __call__(self, img):
32 | factor = np.random.randn(1)/6+1
33 | factor = min(max(factor, 0.5), 1.5)
34 | # print("brightness {}".format(factor))
35 |
36 | enhancer = ImageEnhance.Brightness(img)
37 | return enhancer.enhance(factor)
38 |
39 |
40 | class Contrast(object):
41 | """
42 | Randomly perturbs the contrast.
43 | """
44 | def __call__(self, img):
45 | factor = np.random.randn(1)/8+1.0
46 | factor = min(max(factor, 0.5), 1.5)
47 | # print("contrast {}".format(factor))
48 |
49 | enhancer = ImageEnhance.Contrast(img)
50 | return enhancer.enhance(factor)
51 |
52 |
53 | class Hue(object):
54 | """
55 | Randomly shifts the hue.
56 | """
57 | def __call__(self, img):
58 | # 30 seems good
59 | factor = int(np.random.randn(1)*8)
60 | factor = min(max(factor, -30), 30)
61 | factor = np.array(factor, dtype=np.uint8)
62 |
63 | hsv = np.array(img.convert('HSV'))
64 | hsv[:,:,0] += factor
65 | new_img = Image.fromarray(hsv, 'HSV').convert('RGB')
66 |
67 | return new_img
68 |
69 |
70 | class Sharpness(object):
71 | """
72 | Randomly perturbs the sharpness.
73 | """
74 | def __call__(self, img):
75 | factor = 1.0 + np.random.randn(1)/5
76 | # print("sharpness {}".format(factor))
77 | enhancer = ImageEnhance.Sharpness(img)
78 | return enhancer.enhance(factor)
79 |
80 |
81 | def random_crop(img, boxes, box_scale, round_boxes=True, max_crop_fraction=0.1):
82 | """
83 | Randomly crops the image
84 | :param img: PIL image
85 | :param boxes: Ground truth boxes
86 | :param box_scale: This is the scale that the boxes are at (e.g. 1024 wide). We'll preserve that ratio
87 | :param round_boxes: Set this to true if we're going to round the boxes to ints
88 | :return: Cropped image, new boxes
89 | """
90 |
91 | w, h = img.size
92 |
93 | max_crop_w = int(w*max_crop_fraction)
94 | max_crop_h = int(h*max_crop_fraction)
95 | boxes_scaled = boxes * max(w,h) / box_scale
96 | max_to_crop_top = min(int(boxes_scaled[:, 1].min()), max_crop_h)
97 | max_to_crop_left = min(int(boxes_scaled[:, 0].min()), max_crop_w)
98 | max_to_crop_right = min(int(w - boxes_scaled[:, 2].max()), max_crop_w)
99 | max_to_crop_bottom = min(int(h - boxes_scaled[:, 3].max()), max_crop_h)
100 |
101 | crop_top = randint(0, max(max_to_crop_top, 0))
102 | crop_left = randint(0, max(max_to_crop_left, 0))
103 | crop_right = randint(0, max(max_to_crop_right, 0))
104 | crop_bottom = randint(0, max(max_to_crop_bottom, 0))
105 | img_cropped = img.crop((crop_left, crop_top, w - crop_right, h - crop_bottom))
106 |
107 | new_boxes = box_scale / max(img_cropped.size) * np.column_stack(
108 | (boxes_scaled[:,0]-crop_left, boxes_scaled[:,1]-crop_top, boxes_scaled[:,2]-crop_left, boxes_scaled[:,3]-crop_top))
109 |
110 | if round_boxes:
111 | new_boxes = np.round(new_boxes).astype(np.int32)
112 | return img_cropped, new_boxes
113 |
114 |
115 | class RandomOrder(object):
116 | """ Composes several transforms together in random order - or not at all!
117 | """
118 |
119 | def __init__(self, transforms):
120 | self.transforms = transforms
121 |
122 | def __call__(self, img):
123 | if self.transforms is None:
124 | return img
125 | num_to_pick = np.random.choice(len(self.transforms))
126 | if num_to_pick == 0:
127 | return img
128 |
129 | order = np.random.choice(len(self.transforms), size=num_to_pick, replace=False)
130 | for i in order:
131 | img = self.transforms[i](img)
132 | return img
--------------------------------------------------------------------------------
/neural-motifs/docs/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Heiswayi Nrird
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/neural-motifs/docs/_config.yaml:
--------------------------------------------------------------------------------
1 | exclude: [README.md, LICENSE.md]
2 |
3 | defaults:
4 | - values:
5 | layout: default
6 |
--------------------------------------------------------------------------------
/neural-motifs/docs/_includes/image.html:
--------------------------------------------------------------------------------
1 | [Jekyll image include: renders `include.url` with `include.description` as the caption; the HTML markup was stripped from this dump.]
--------------------------------------------------------------------------------
/neural-motifs/docs/_layouts/default.html:
--------------------------------------------------------------------------------
1 | [Jekyll default layout: page scaffolding whose markup (head, inline styles, analytics) was stripped from this dump; the surviving fragments set the page title to {{ page.title }} and render {{ content }} in the body.]
--------------------------------------------------------------------------------
/neural-motifs/docs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | permalink: /
3 | title: Neural Motifs
4 | author: Rowan Zellers
5 | description: Scene Graph Parsing with Global Context (CVPR 2018)
6 | google_analytics_id: UA-84290243-3
7 | ---
8 | # Neural Motifs: Scene Graph Parsing with Global Context (CVPR 2018)
9 |
10 | ### by [Rowan Zellers](https://rowanzellers.com), [Mark Yatskar](https://homes.cs.washington.edu/~my89/), [Sam Thomson](http://samthomson.com/), [Yejin Choi](https://homes.cs.washington.edu/~yejin/)
11 |
12 |
13 | {% include image.html url="teaser.png" description="teaser" %}
14 |
15 | # Overview
16 |
17 | * In this work, we investigate the problem of producing structured graph representations of visual scenes. Similar to object detection, we must predict a box around each object. Here, we also need to predict an edge (with one of several labels, possibly `background`) between every ordered pair of boxes, producing a directed graph where the edges hopefully represent the semantics and interactions present in the scene.
18 | * We present an analysis of the [Visual Genome Scene Graphs dataset](http://visualgenome.org/). In particular:
19 | * Object labels (e.g. person, shirt) are highly predictive of edge labels (e.g. wearing), but **not vice versa**.
20 | * Over 90% of the edges in the dataset are non-semantic.
21 | * There is a significant amount of structure in the dataset, in the form of graph motifs (regularly appearing substructures).
22 | * Motivated by our analysis, we present a simple baseline that outperforms previous approaches.
23 | * We introduce Stacked Motif Networks (MotifNet), which is a novel architecture that is designed to capture higher order motifs in scene graphs. In doing so, it achieves a sizeable performance gain over prior state-of-the-art.
24 |
25 | # Read the paper!
26 | The old version of the paper is available at [arxiv link](https://arxiv.org/abs/1711.06640) - camera ready version coming soon!
27 |
28 | # Bibtex
29 | ```
30 | @inproceedings{zellers2018scenegraphs,
31 | title={Neural Motifs: Scene Graph Parsing with Global Context},
32 | author={Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin},
33 | booktitle = "Conference on Computer Vision and Pattern Recognition",
34 | year={2018}
35 | }
36 | ```
37 |
38 | # View some examples!
39 |
40 | Check out [this tool](https://rowanzellers.com/scenegraph2/) I made to visualize the scene graph predictions. Disclaimer: the predictions are from an earlier version of the model, but hopefully they're still helpful!
41 |
42 | # Code
43 |
44 | Visit the [`neural-motifs` GitHub repository](https://github.com/rowanz/neural-motifs) for our reference implementation and instructions for running our code.
45 |
46 | It is released under the MIT license.
47 |
48 | # Checkpoints available for download
49 | * [Pretrained Detector](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX)
50 | * [Motifnet-SGDet](https://drive.google.com/open?id=1thd_5uSamJQaXAPVGVOUZGAOfGCYZYmb)
51 | * [Motifnet-SGCls/PredCls](https://drive.google.com/open?id=12qziGKYjFD3LAnoy4zDT3bcg5QLC0qN6)
52 |
53 | # questions?
54 |
55 | Feel free to get in touch! My main website is at [rowanzellers.com](https://rowanzellers.com)
56 |
--------------------------------------------------------------------------------
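As a toy illustration of the structured output described in the overview above (illustrative only; the pipeline itself works with numpy/torch tensors), a predicted scene graph amounts to labeled boxes plus labeled directed edges between ordered pairs of boxes, with unlisted pairs implicitly `background`:

```python
# Toy scene-graph structure (illustrative, not the repository's data format).
scene_graph = {
    "boxes": {  # object id -> (x1, y1, x2, y2)
        0: (12, 40, 220, 380),
        1: (60, 200, 180, 360),
    },
    "labels": {0: "person", 1: "shirt"},
    "edges": {  # (subject id, object id) -> predicate
        (0, 1): "wearing",  # e.g. person -wearing-> shirt
    },
}
```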
/neural-motifs/docs/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/docs/teaser.png
--------------------------------------------------------------------------------
/neural-motifs/docs/upload.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | scp -r _site/* USERNAME@SITE:~/rowanzellers.com/neuralmotifs
--------------------------------------------------------------------------------
/neural-motifs/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/draw_rectangles/draw_rectangles.pyx:
--------------------------------------------------------------------------------
1 | ######
2 | # Draws rectangles
3 | ######
4 |
5 | cimport cython
6 | import numpy as np
7 | cimport numpy as np
8 |
9 | DTYPE = np.float32
10 | ctypedef np.float32_t DTYPE_t
11 |
12 | def draw_union_boxes(bbox_pairs, pooling_size, padding=0):
13 | """
14 | Draws union boxes for the image.
15 | :param box_pairs: [num_pairs, 8]
16 | :param fmap_size: Size of the original feature map
17 | :param stride: ratio between fmap size and original img (<1)
18 | :param pooling_size: resize everything to this size
19 | :return: [num_pairs, 2, pooling_size, pooling_size arr
20 | """
21 | assert padding == 0, "Padding>0 not supported yet"
22 | return draw_union_boxes_c(bbox_pairs, pooling_size)
23 |
24 | cdef DTYPE_t minmax(DTYPE_t x):
25 | return min(max(x, 0), 1)
26 |
27 | cdef np.ndarray[DTYPE_t, ndim=4] draw_union_boxes_c(
28 | np.ndarray[DTYPE_t, ndim=2] box_pairs, unsigned int pooling_size):
29 | """
30 | Parameters
31 | ----------
32 | boxes: (N, 4) ndarray of float. everything has arbitrary ratios
33 | query_boxes: (K, 4) ndarray of float
34 | Returns
35 | -------
36 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
37 | """
38 | cdef unsigned int N = box_pairs.shape[0]
39 |
40 | cdef np.ndarray[DTYPE_t, ndim = 4] uboxes = np.zeros(
41 | (N, 2, pooling_size, pooling_size), dtype=DTYPE)
42 | cdef DTYPE_t x1_union, y1_union, x2_union, y2_union, w, h, x1_box, y1_box, x2_box, y2_box, y_contrib, x_contrib
43 | cdef unsigned int n, i, j, k
44 |
45 | for n in range(N):
46 | x1_union = min(box_pairs[n, 0], box_pairs[n, 4])
47 | y1_union = min(box_pairs[n, 1], box_pairs[n, 5])
48 | x2_union = max(box_pairs[n, 2], box_pairs[n, 6])
49 | y2_union = max(box_pairs[n, 3], box_pairs[n, 7])
50 |
51 | w = x2_union - x1_union
52 | h = y2_union - y1_union
53 |
54 | for i in range(2):
55 | # Now everything is in the range [0, pooling_size].
56 | x1_box = (box_pairs[n, 0+4*i] - x1_union)*pooling_size / w
57 | y1_box = (box_pairs[n, 1+4*i] - y1_union)*pooling_size / h
58 | x2_box = (box_pairs[n, 2+4*i] - x1_union)*pooling_size / w
59 | y2_box = (box_pairs[n, 3+4*i] - y1_union)*pooling_size / h
60 | # print("{:.3f}, {:.3f}, {:.3f}, {:.3f}".format(x1_box, y1_box, x2_box, y2_box))
61 | for j in range(pooling_size):
62 | y_contrib = minmax(j+1-y1_box)*minmax(y2_box-j)
63 | for k in range(pooling_size):
64 | x_contrib = minmax(k+1-x1_box)*minmax(x2_box-k)
65 | # print("j {} yc {} k {} xc {}".format(j, y_contrib, k, x_contrib))
66 | uboxes[n,i,j,k] = x_contrib*y_contrib
67 | return uboxes
68 |
--------------------------------------------------------------------------------
/neural-motifs/lib/draw_rectangles/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | import numpy
4 |
5 | setup(name="draw_rectangles_cython", ext_modules=cythonize('draw_rectangles.pyx'), include_dirs=[numpy.get_include()])
--------------------------------------------------------------------------------
/neural-motifs/lib/evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/evaluation/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/anchor_targets.py:
--------------------------------------------------------------------------------
1 | """
2 | Generates anchor targets to train the detector. Does this during the collate step in training
3 | as it's much cheaper to do this on a separate thread.
4 |
5 | Heavily adapted from faster_rcnn/rpn_msr/anchor_target_layer.py.
6 | """
7 | import numpy as np
8 | import numpy.random as npr
9 |
10 | from config import IM_SCALE, RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, \
11 | RPN_BATCHSIZE, RPN_FG_FRACTION, ANCHOR_SIZE, ANCHOR_SCALES, ANCHOR_RATIOS
12 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps
13 | from lib.fpn.generate_anchors import generate_anchors
14 |
15 |
16 | def anchor_target_layer(gt_boxes, im_size,
17 | allowed_border=0):
18 | """
19 | Assign anchors to ground-truth targets. Produces anchor classification
20 | labels and bounding-box regression targets.
21 |
22 | for each (H, W) location i
23 | generate 3 anchor boxes centered on cell i
24 | filter out-of-image anchors
25 | measure GT overlap
26 |
27 | :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as
28 | the image (IM_SCALE)
29 | :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE
30 | """
31 | if max(im_size) != IM_SCALE:
32 | raise ValueError("im size is {}".format(im_size))
33 | h, w = im_size
34 |
35 | # Get the indices of the anchors in the feature map.
36 | # h, w, A, 4
37 | ans_np = generate_anchors(base_size=ANCHOR_SIZE,
38 | feat_stride=16,
39 | anchor_scales=ANCHOR_SCALES,
40 | anchor_ratios=ANCHOR_RATIOS,
41 | )
42 | ans_np_flat = ans_np.reshape((-1, 4))
43 | inds_inside = np.where(
44 | (ans_np_flat[:, 0] >= -allowed_border) &
45 | (ans_np_flat[:, 1] >= -allowed_border) &
46 | (ans_np_flat[:, 2] < w + allowed_border) & # width
47 | (ans_np_flat[:, 3] < h + allowed_border) # height
48 | )[0]
49 | good_ans_flat = ans_np_flat[inds_inside]
50 | if good_ans_flat.size == 0:
51 | raise ValueError("There were no good anchors for an image of size {} with boxes {}".format(im_size, gt_boxes))
52 |
53 | # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes]
54 | overlaps = bbox_overlaps(good_ans_flat, gt_boxes)
55 | anchor_to_gtbox = overlaps.argmax(axis=1)
56 | max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox]
57 | gtbox_to_anchor = overlaps.argmax(axis=0)
58 | gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])]
59 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
60 |
61 | # Good anchors are those that match SOMEWHERE within a decent tolerance
62 | # label: 1 is positive, 0 is negative, -1 is dont care.
63 | # assign bg labels first so that positive labels can clobber them
64 | labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64)
65 | labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0
66 | labels[gt_argmax_overlaps] = 1
67 | labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1
68 |
69 | # subsample positive labels if we have too many
70 | num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
71 | fg_inds = np.where(labels == 1)[0]
72 | if len(fg_inds) > num_fg:
73 | labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1
74 |
75 | # subsample negative labels if we have too many
76 | num_bg = RPN_BATCHSIZE - np.sum(labels == 1)
77 | bg_inds = np.where(labels == 0)[0]
78 | if len(bg_inds) > num_bg:
79 | labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1
80 | # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0]))
81 |
82 |
83 | # Get the labels at the original size
84 | labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64)
85 | labels_unmap[inds_inside] = labels
86 |
87 | # h, w, A
88 | labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1])
89 | anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0))
90 |
91 | # These ought to be in the same order
92 | anchor_inds_flat = np.where(labels >= 0)[0]
93 | anchors = good_ans_flat[anchor_inds_flat]
94 | bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]]
95 | labels = labels[anchor_inds_flat]
96 |
97 | assert np.all(labels >= 0)
98 |
99 |
100 | # Anchors: [num_used, 4]
101 | # Anchor_inds: [num_used, 3] (h, w, A)
102 | # bbox_targets: [num_used, 4]
103 | # labels: [num_used]
104 |
105 | return anchors, anchor_inds, bbox_targets, labels
106 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/box_intersections_cpu/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(boxes, query_boxes):
16 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE)
17 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE)
18 |
19 | return bbox_overlaps_c(boxes_contig, query_contig)
20 |
21 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
22 | np.ndarray[DTYPE_t, ndim=2] boxes,
23 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
24 | """
25 | Parameters
26 | ----------
27 | boxes: (N, 4) ndarray of float
28 | query_boxes: (K, 4) ndarray of float
29 | Returns
30 | -------
31 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
32 | """
33 | cdef unsigned int N = boxes.shape[0]
34 | cdef unsigned int K = query_boxes.shape[0]
35 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
36 | cdef DTYPE_t iw, ih, box_area
37 | cdef DTYPE_t ua
38 | cdef unsigned int k, n
39 | for k in range(K):
40 | box_area = (
41 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
42 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
43 | )
44 | for n in range(N):
45 | iw = (
46 | min(boxes[n, 2], query_boxes[k, 2]) -
47 | max(boxes[n, 0], query_boxes[k, 0]) + 1
48 | )
49 | if iw > 0:
50 | ih = (
51 | min(boxes[n, 3], query_boxes[k, 3]) -
52 | max(boxes[n, 1], query_boxes[k, 1]) + 1
53 | )
54 | if ih > 0:
55 | ua = float(
56 | (boxes[n, 2] - boxes[n, 0] + 1) *
57 | (boxes[n, 3] - boxes[n, 1] + 1) +
58 | box_area - iw * ih
59 | )
60 | overlaps[n, k] = iw * ih / ua
61 | return overlaps
62 |
63 |
64 | def bbox_intersections(boxes, query_boxes):
65 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE)
66 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE)
67 |
68 | return bbox_intersections_c(boxes_contig, query_contig)
69 |
70 |
71 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
72 | np.ndarray[DTYPE_t, ndim=2] boxes,
73 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
74 | """
75 | For each query box compute the intersection ratio covered by boxes
76 | ----------
77 | Parameters
78 | ----------
79 | boxes: (N, 4) ndarray of float
80 | query_boxes: (K, 4) ndarray of float
81 | Returns
82 | -------
83 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes
84 | """
85 | cdef unsigned int N = boxes.shape[0]
86 | cdef unsigned int K = query_boxes.shape[0]
87 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
88 | cdef DTYPE_t iw, ih, box_area
89 | cdef DTYPE_t ua
90 | cdef unsigned int k, n
91 | for k in range(K):
92 | box_area = (
93 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
94 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
95 | )
96 | for n in range(N):
97 | iw = (
98 | min(boxes[n, 2], query_boxes[k, 2]) -
99 | max(boxes[n, 0], query_boxes[k, 0]) + 1
100 | )
101 | if iw > 0:
102 | ih = (
103 | min(boxes[n, 3], query_boxes[k, 3]) -
104 | max(boxes[n, 1], query_boxes[k, 1]) + 1
105 | )
106 | if ih > 0:
107 | intersec[n, k] = iw * ih / box_area
108 | return intersec
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/box_intersections_cpu/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | import numpy
4 |
5 | setup(name="bbox_cython", ext_modules=cythonize('bbox.pyx'), include_dirs=[numpy.get_include()])
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/box_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch.nn import functional as F
4 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps as bbox_overlaps_np
5 | from lib.fpn.box_intersections_cpu.bbox import bbox_intersections as bbox_intersections_np
6 |
7 |
8 | def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1):
9 | """
10 | Computes the loss for predicting the GT boxes from prior boxes
11 | :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2)
12 | :param deltas: [num_boxes, 4] (tx, ty, tw, th)
13 | :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2)
14 | :return:
15 | """
16 | prior_centers = center_size(prior_boxes) #(cx, cy, w, h)
17 | gt_centers = center_size(gt_boxes) #(cx, cy, w, h)
18 |
19 | center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:]
20 | size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:])
21 | all_targets = torch.cat((center_targets, size_targets), 1)
22 |
23 | loss = F.smooth_l1_loss(deltas, all_targets, size_average=False)/(eps + prior_centers.size(0))
24 |
25 | return loss
26 |
27 |
28 | def bbox_preds(boxes, deltas):
29 | """
30 | Converts "deltas" (predicted by the network) along with prior boxes
31 | into (x1, y1, x2, y2) representation.
32 | :param boxes: Prior boxes, represented as (x1, y1, x2, y2)
33 | :param deltas: Offsets (tx, ty, tw, th)
34 | :param box_strides [num_boxes,] distance apart between boxes. anchor box can't go more than
35 | \pm box_strides/2 from its current position. If None then we'll use the widths
36 | and heights
37 | :return: Transformed boxes
38 | """
39 |
40 | if boxes.size(0) == 0:
41 | return boxes
42 | prior_centers = center_size(boxes)
43 |
44 | xys = prior_centers[:, :2] + prior_centers[:, 2:] * deltas[:, :2]
45 |
46 | whs = torch.exp(deltas[:, 2:]) * prior_centers[:, 2:]
47 |
48 | return point_form(torch.cat((xys, whs), 1))
49 |
50 |
51 | def center_size(boxes):
52 | """ Convert prior_boxes to (cx, cy, w, h)
53 | representation for comparison to center-size form ground truth data.
54 | Args:
55 | boxes: (tensor) point_form boxes
56 | Return:
57 | boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
58 | """
59 | wh = boxes[:, 2:] - boxes[:, :2] + 1.0
60 |
61 | if isinstance(boxes, np.ndarray):
62 | return np.column_stack((boxes[:, :2] + 0.5 * wh, wh))
63 | return torch.cat((boxes[:, :2] + 0.5 * wh, wh), 1)
64 |
65 |
66 | def point_form(boxes):
67 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
68 | representation for comparison to point form ground truth data.
69 | Args:
70 | boxes: (tensor) center-size default boxes from priorbox layers.
71 | Return:
72 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
73 | """
74 | if isinstance(boxes, np.ndarray):
75 | return np.column_stack((boxes[:, :2] - 0.5 * boxes[:, 2:],
76 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)))
77 | return torch.cat((boxes[:, :2] - 0.5 * boxes[:, 2:],
78 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)), 1) # xmax, ymax
79 |
80 |
81 | ###########################################################################
82 | ### Torch Utils, creds to Max de Groot
83 | ###########################################################################
84 |
85 | def bbox_intersections(box_a, box_b):
86 | """ We resize both tensors to [A,B,2] without new malloc:
87 | [A,2] -> [A,1,2] -> [A,B,2]
88 | [B,2] -> [1,B,2] -> [A,B,2]
89 | Then we compute the area of intersect between box_a and box_b.
90 | Args:
91 | box_a: (tensor) bounding boxes, Shape: [A,4].
92 | box_b: (tensor) bounding boxes, Shape: [B,4].
93 | Return:
94 | (tensor) intersection area, Shape: [A,B].
95 | """
96 | if isinstance(box_a, np.ndarray):
97 | assert isinstance(box_b, np.ndarray)
98 | return bbox_intersections_np(box_a, box_b)
99 | A = box_a.size(0)
100 | B = box_b.size(0)
101 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
102 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
103 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
104 | box_b[:, :2].unsqueeze(0).expand(A, B, 2))
105 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0)
106 | return inter[:, :, 0] * inter[:, :, 1]
107 |
108 |
109 | def bbox_overlaps(box_a, box_b):
110 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
111 | is simply the intersection over union of two boxes. Here we operate on
112 | ground truth boxes and default boxes.
113 | E.g.:
114 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
115 | Args:
116 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
117 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
118 | Return:
119 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
120 | """
121 | if isinstance(box_a, np.ndarray):
122 | assert isinstance(box_b, np.ndarray)
123 | return bbox_overlaps_np(box_a, box_b)
124 |
125 | inter = bbox_intersections(box_a, box_b)
126 | area_a = ((box_a[:, 2] - box_a[:, 0] + 1.0) *
127 | (box_a[:, 3] - box_a[:, 1] + 1.0)).unsqueeze(1).expand_as(inter) # [A,B]
128 | area_b = ((box_b[:, 2] - box_b[:, 0] + 1.0) *
129 | (box_b[:, 3] - box_b[:, 1] + 1.0)).unsqueeze(0).expand_as(inter) # [A,B]
130 | union = area_a + area_b - inter
131 | return inter / union # [A,B]
132 |
133 |
134 | def nms_overlaps(boxes):
135 | """ get overlaps for each channel"""
136 | assert boxes.dim() == 3
137 | N = boxes.size(0)
138 | nc = boxes.size(1)
139 | max_xy = torch.min(boxes[:, None, :, 2:].expand(N, N, nc, 2),
140 | boxes[None, :, :, 2:].expand(N, N, nc, 2))
141 |
142 | min_xy = torch.max(boxes[:, None, :, :2].expand(N, N, nc, 2),
143 | boxes[None, :, :, :2].expand(N, N, nc, 2))
144 |
145 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0)
146 |
147 | # n, n, 151
148 | inters = inter[:,:,:,0]*inter[:,:,:,1]
149 | boxes_flat = boxes.view(-1, 4)
150 | areas_flat = (boxes_flat[:,2]- boxes_flat[:,0]+1.0)*(
151 | boxes_flat[:,3]- boxes_flat[:,1]+1.0)
152 | areas = areas_flat.view(boxes.size(0), boxes.size(1))
153 | union = -inters + areas[None] + areas[:, None]
154 | return inters / union
155 |
156 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 | from config import IM_SCALE
8 |
9 | import numpy as np
10 |
11 |
12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
13 | #
14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
15 | # >> anchors
16 | #
17 | # anchors =
18 | #
19 | # -83 -39 100 56
20 | # -175 -87 192 104
21 | # -359 -183 376 200
22 | # -55 -55 72 72
23 | # -119 -119 136 136
24 | # -247 -247 264 264
25 | # -35 -79 52 96
26 | # -79 -167 96 184
27 | # -167 -343 184 360
28 |
29 | # array([[ -83., -39., 100., 56.],
30 | # [-175., -87., 192., 104.],
31 | # [-359., -183., 376., 200.],
32 | # [ -55., -55., 72., 72.],
33 | # [-119., -119., 136., 136.],
34 | # [-247., -247., 264., 264.],
35 | # [ -35., -79., 52., 96.],
36 | # [ -79., -167., 96., 184.],
37 | # [-167., -343., 184., 360.]])
38 |
39 | def generate_anchors(base_size=16, feat_stride=16, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)):
40 | """ A wrapper function to generate anchors given different scales
41 | Also return the number of anchors in variable 'length'
42 | """
43 | anchors = generate_base_anchors(base_size=base_size,
44 | ratios=np.array(anchor_ratios),
45 | scales=np.array(anchor_scales))
46 | A = anchors.shape[0]
47 | shift_x = np.arange(0, IM_SCALE // feat_stride) * feat_stride # Same as shift_y
48 | shift_x, shift_y = np.meshgrid(shift_x, shift_x)
49 |
50 | shifts = np.stack([shift_x, shift_y, shift_x, shift_y], -1) # h, w, 4
51 | all_anchors = shifts[:, :, None] + anchors[None, None] #h, w, A, 4
52 | return all_anchors
53 |
54 | # shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
55 | # K = shifts.shape[0]
56 | # # width changes faster, so here it is H, W, C
57 | # anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
58 | # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False)
59 | # length = np.int32(anchors.shape[0])
60 |
61 |
62 | def generate_base_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6)):
63 | """
64 | Generate anchor (reference) windows by enumerating aspect ratios X
65 | scales wrt a reference (0, 0, 15, 15) window.
66 | """
67 |
68 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
69 | ratio_anchors = _ratio_enum(base_anchor, ratios)
70 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
71 | for i in range(ratio_anchors.shape[0])])
72 | return anchors
73 |
74 |
75 | def _whctrs(anchor):
76 | """
77 | Return width, height, x center, and y center for an anchor (window).
78 | """
79 |
80 | w = anchor[2] - anchor[0] + 1
81 | h = anchor[3] - anchor[1] + 1
82 | x_ctr = anchor[0] + 0.5 * (w - 1)
83 | y_ctr = anchor[1] + 0.5 * (h - 1)
84 | return w, h, x_ctr, y_ctr
85 |
86 |
87 | def _mkanchors(ws, hs, x_ctr, y_ctr):
88 | """
89 | Given a vector of widths (ws) and heights (hs) around a center
90 | (x_ctr, y_ctr), output a set of anchors (windows).
91 | """
92 |
93 | ws = ws[:, np.newaxis]
94 | hs = hs[:, np.newaxis]
95 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
96 | y_ctr - 0.5 * (hs - 1),
97 | x_ctr + 0.5 * (ws - 1),
98 | y_ctr + 0.5 * (hs - 1)))
99 | return anchors
100 |
101 |
102 | def _ratio_enum(anchor, ratios):
103 | """
104 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
105 | """
106 |
107 | w, h, x_ctr, y_ctr = _whctrs(anchor)
108 | size = w * h
109 | size_ratios = size / ratios
110 | # NOTE: CHANGED TO NOT HAVE ROUNDING
111 | ws = np.sqrt(size_ratios)
112 | hs = ws * ratios
113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
114 | return anchors
115 |
116 |
117 | def _scale_enum(anchor, scales):
118 | """
119 | Enumerate a set of anchors for each scale wrt an anchor.
120 | """
121 |
122 | w, h, x_ctr, y_ctr = _whctrs(anchor)
123 | ws = w * scales
124 | hs = h * scales
125 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
126 | return anchors
127 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd anchors
4 | python setup.py build_ext --inplace
5 | cd ..
6 |
7 | cd box_intersections_cpu
8 | python setup.py build_ext --inplace
9 | cd ..
10 |
11 | cd cpu_nms
12 | python build.py
13 | cd ..
14 |
15 | cd roi_align
16 | python build.py -C src/cuda clean
17 | python build.py -C src/cuda clean
18 | cd ..
19 |
20 | echo "Done compiling hopefully"
21 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/nms.cu.o
2 | python build.py
3 |
4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu
5 | $(MAKE) -C src/cuda
6 |
7 | clean:
8 | $(MAKE) -C src/cuda clean
9 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}}
5 |
6 | sources = []
7 | headers = []
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/nms_cuda.c']
14 | headers += ['src/nms_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.nms',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
36 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/functions/nms.py:
--------------------------------------------------------------------------------
1 | # Le code for doing NMS
2 | import torch
3 | import numpy as np
4 | from .._ext import nms
5 |
6 |
7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None,
8 | nms_thresh=0.7):
9 | """
10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not
11 | a variable.
12 | """
13 | just_inds = boxes_per_im is None
14 | if boxes_per_im is None:
15 | boxes_per_im = [boxes.size(0)]
16 |
17 |
18 | s = 0
19 | keep = []
20 | im_per = []
21 | for bpi in boxes_per_im:
22 | e = s + int(bpi)
23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh)
24 | keep.append(keep_im + s)
25 | im_per.append(keep_im.size(0))
26 |
27 | s = e
28 |
29 | inds = torch.cat(keep, 0)
30 | if just_inds:
31 | return inds
32 | return inds, im_per
33 |
34 |
35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7):
36 | keep = torch.IntTensor(scores.size(0))
37 | vs, idx = torch.sort(scores, dim=0, descending=True)
38 | if idx.size(0) > pre_nms_topn:
39 | idx = idx[:pre_nms_topn]
40 | boxes_sorted = boxes[idx].contiguous()
41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh)
42 | num_out = min(num_out, post_nms_topn)
43 | keep = keep[:num_out].long()
44 | keep = idx[keep.cuda(scores.get_device())]
45 | return keep
46 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/src/cuda/Makefile:
--------------------------------------------------------------------------------
1 | all: nms_kernel.cu nms_kernel.h
2 | /usr/local/cuda/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 -gencode arch=compute_61,code=sm_61
3 | # /usr/local/cuda/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52
4 | clean:
5 | rm nms.cu.o
6 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include <iostream>
9 | #include <vector>
10 |
11 | #define CUDA_CHECK(condition) \
12 | /* Code block avoids redefinition of cudaError_t error */ \
13 | do { \
14 | cudaError_t error = condition; \
15 | if (error != cudaSuccess) { \
16 | std::cout << cudaGetErrorString(error) << std::endl; \
17 | } \
18 | } while (0)
19 |
20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
21 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
22 |
23 | __device__ inline float devIoU(float const * const a, float const * const b) {
24 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
27 | float interS = width * height;
28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
30 | return interS / (Sa + Sb - interS);
31 | }
32 |
33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
34 | const float *dev_boxes, unsigned long long *dev_mask) {
35 | const int row_start = blockIdx.y;
36 | const int col_start = blockIdx.x;
37 |
38 | // if (row_start > col_start) return;
39 |
40 | const int row_size =
41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
42 | const int col_size =
43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
44 |
45 | __shared__ float block_boxes[threadsPerBlock * 5];
46 | if (threadIdx.x < col_size) {
47 | block_boxes[threadIdx.x * 4 + 0] =
48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 0];
49 | block_boxes[threadIdx.x * 4 + 1] =
50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 1];
51 | block_boxes[threadIdx.x * 4 + 2] =
52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 2];
53 | block_boxes[threadIdx.x * 4 + 3] =
54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 3];
55 | }
56 | __syncthreads();
57 |
58 | if (threadIdx.x < row_size) {
59 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
60 | const float *cur_box = dev_boxes + cur_box_idx * 4;
61 | int i = 0;
62 | unsigned long long t = 0;
63 | int start = 0;
64 | if (row_start == col_start) {
65 | start = threadIdx.x + 1;
66 | }
67 | for (i = start; i < col_size; i++) {
68 | if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) {
69 | t |= 1ULL << i;
70 | }
71 | }
72 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
73 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
74 | }
75 | }
76 |
77 | void _set_device(int device_id) {
78 | int current_device;
79 | CUDA_CHECK(cudaGetDevice(&current_device));
80 | if (current_device == device_id) {
81 | return;
82 | }
83 | // The call to cudaSetDevice must come before any calls to Get, which
84 | // may perform initialization using the GPU.
85 | CUDA_CHECK(cudaSetDevice(device_id));
86 | }
87 |
88 | extern "C" int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num,
89 | float nms_overlap_thresh, int device_id) {
90 | _set_device(device_id);
91 |
92 | unsigned long long* mask_dev = NULL;
93 |
94 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
95 |
96 | CUDA_CHECK(cudaMalloc(&mask_dev,
97 | boxes_num * col_blocks * sizeof(unsigned long long)));
98 |
99 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
100 | DIVUP(boxes_num, threadsPerBlock));
101 | dim3 threads(threadsPerBlock);
102 | nms_kernel<<<blocks, threads>>>(boxes_num,
103 | nms_overlap_thresh,
104 | boxes_dev,
105 | mask_dev);
106 |
107 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
108 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
109 | mask_dev,
110 | sizeof(unsigned long long) * boxes_num * col_blocks,
111 | cudaMemcpyDeviceToHost));
112 |
113 | std::vector<unsigned long long> remv(col_blocks);
114 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
115 |
116 | int num_to_keep = 0;
117 | for (int i = 0; i < boxes_num; i++) {
118 | int nblock = i / threadsPerBlock;
119 | int inblock = i % threadsPerBlock;
120 |
121 | if (!(remv[nblock] & (1ULL << inblock))) {
122 | keep_out[num_to_keep++] = i;
123 | unsigned long long *p = &mask_host[0] + i * col_blocks;
124 | for (int j = nblock; j < col_blocks; j++) {
125 | remv[j] |= p[j];
126 | }
127 | }
128 | }
129 |
130 | CUDA_CHECK(cudaFree(mask_dev));
131 | return num_to_keep;
132 | }
133 |
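The kernel computes, for every box i, one 64-bit mask per column block marking which higher-indexed boxes it suppresses; the sequential loop at the end of ApplyNMSGPU then walks the boxes in score order and ORs each kept box's masks into remv. A small Python sketch of that host-side reduction, for illustration only (Python ints stand in for unsigned long long):

def reduce_masks(mask, boxes_num, threads_per_block=64):
    # mask[i][j]: suppression bitmask of box i over column block j
    col_blocks = (boxes_num + threads_per_block - 1) // threads_per_block
    remv = [0] * col_blocks
    keep = []
    for i in range(boxes_num):
        nblock, inblock = divmod(i, threads_per_block)
        if not (remv[nblock] >> inblock) & 1:
            keep.append(i)  # box i survives; mark everything it suppresses
            for j in range(nblock, col_blocks):
                remv[j] |= mask[i][j]
    return keep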
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num,
2 | float nms_overlap_thresh, int device_id);
3 |
4 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <THC/THC.h>
3 | #include "cuda/nms_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh)
8 | {
9 | int* keep_data = THIntTensor_data(keep);
10 | const float* boxes_sorted_data = THCudaTensor_data(state, boxes_sorted);
11 |
12 | const int boxes_num = THCudaTensor_size(state, boxes_sorted, 0);
13 |
14 | const int devId = THCudaTensor_getDevice(state, boxes_sorted);
15 |
16 | int numTotalKeep = ApplyNMSGPU(keep_data, boxes_sorted_data, boxes_num, nms_thresh, devId);
17 | return numTotalKeep;
18 | }
19 |
20 |
21 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh);
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_det.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import numpy.random as npr
4 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION, ROIS_PER_IMG
5 | from lib.fpn.box_utils import bbox_overlaps
6 | from lib.pytorch_misc import to_variable
7 | import torch
8 |
9 | #############################################################
10 | # The following is only for object detection
11 | @to_variable
12 | def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5):
13 | """
14 | Assign object detection proposals to ground-truth targets. Produces proposal
15 | classification labels and bounding-box regression targets.
16 | :param rpn_rois: [img_ind, x1, y1, x2, y2]
17 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
18 | :param gt_classes: [num_boxes, 2] array of [img_ind, class]
19 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if overlap >= fg_thresh)
20 | :return:
21 | rois: [num_rois, 5]
22 | labels: [num_rois] array of labels
23 | bbox_targets: [num_rois, 4] array of targets for the labels.
24 | """
25 | fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION))
26 |
27 | gt_img_inds = gt_classes[:, 0] - image_offset
28 |
29 | all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0)
30 |
31 | ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0)
32 |
33 | im_sorted, idx = torch.sort(ims_per_box, 0)
34 | all_boxes = all_boxes[idx]
35 |
36 | # Assume that the GT boxes are already sorted in terms of image id
37 | num_images = int(im_sorted[-1]) + 1
38 |
39 | labels = []
40 | rois = []
41 | bbox_targets = []
42 | for im_ind in range(num_images):
43 | g_inds = (gt_img_inds == im_ind).nonzero()
44 |
45 | if g_inds.dim() == 0:
46 | continue
47 | g_inds = g_inds.squeeze(1)
48 | g_start = g_inds[0]
49 | g_end = g_inds[-1] + 1
50 |
51 | t_inds = (im_sorted == im_ind).nonzero().squeeze(1)
52 | t_start = t_inds[0]
53 | t_end = t_inds[-1] + 1
54 |
55 | # Max overlaps: for each predicted box, get the max ROI
56 | # Get the indices into the GT boxes too (must offset by the box start)
57 | ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end])
58 | max_overlaps, gt_assignment = ious.max(1)
59 | max_overlaps = max_overlaps.cpu().numpy()
60 | # print("Best overlap is {}".format(max_overlaps.max()))
61 | # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start))
62 | gt_assignment += g_start
63 |
64 | keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image,
65 | ROIS_PER_IMG)
66 |
67 | if keep_inds_np.size == 0:
68 | continue
69 |
70 | keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device())
71 |
72 | labels_ = gt_classes[:, 1][gt_assignment[keep_inds]]
73 | bbox_target_ = gt_boxes[gt_assignment[keep_inds]]
74 |
75 | # Clamp labels_ for the background RoIs to 0
76 | if num_fg < labels_.size(0):
77 | labels_[num_fg:] = 0
78 |
79 | rois_ = torch.cat((
80 | im_sorted[t_start:t_end, None][keep_inds].float(),
81 | all_boxes[t_start:t_end][keep_inds],
82 | ), 1)
83 |
84 | labels.append(labels_)
85 | rois.append(rois_)
86 | bbox_targets.append(bbox_target_)
87 |
88 | rois = torch.cat(rois, 0)
89 | labels = torch.cat(labels, 0)
90 | bbox_targets = torch.cat(bbox_targets, 0)
91 | return rois, labels, bbox_targets
92 |
93 |
94 | def _sel_inds(max_overlaps, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256):
95 | # Select foreground RoIs as those with >= FG_THRESH overlap
96 | fg_inds = np.where(max_overlaps >= fg_thresh)[0]
97 |
98 | # Guard against the case when an image has fewer than fg_rois_per_image
99 | # foreground RoIs
100 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.shape[0])
101 | # Sample foreground regions without replacement
102 | if fg_inds.size > 0:
103 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
104 |
105 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
106 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0]
107 |
108 | # Compute number of background RoIs to take from this image (guarding
109 | # against there being fewer than desired)
110 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
111 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
112 | # Sample background regions without replacement
113 | if bg_inds.size > 0:
114 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
115 |
116 | return np.append(fg_inds, bg_inds), fg_rois_per_this_image
117 |
118 |
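_sel_inds fills the per-image budget with foreground RoIs first and background RoIs second. A toy run, purely illustrative (the IoU values are made up, and the background window is assumed to be the usual [0.0, 0.5)):

import numpy as np
max_overlaps = np.array([0.9, 0.6, 0.3, 0.1, 0.55, 0.05])  # best IoU of 6 RoIs with any GT
keep, num_fg = _sel_inds(max_overlaps, fg_thresh=0.5,
                         fg_rois_per_image=2, rois_per_image=4)
# num_fg == 2; keep starts with 2 indices sampled from {0, 1, 4} (IoU >= 0.5),
# followed by up to 2 background indices sampled from {2, 3, 5}.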
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_gtbox.py:
--------------------------------------------------------------------------------
1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose
2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps
3 | import torch
4 | from lib.pytorch_misc import diagonal_inds, to_variable
5 | from config import RELS_PER_IMG, REL_FG_FRACTION
6 |
7 |
8 | @to_variable
9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5):
10 | """
11 | Assign object detection proposals to ground-truth targets. Produces proposal
12 | classification labels and bounding-box regression targets.
13 | :param rois: [img_ind, x1, y1, x2, y2]
14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1. Apparently not needed here.
15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class]
16 | Note, the img_inds here start at image_offset
17 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type].
18 | Note, the img_inds here start at image_offset
19 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if overlap >= fg_thresh)
20 | :return:
21 | rois: [num_rois, 5]
22 | labels: [num_rois] array of labels
23 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type)
24 |
25 | """
26 | im_inds = rois[:,0].long()
27 |
28 | num_im = im_inds[-1] + 1
29 |
30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i)
31 | fg_rels = gt_rels.clone()
32 | fg_rels[:,0] -= image_offset
33 | offset = {}
34 | for i, s, e in enumerate_by_image(im_inds):
35 | offset[i] = s
36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]):
37 | fg_rels[s:e, 1:3] += offset[i]
38 |
39 | # Try ALL things, not just intersections.
40 | is_cand = (im_inds[:, None] == im_inds[None])
41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0
42 |
43 | # # Compute salience
44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1)
45 | # labels_arange = labels.data.new(labels.size(0))
46 | # torch.arange(0, labels.size(0), out=labels_arange)
47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long()
48 | # labels = torch.stack((labels, salience_labels), 1)
49 |
50 | # Add in some BG labels
51 |
52 | # NOW WE HAVE TO EXCLUDE THE FGs.
53 | # TODO: check if this causes an error if many duplicate GTs haven't been filtered out
54 |
55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0
56 | is_bgcand = is_cand.nonzero()
57 | # TODO: make this sample on a per image case
58 | # If too many then sample
59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im))
60 | if num_fg < fg_rels.size(0):
61 | fg_rels = random_choose(fg_rels, num_fg)
62 |
63 | # If too many then sample
64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0,
65 | int(RELS_PER_IMG * num_im) - num_fg)
66 | if num_bg > 0:
67 | bg_rels = torch.cat((
68 | im_inds[is_bgcand[:, 0]][:, None],
69 | is_bgcand,
70 | (is_bgcand[:, 0, None] < -10).long(),  # always False, i.e. a zero column: bg rel type
71 | ), 1)
72 |
73 | if num_bg < is_bgcand.size(0):
74 | bg_rels = random_choose(bg_rels, num_bg)
75 | rel_labels = torch.cat((fg_rels, bg_rels), 0)
76 | else:
77 | rel_labels = fg_rels
78 |
79 |
80 | # last sort by rel.
81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) +
82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2])
83 |
84 | rel_labels = rel_labels[perm].contiguous()
85 |
86 | labels = gt_classes[:,1].contiguous()
87 | return rois, labels, rel_labels
88 |
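The offset bookkeeping above rewrites the within-image box indices in gt_rels as absolute row indices into the concatenated rois tensor. A minimal sketch of the same idea with made-up values (illustrative only, not part of the repository):

import torch
im_inds = torch.tensor([0, 0, 0, 1, 1])  # rows 0-2 belong to image 0, rows 3-4 to image 1
rels = torch.tensor([[0, 0, 1, 7],       # image 0: box 0 -> box 1, rel type 7
                     [1, 0, 1, 3]])      # image 1: box 0 -> box 1, rel type 3
offset = {0: 0, 1: 3}                    # first row of each image in the big tensor
for r in rels:
    r[1:3] += offset[int(r[0])]
# rels is now [[0, 0, 1, 7], [1, 3, 4, 3]]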
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_postnms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Goal: assign ROIs to targets
3 | # --------------------------------------------------------
4 |
5 |
6 | import numpy as np
7 | import numpy.random as npr
8 | from .proposal_assignments_rel import _sel_rels
9 | from lib.fpn.box_utils import bbox_overlaps
10 | from lib.pytorch_misc import to_variable
11 | import torch
12 |
13 |
14 | @to_variable
15 | def proposal_assignments_postnms(
16 | rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5,
17 | max_objs=100, max_rels=100, rand_val=0.01):
18 | """
19 | Assign object detection proposals to ground-truth targets. Produces proposal
20 | classification labels and bounding-box regression targets.
21 | :param rois: [img_ind, x1, y1, x2, y2]
22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class]
24 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
25 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if overlap >= fg_thresh)
26 | :return:
27 | rois: [num_rois, 5]
28 | labels: [num_rois] array of labels
29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type)
30 | """
31 | pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64)
32 | pred_boxes_np = rois[:, 1:].cpu().numpy()
33 | nms_inds_np = nms_inds.cpu().numpy()
34 | sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np)
35 |
36 | # split into chosen and suppressed
37 | chosen_inds_np = pred_inds_np[nms_inds_np]
38 | chosen_boxes_np = pred_boxes_np[nms_inds_np]
39 |
40 | suppre_inds_np = pred_inds_np[sup_inds_np]
41 | suppre_boxes_np = pred_boxes_np[sup_inds_np]
42 |
43 | gt_boxes_np = gt_boxes.cpu().numpy()
44 | gt_classes_np = gt_classes.cpu().numpy()
45 | gt_rels_np = gt_rels.cpu().numpy()
46 |
47 | gt_classes_np[:, 0] -= image_offset
48 | gt_rels_np[:, 0] -= image_offset
49 |
50 | num_im = gt_classes_np[:, 0].max()+1
51 |
52 | rois = []
53 | obj_labels = []
54 | rel_labels = []
55 | num_box_seen = 0
56 |
57 | for im_ind in range(num_im):
58 | chosen_ind = np.where(chosen_inds_np == im_ind)[0]
59 | suppre_ind = np.where(suppre_inds_np == im_ind)[0]
60 |
61 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
62 | gt_boxes_i = gt_boxes_np[gt_ind]
63 | gt_classes_i = gt_classes_np[gt_ind, 1]
64 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]
65 |
66 | # Get IOUs between chosen and GT boxes and if needed we'll add more in
67 |
68 | chosen_boxes_i = chosen_boxes_np[chosen_ind]
69 | suppre_boxes_i = suppre_boxes_np[suppre_ind]
70 |
71 | n_chosen = chosen_boxes_i.shape[0]
72 | n_suppre = suppre_boxes_i.shape[0]
73 | n_gt_box = gt_boxes_i.shape[0]
74 |
75 | # add a teensy bit of random noise because some GT boxes might be duplicated, etc.
76 | pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0)
77 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*(
78 | np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5)
79 |
80 | # Let's say that a box can only be assigned ONCE for now because we've already done
81 | # the NMS and stuff.
82 | is_hit = ious > fg_thresh
83 |
84 | obj_assignments_i = is_hit.argmax(1)
85 | obj_assignments_i[~is_hit.any(1)] = -1
86 |
87 | vals, first_occurrence_ind = np.unique(obj_assignments_i, return_index=True)
88 | obj_assignments_i[np.setdiff1d(
89 | np.arange(obj_assignments_i.shape[0]), first_occurrence_ind)] = -1
90 |
91 | extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen
92 |
93 | # Add them in somewhere at random
94 | num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0])
95 | boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32)
96 | labels_i = np.zeros(num_inds_to_have, dtype=np.int64)
97 |
98 | inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False))
99 | inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms)
100 |
101 | boxes_i[inds_from_nms] = chosen_boxes_i
102 | labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]]
103 |
104 | boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add]
105 | labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]]
106 |
107 | # Now, we do the relationships, same as for the rel case
108 | all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i),
109 | boxes_i,
110 | labels_i,
111 | gt_classes_i,
112 | gt_rels_i,
113 | fg_thresh=fg_thresh,
114 | fg_rels_per_image=100)
115 | all_rels_i[:,0:2] += num_box_seen
116 |
117 | rois.append(np.column_stack((
118 | im_ind * np.ones(boxes_i.shape[0], dtype=np.float32),
119 | boxes_i,
120 | )))
121 | obj_labels.append(labels_i)
122 | rel_labels.append(np.column_stack((
123 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64),
124 | all_rels_i,
125 | )))
126 | num_box_seen += boxes_i.shape[0]
127 |
128 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), async=True)
129 | labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), async=True)
130 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(),
131 | async=True)
132 |
133 | return rois, labels, rel_labels
134 |
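The rand_val jitter plus np.unique above implements a one-to-one matching: ties are broken randomly, and only the first box assigned to each GT keeps its label. A compact sketch of that step with made-up IoUs (illustrative only):

import numpy as np
ious = np.array([[0.9, 0.1],
                 [0.8, 0.2],   # also overlaps GT 0; dropped as a duplicate
                 [0.2, 0.7]])
assign = (ious > 0.5).argmax(1)
assign[~(ious > 0.5).any(1)] = -1
_, first = np.unique(assign, return_index=True)
assign[np.setdiff1d(np.arange(len(assign)), first)] = -1
# assign == [0, -1, 1]: every GT box is matched to at most one prediction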
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/proposal_assignments/rel_assignments.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Goal: assign ROIs to targets
3 | # --------------------------------------------------------
4 |
5 |
6 | import numpy as np
7 | import numpy.random as npr
8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE
9 | from lib.fpn.box_utils import bbox_overlaps
10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds
11 | from collections import defaultdict
12 | import torch
13 |
14 | @to_variable
15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset,
16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True):
17 | """
18 | Assign object detection proposals to ground-truth targets. Produces proposal
19 | classification labels and bounding-box regression targets.
20 | :param rpn_rois: [img_ind, x1, y1, x2, y2]
21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1
22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class]
23 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]
24 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if overlap >= fg_thresh)
25 | :return:
26 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type)
27 | Note: unlike the other assignment functions, only the sampled relationship
28 | labels are returned.
29 |
30 | """
31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64))
32 |
33 | pred_inds_np = im_inds.cpu().numpy()
34 | pred_boxes_np = rpn_rois.cpu().numpy()
35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy()
36 | gt_boxes_np = gt_boxes.cpu().numpy()
37 | gt_classes_np = gt_classes.cpu().numpy()
38 | gt_rels_np = gt_rels.cpu().numpy()
39 |
40 | gt_classes_np[:, 0] -= image_offset
41 | gt_rels_np[:, 0] -= image_offset
42 |
43 | num_im = gt_classes_np[:, 0].max()+1
44 |
45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format(
46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np
47 | # ))
48 |
49 | rel_labels = []
50 | num_box_seen = 0
51 | for im_ind in range(num_im):
52 | pred_ind = np.where(pred_inds_np == im_ind)[0]
53 |
54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0]
55 | gt_boxes_i = gt_boxes_np[gt_ind]
56 | gt_classes_i = gt_classes_np[gt_ind, 1]
57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:]
58 |
59 | # [num_pred, num_gt]
60 | pred_boxes_i = pred_boxes_np[pred_ind]
61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind]
62 |
63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i)
64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh)
65 |
66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i)
68 | if filter_non_overlap:
69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
70 | rels_intersect = rel_possibilities
71 | else:
72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]),
73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0],
74 | dtype=np.int64)
75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)
76 |
77 | # ONLY select relations between ground truth because otherwise we get useless data
78 | rel_possibilities[pred_boxlabels_i == 0] = 0
79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0
80 |
81 | # Sample the GT relationships.
82 | fg_rels = []
83 | p_size = []
84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i):
85 | fg_rels_i = []
86 | fg_scores_i = []
87 |
88 | for from_ind in np.where(is_match[:, from_gtind])[0]:
89 | for to_ind in np.where(is_match[:, to_gtind])[0]:
90 | if from_ind != to_ind:
91 | fg_rels_i.append((from_ind, to_ind, rel_id))
92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind]))
93 | rel_possibilities[from_ind, to_ind] = 0
94 | if len(fg_rels_i) == 0:
95 | continue
96 | p = np.array(fg_scores_i)
97 | p = p / p.sum()
98 | p_size.append(p.shape[0])
99 | num_to_add = min(p.shape[0], num_sample_per_gt)
100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False):
101 | fg_rels.append(fg_rels_i[rel_to_add])
102 |
103 | fg_rels = np.array(fg_rels, dtype=np.int64)
104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)]
106 | elif fg_rels.size == 0:
107 | fg_rels = np.zeros((0, 3), dtype=np.int64)
108 |
109 | bg_rels = np.column_stack(np.where(rel_possibilities))
110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))
111 |
112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0])
113 | if bg_rels.size > 0:
114 | # Sample 4x as many intersecting relationships as non-intersecting.
115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]]
116 | # p = bg_rels_intersect.astype(np.float32)
117 | # p[bg_rels_intersect == 0] = 0.2
118 | # p[bg_rels_intersect == 1] = 0.8
119 | # p /= p.sum()
120 | bg_rels = bg_rels[
121 | np.random.choice(bg_rels.shape[0],
122 | #p=p,
123 | size=num_bg_rel, replace=False)]
124 | else:
125 | bg_rels = np.zeros((0, 3), dtype=np.int64)
126 |
127 | if fg_rels.size == 0 and bg_rels.size == 0:
128 | # Just put something here
129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64)
130 |
131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape))
132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
133 | all_rels_i[:,0:2] += num_box_seen
134 |
135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))]
136 |
137 | rel_labels.append(np.column_stack((
138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64),
139 | all_rels_i,
140 | )))
141 |
142 | num_box_seen += pred_boxes_i.shape[0]
143 | rel_labels = torch.LongTensor(
144 | np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), async=True
145 | )
146 | return rel_labels
147 |
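For each ground-truth relation, the loop above pairs every predicted box matched to the subject with every predicted box matched to the object, then samples up to num_sample_per_gt pairs weighted by the product of their IoUs. A toy sketch of the pairing step (illustrative only; the match matrix is made up):

import numpy as np
# is_match[p, g]: predicted box p has the right label and IoU >= fg_thresh with GT box g
is_match = np.array([[True, False],
                     [True, False],
                     [False, True]])
from_gtind, to_gtind, rel_id = 0, 1, 5   # one GT relation: GT box 0 -> GT box 1
pairs = [(f, t, rel_id)
         for f in np.where(is_match[:, from_gtind])[0]
         for t in np.where(is_match[:, to_gtind])[0]
         if f != t]
# pairs == [(0, 2, 5), (1, 2, 5)]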
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/Makefile:
--------------------------------------------------------------------------------
1 | all: src/cuda/roi_align.cu.o
2 | python build.py
3 |
4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu
5 | $(MAKE) -C src/cuda
6 |
7 | clean:
8 | $(MAKE) -C src/cuda clean
9 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/_ext/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_align import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}}
5 |
6 | # sources = ['src/roi_align.c']
7 | # headers = ['src/roi_align.h']
8 | sources = []
9 | headers = []
10 | defines = []
11 | with_cuda = False
12 |
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/roi_align_cuda.c']
16 | headers += ['src/roi_align_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | with_cuda = True
19 |
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/cuda/roi_align.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 |
25 | ffi = create_extension(
26 | '_ext.roi_align',
27 | headers=headers,
28 | sources=sources,
29 | define_macros=defines,
30 | relative_to=__file__,
31 | with_cuda=with_cuda,
32 | extra_objects=extra_objects
33 | )
34 |
35 | if __name__ == '__main__':
36 | ffi.build()
37 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/functions/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | """
2 | performs ROI aligning
3 | """
4 |
5 | import torch
6 | from torch.autograd import Function
7 | from .._ext import roi_align
8 |
9 | class RoIAlignFunction(Function):
10 | def __init__(self, aligned_height, aligned_width, spatial_scale):
11 | self.aligned_width = int(aligned_width)
12 | self.aligned_height = int(aligned_height)
13 | self.spatial_scale = float(spatial_scale)
14 |
15 | self.feature_size = None
16 |
17 | def forward(self, features, rois):
18 | self.save_for_backward(rois)
19 |
20 | rois_normalized = rois.clone()
21 |
22 | self.feature_size = features.size()
23 | batch_size, num_channels, data_height, data_width = self.feature_size
24 |
25 | height = (data_height -1) / self.spatial_scale
26 | width = (data_width - 1) / self.spatial_scale
27 |
28 | rois_normalized[:,1] /= width
29 | rois_normalized[:,2] /= height
30 | rois_normalized[:,3] /= width
31 | rois_normalized[:,4] /= height
32 |
33 |
34 | num_rois = rois.size(0)
35 |
36 | output = features.new(num_rois, num_channels, self.aligned_height,
37 | self.aligned_width).zero_()
38 |
39 | if features.is_cuda:
40 | res = roi_align.roi_align_forward_cuda(self.aligned_height,
41 | self.aligned_width,
42 | self.spatial_scale, features,
43 | rois_normalized, output)
44 | assert res == 1
45 | else:
46 | raise ValueError
47 |
48 | return output
49 |
50 | def backward(self, grad_output):
51 | assert(self.feature_size is not None and grad_output.is_cuda)
52 |
53 | rois = self.saved_tensors[0]
54 |
55 | rois_normalized = rois.clone()
56 |
57 | batch_size, num_channels, data_height, data_width = self.feature_size
58 |
59 | height = (data_height -1) / self.spatial_scale
60 | width = (data_width - 1) / self.spatial_scale
61 |
62 | rois_normalized[:,1] /= width
63 | rois_normalized[:,2] /= height
64 | rois_normalized[:,3] /= width
65 | rois_normalized[:,4] /= height
66 |
67 | grad_input = rois_normalized.new(batch_size, num_channels, data_height,
68 | data_width).zero_()
69 | res = roi_align.roi_align_backward_cuda(self.aligned_height,
70 | self.aligned_width,
71 | self.spatial_scale, grad_output,
72 | rois_normalized, grad_input)
73 | assert res == 1
74 | return grad_input, None
75 |
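Both the forward and backward passes normalize the RoI coordinates into [0, 1] against the extent of the image that the feature map covers, which is (size - 1) / spatial_scale. A small numeric sketch (illustrative only; the stride and sizes are made up):

spatial_scale = 1.0 / 16                    # feature map at stride 16
data_height, data_width = 38, 50            # feature map size
height = (data_height - 1) / spatial_scale  # 592.0 image pixels covered vertically
width = (data_width - 1) / spatial_scale    # 784.0 image pixels covered horizontally
roi = [0, 100.0, 200.0, 300.0, 400.0]       # [im_ind, x1, y1, x2, y2] in image pixels
normalized = [roi[0],
              roi[1] / width, roi[2] / height,
              roi[3] / width, roi[4] / height]
# x coordinates are divided by width and y by height, so the CUDA kernel can
# sample at fractional image coordinates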
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/modules/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.nn.functional import avg_pool2d, max_pool2d
3 | from ..functions.roi_align import RoIAlignFunction
4 |
5 |
6 | class RoIAlign(Module):
7 | def __init__(self, aligned_height, aligned_width, spatial_scale):
8 | super(RoIAlign, self).__init__()
9 |
10 | self.aligned_width = int(aligned_width)
11 | self.aligned_height = int(aligned_height)
12 | self.spatial_scale = float(spatial_scale)
13 |
14 | def forward(self, features, rois):
15 | return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 | self.spatial_scale)(features, rois)
17 |
18 | class RoIAlignAvg(Module):
19 | def __init__(self, aligned_height, aligned_width, spatial_scale):
20 | super(RoIAlignAvg, self).__init__()
21 |
22 | self.aligned_width = int(aligned_width)
23 | self.aligned_height = int(aligned_height)
24 | self.spatial_scale = float(spatial_scale)
25 |
26 | def forward(self, features, rois):
27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
28 | self.spatial_scale)(features, rois)
29 | return avg_pool2d(x, kernel_size=2, stride=1)
30 |
31 | class RoIAlignMax(Module):
32 | def __init__(self, aligned_height, aligned_width, spatial_scale):
33 | super(RoIAlignMax, self).__init__()
34 |
35 | self.aligned_width = int(aligned_width)
36 | self.aligned_height = int(aligned_height)
37 | self.spatial_scale = float(spatial_scale)
38 |
39 | def forward(self, features, rois):
40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1,
41 | self.spatial_scale)(features, rois)
42 | return max_pool2d(x, kernel_size=2, stride=1)
43 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/src/cuda/Makefile:
--------------------------------------------------------------------------------
1 | all: roi_align_kernel.cu roi_align_kernel.h
2 | /usr/local/cuda/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 -gencode arch=compute_61,code=sm_61
3 | # /usr/local/cuda/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52
4 | clean:
5 | rm roi_align.cu.o
6 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/src/cuda/roi_align_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_ALIGN_KERNEL
2 | #define _ROI_ALIGN_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, int num_boxes, int batch, int image_height, int image_width, int crop_height,
9 | int crop_width, int depth, float extrapolation_value, float* crops_ptr);
10 |
11 | int ROIAlignForwardLaucher(
12 | const float* image_ptr, const float* boxes_ptr,
13 | int num_boxes, int batch, int image_height, int image_width, int crop_height,
14 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream);
15 |
16 | __global__ void ROIAlignBackward(const int nthreads, const float* grads_ptr,
17 | const float* boxes_ptr, int num_boxes, int batch, int image_height,
18 | int image_width, int crop_height, int crop_width, int depth,
19 | float* grads_image_ptr);
20 |
21 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes,
22 | int batch, int image_height, int image_width, int crop_height,
23 | int crop_width, int depth, float* grads_image_ptr, cudaStream_t stream);
24 |
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 |
29 | #endif
30 |
31 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <THC/THC.h>
3 | #include "cuda/roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * image_ptr = THCudaTensor_data(state, features);
12 | float * boxes_ptr = THCudaTensor_data(state, rois);
13 |
14 | float * crops_ptr = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_boxes = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // batch size
25 | int batch = THCudaTensor_size(state, features, 0);
26 | // data height
27 | int image_height = THCudaTensor_size(state, features, 2);
28 | // data width
29 | int image_width = THCudaTensor_size(state, features, 3);
30 | // Number of channels
31 | int depth = THCudaTensor_size(state, features, 1);
32 |
33 | cudaStream_t stream = THCState_getCurrentStream(state);
34 | float extrapolation_value = 0.0;
35 |
36 | ROIAlignForwardLaucher(
37 | image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width,
38 | crop_height, crop_width, depth, extrapolation_value, crops_ptr,
39 | stream);
40 |
41 | return 1;
42 | }
43 |
44 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale,
45 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
46 | {
47 | // Grab the input tensor
48 | float * grads_ptr = THCudaTensor_data(state, top_grad);
49 | float * boxes_ptr = THCudaTensor_data(state, rois);
50 |
51 | float * grads_image_ptr = THCudaTensor_data(state, bottom_grad);
52 |
53 | // Number of ROIs
54 | int num_boxes = THCudaTensor_size(state, rois, 0);
55 | int size_rois = THCudaTensor_size(state, rois, 1);
56 | if (size_rois != 5)
57 | {
58 | return 0;
59 | }
60 |
61 | // batch size
62 | int batch = THCudaTensor_size(state, bottom_grad, 0);
63 | // data height
64 | int image_height = THCudaTensor_size(state, bottom_grad, 2);
65 | // data width
66 | int image_width = THCudaTensor_size(state, bottom_grad, 3);
67 | // Number of channels
68 | int depth = THCudaTensor_size(state, bottom_grad, 1);
69 |
70 | cudaStream_t stream = THCState_getCurrentStream(state);
71 |
72 | ROIAlignBackwardLaucher(
73 | grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width,
74 | crop_height, crop_width, depth, grads_image_ptr, stream);
75 | return 1;
76 | }
77 |
--------------------------------------------------------------------------------
/neural-motifs/lib/fpn/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois,
6 | THCudaTensor * bottom_grad);
7 |
--------------------------------------------------------------------------------
/neural-motifs/lib/get_dataset_counts.py:
--------------------------------------------------------------------------------
1 | """
2 | Get counts of all of the examples in the dataset. Used for creating the baseline
3 | dictionary model
4 | """
5 |
6 | import numpy as np
7 | from dataloaders.visual_genome import VG
8 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps
9 | from lib.pytorch_misc import nonintersecting_2d_inds
10 |
11 |
12 | def get_counts(train_data=VG(mode='train', filter_duplicate_rels=False, num_val_im=5000), must_overlap=True):
13 | """
14 | Get counts of all of the relations. Used for modeling directly P(rel | o1, o2)
15 | :param train_data:
16 | :param must_overlap:
17 | :return:
18 | """
19 | fg_matrix = np.zeros((
20 | train_data.num_classes,
21 | train_data.num_classes,
22 | train_data.num_predicates,
23 | ), dtype=np.int64)
24 |
25 | bg_matrix = np.zeros((
26 | train_data.num_classes,
27 | train_data.num_classes,
28 | ), dtype=np.int64)
29 |
30 | for ex_ind in range(len(train_data)):
31 | gt_classes = train_data.gt_classes[ex_ind].copy()
32 | gt_relations = train_data.relationships[ex_ind].copy()
33 | gt_boxes = train_data.gt_boxes[ex_ind].copy()
34 |
35 | # For the foreground, we'll just look at everything
36 | o1o2 = gt_classes[gt_relations[:, :2]]
37 | for (o1, o2), gtr in zip(o1o2, gt_relations[:,2]):
38 | fg_matrix[o1, o2, gtr] += 1
39 |
40 | # For the background, get all of the things that overlap.
41 | o1o2_total = gt_classes[np.array(
42 | box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)]
43 | for (o1, o2) in o1o2_total:
44 | bg_matrix[o1, o2] += 1
45 |
46 | return fg_matrix, bg_matrix
47 |
48 |
49 | def box_filter(boxes, must_overlap=False):
50 | """ Only include boxes that overlap as possible relations.
51 | If no overlapping boxes, use all of them."""
52 | n_cands = boxes.shape[0]
53 |
54 | overlaps = bbox_overlaps(boxes.astype(np.float), boxes.astype(np.float)) > 0
55 | np.fill_diagonal(overlaps, 0)
56 |
57 | all_possib = np.ones_like(overlaps, dtype=np.bool)
58 | np.fill_diagonal(all_possib, 0)
59 |
60 | if must_overlap:
61 | possible_boxes = np.column_stack(np.where(overlaps))
62 |
63 | if possible_boxes.size == 0:
64 | possible_boxes = np.column_stack(np.where(all_possib))
65 | else:
66 | possible_boxes = np.column_stack(np.where(all_possib))
67 | return possible_boxes
68 |
69 | if __name__ == '__main__':
70 | fg, bg = get_counts(must_overlap=False)
71 |
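fg_matrix[o1, o2, r] counts how often predicate r connects object classes o1 and o2 in the training set, which is what a frequency-baseline model conditions on. A sketch of turning those counts into P(rel | o1, o2); the add-one smoothing here is an assumption for illustration, not the repository's exact recipe:

import numpy as np

def rel_distribution(fg_matrix, o1, o2, eps=1.0):
    # Smoothed conditional distribution over predicates for one class pair.
    counts = fg_matrix[o1, o2].astype(np.float64) + eps
    return counts / counts.sum()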
--------------------------------------------------------------------------------
/neural-motifs/lib/get_union_boxes.py:
--------------------------------------------------------------------------------
1 | """
2 | credits to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/network.py#L91
3 | """
4 |
5 | import torch
6 | from torch.autograd import Variable
7 | from torch.nn import functional as F
8 | from lib.fpn.roi_align.functions.roi_align import RoIAlignFunction
9 | from lib.draw_rectangles.draw_rectangles import draw_union_boxes
10 | import numpy as np
11 | from torch.nn.modules.module import Module
12 | from torch import nn
13 | from config import BATCHNORM_MOMENTUM
14 |
15 | class UnionBoxesAndFeats(Module):
16 | def __init__(self, pooling_size=7, stride=16, dim=256, concat=False, use_feats=True):
17 | """
18 | :param pooling_size: Pool the union boxes to this dimension
19 | :param stride: pixel spacing in the entire image
20 | :param dim: Dimension of the feats
21 | :param concat: Whether to concat (True) or add (False) the representations
22 | """
23 | super(UnionBoxesAndFeats, self).__init__()
24 |
25 | self.pooling_size = pooling_size
26 | self.stride = stride
27 |
28 | self.dim = dim
29 | self.use_feats = use_feats
30 |
31 | self.conv = nn.Sequential(
32 | nn.Conv2d(2, dim //2, kernel_size=7, stride=2, padding=3, bias=True),
33 | nn.ReLU(inplace=True),
34 | nn.BatchNorm2d(dim//2, momentum=BATCHNORM_MOMENTUM),
35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
36 | nn.Conv2d(dim // 2, dim, kernel_size=3, stride=1, padding=1, bias=True),
37 | nn.ReLU(inplace=True),
38 | nn.BatchNorm2d(dim, momentum=BATCHNORM_MOMENTUM),
39 | )
40 | self.concat = concat
41 |
42 | def forward(self, fmap, rois, union_inds):
43 | union_pools = union_boxes(fmap, rois, union_inds, pooling_size=self.pooling_size, stride=self.stride)
44 | if not self.use_feats:
45 | return union_pools.detach()
46 |
47 | pair_rois = torch.cat((rois[:, 1:][union_inds[:, 0]], rois[:, 1:][union_inds[:, 1]]),1).data.cpu().numpy()
48 | # rects_np = get_rect_features(pair_rois, self.pooling_size*2-1) - 0.5
49 | rects_np = draw_union_boxes(pair_rois, self.pooling_size*4-1) - 0.5
50 | rects = Variable(torch.FloatTensor(rects_np).cuda(fmap.get_device()), volatile=fmap.volatile)
51 | if self.concat:
52 | return torch.cat((union_pools, self.conv(rects)), 1)
53 | return union_pools + self.conv(rects)
54 |
55 | # def get_rect_features(roi_pairs, pooling_size):
56 | # rects_np = draw_union_boxes(roi_pairs, pooling_size)
57 | # # add union + intersection
58 | # stuff_to_cat = [
59 | # rects_np.max(1),
60 | # rects_np.min(1),
61 | # np.minimum(1-rects_np[:,0], rects_np[:,1]),
62 | # np.maximum(1-rects_np[:,0], rects_np[:,1]),
63 | # np.minimum(rects_np[:,0], 1-rects_np[:,1]),
64 | # np.maximum(rects_np[:,0], 1-rects_np[:,1]),
65 | # np.minimum(1-rects_np[:,0], 1-rects_np[:,1]),
66 | # np.maximum(1-rects_np[:,0], 1-rects_np[:,1]),
67 | # ]
68 | # rects_np = np.concatenate([rects_np] + [x[:,None] for x in stuff_to_cat], 1)
69 | # return rects_np
70 |
71 |
72 | def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16):
73 | """
74 | :param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride)
75 | :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2]
76 | :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2]
77 | :param pooling_size: we'll resize to this
78 | :param stride:
79 | :return:
80 | """
81 | assert union_inds.size(1) == 2
82 | im_inds = rois[:,0][union_inds[:,0]]
83 | assert (im_inds.data == rois.data[:,0][union_inds[:,1]]).sum() == union_inds.size(0)
84 | union_rois = torch.cat((
85 | im_inds[:,None],
86 | torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]),
87 | torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]),
88 | ),1)
89 |
90 | # (num_rois, d, pooling_size, pooling_size)
91 | union_pools = RoIAlignFunction(pooling_size, pooling_size,
92 | spatial_scale=1/stride)(fmap, union_rois)
93 | return union_pools
94 |
95 |
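The union box of a RoI pair is the elementwise min of the two top-left corners and max of the two bottom-right corners, tagged with the shared image index. A toy check of that geometry (illustrative only):

import torch
rois = torch.tensor([[0., 10., 10., 50., 50.],
                     [0., 40., 30., 90., 80.]])
union_inds = torch.tensor([[0, 1]])
x1y1 = torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]])
x2y2 = torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]])
union_rois = torch.cat((rois[:, 0][union_inds[:, 0]][:, None], x1y1, x2y2), 1)
# union_rois == [[0., 10., 10., 90., 80.]]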
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/highway_lstm_cuda/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/highway_lstm_cuda/_ext/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/_ext/highway_lstm_layer/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._highway_lstm_layer import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/build.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=invalid-name
2 | import os
3 | import torch
4 | from torch.utils.ffi import create_extension
5 |
6 | if not torch.cuda.is_available():
7 | raise Exception('HighwayLSTM can only be compiled with CUDA')
8 |
9 | sources = ['src/highway_lstm_cuda.c']
10 | headers = ['src/highway_lstm_cuda.h']
11 | defines = [('WITH_CUDA', None)]
12 | with_cuda = True
13 |
14 | this_file = os.path.dirname(os.path.realpath(__file__))
15 | extra_objects = ['src/highway_lstm_kernel.cu.o']
16 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
17 |
18 | ffi = create_extension(
19 | '_ext.highway_lstm_layer',
20 | headers=headers,
21 | sources=sources,
22 | define_macros=defines,
23 | relative_to=__file__,
24 | with_cuda=with_cuda,
25 | extra_objects=extra_objects
26 | )
27 |
28 | if __name__ == '__main__':
29 | ffi.build()
30 |
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda/
4 |
5 | # Which CUDA capabilities do we want to pre-build for?
6 | # https://developer.nvidia.com/cuda-gpus
7 | # Compute/shader model Cards
8 | # 61 P4, P40, Titan X
9 | # 60 P100
10 | # 52 M40
11 | # 37 K80
12 | # 35 K40, K20
13 | # 30 K10, Grid K520 (AWS G2)
14 |
15 | CUDA_MODELS=(52 61)
16 |
17 | # Nvidia doesn't guarantee binary compatibility across GPU versions.
18 | # However, binary compatibility within one GPU generation can be guaranteed
19 | # under certain conditions because they share the basic instruction set.
20 | # This is the case between two GPU versions that do not show functional
21 | # differences at all (for instance when one version is a scaled down version
22 | # of the other), or when one version is functionally included in the other.
23 |
24 | # To fix this problem, we can create a 'fat binary' which generates multiple
25 | # translations of the CUDA source. The most appropriate version is chosen at
26 | # runtime by the CUDA driver. See:
27 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-compilation
28 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#fatbinaries
29 | CUDA_MODEL_TARGETS=""
30 | for i in "${CUDA_MODELS[@]}"
31 | do
32 | CUDA_MODEL_TARGETS+=" -gencode arch=compute_${i},code=sm_${i}"
33 | done
34 |
35 | echo "Building kernel for following target architectures: "
36 | echo $CUDA_MODEL_TARGETS
37 |
38 | cd src
39 | echo "Compiling kernel"
40 | /usr/local/cuda/bin/nvcc -c -o highway_lstm_kernel.cu.o highway_lstm_kernel.cu --compiler-options -fPIC $CUDA_MODEL_TARGETS
41 | cd ../
42 | python build.py
43 |
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include "highway_lstm_kernel.h"
3 |
4 | extern THCState *state;
5 |
6 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch,
7 | int numLayers, int seqLength,
8 | THCudaTensor *x,
9 | THIntTensor *lengths,
10 | THCudaTensor *h_data,
11 | THCudaTensor *c_data,
12 | THCudaTensor *tmp_i,
13 | THCudaTensor *tmp_h,
14 | THCudaTensor *T,
15 | THCudaTensor *bias,
16 | THCudaTensor *dropout,
17 | THCudaTensor *gates,
18 | int isTraining) {
19 |
20 | float * x_ptr = THCudaTensor_data(state, x);
21 | int * lengths_ptr = THIntTensor_data(lengths);
22 | float * h_data_ptr = THCudaTensor_data(state, h_data);
23 | float * c_data_ptr = THCudaTensor_data(state, c_data);
24 | float * tmp_i_ptr = THCudaTensor_data(state, tmp_i);
25 | float * tmp_h_ptr = THCudaTensor_data(state, tmp_h);
26 | float * T_ptr = THCudaTensor_data(state, T);
27 | float * bias_ptr = THCudaTensor_data(state, bias);
28 | float * dropout_ptr = THCudaTensor_data(state, dropout);
29 | float * gates_ptr;
30 | if (isTraining == 1) {
31 | gates_ptr = THCudaTensor_data(state, gates);
32 | } else {
33 | gates_ptr = NULL;
34 | }
35 |
36 | cudaStream_t stream = THCState_getCurrentStream(state);
37 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state);
38 |
39 | highway_lstm_forward_ongpu(inputSize, hiddenSize, miniBatch, numLayers,
40 | seqLength, x_ptr, lengths_ptr, h_data_ptr, c_data_ptr, tmp_i_ptr,
41 | tmp_h_ptr, T_ptr, bias_ptr, dropout_ptr, gates_ptr,
42 | isTraining, stream, handle);
43 |
44 | return 1;
45 |
46 | }
47 |
48 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength,
49 | THCudaTensor *out_grad,
50 | THIntTensor *lengths,
51 | THCudaTensor *h_data_grad,
52 | THCudaTensor *c_data_grad,
53 | THCudaTensor *x,
54 | THCudaTensor *h_data,
55 | THCudaTensor *c_data,
56 | THCudaTensor *T,
57 | THCudaTensor *gates_out,
58 | THCudaTensor *dropout_in,
59 | THCudaTensor *h_gates_grad,
60 | THCudaTensor *i_gates_grad,
61 | THCudaTensor *h_out_grad,
62 | THCudaTensor *x_grad,
63 | THCudaTensor *T_grad,
64 | THCudaTensor *bias_grad,
65 | int isTraining,
66 | int do_weight_grad) {
67 |
68 | float * out_grad_ptr = THCudaTensor_data(state, out_grad);
69 | int * lengths_ptr = THIntTensor_data(lengths);
70 | float * h_data_grad_ptr = THCudaTensor_data(state, h_data_grad);
71 | float * c_data_grad_ptr = THCudaTensor_data(state, c_data_grad);
72 | float * x_ptr = THCudaTensor_data(state, x);
73 | float * h_data_ptr = THCudaTensor_data(state, h_data);
74 | float * c_data_ptr = THCudaTensor_data(state, c_data);
75 | float * T_ptr = THCudaTensor_data(state, T);
76 | float * gates_out_ptr = THCudaTensor_data(state, gates_out);
77 | float * dropout_in_ptr = THCudaTensor_data(state, dropout_in);
78 | float * h_gates_grad_ptr = THCudaTensor_data(state, h_gates_grad);
79 | float * i_gates_grad_ptr = THCudaTensor_data(state, i_gates_grad);
80 | float * h_out_grad_ptr = THCudaTensor_data(state, h_out_grad);
81 | float * x_grad_ptr = THCudaTensor_data(state, x_grad);
82 | float * T_grad_ptr = THCudaTensor_data(state, T_grad);
83 | float * bias_grad_ptr = THCudaTensor_data(state, bias_grad);
84 |
85 | cudaStream_t stream = THCState_getCurrentStream(state);
86 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state);
87 |
88 | highway_lstm_backward_ongpu(inputSize, hiddenSize, miniBatch, numLayers,
89 | seqLength, out_grad_ptr, lengths_ptr, h_data_grad_ptr, c_data_grad_ptr,
90 | x_ptr, h_data_ptr, c_data_ptr, T_ptr, gates_out_ptr, dropout_in_ptr,
91 | h_gates_grad_ptr, i_gates_grad_ptr, h_out_grad_ptr,
92 | x_grad_ptr, T_grad_ptr, bias_grad_ptr, isTraining, do_weight_grad,
93 | stream, handle);
94 |
95 | return 1;
96 |
97 | }
98 |
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.h:
--------------------------------------------------------------------------------
1 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength,
2 | THCudaTensor *x, THIntTensor *lengths, THCudaTensor *h_data,
3 | THCudaTensor *c_data, THCudaTensor *tmp_i,
4 | THCudaTensor *tmp_h, THCudaTensor *T, THCudaTensor *bias,
5 | THCudaTensor *dropout, THCudaTensor *gates, int isTraining);
6 |
7 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch,
8 | int numLayers, int seqLength, THCudaTensor *out_grad, THIntTensor *lengths,
9 | THCudaTensor *h_data_grad, THCudaTensor *c_data_grad, THCudaTensor *x,
10 | THCudaTensor *h_data, THCudaTensor *c_data, THCudaTensor *T,
11 | THCudaTensor *gates_out, THCudaTensor *dropout_in,
12 | THCudaTensor *h_gates_grad, THCudaTensor *i_gates_grad,
13 | THCudaTensor *h_out_grad, THCudaTensor *x_grad, THCudaTensor *T_grad,
14 | THCudaTensor *bias_grad, int isTraining, int do_weight_grad);
15 |
--------------------------------------------------------------------------------
/neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_kernel.h:
--------------------------------------------------------------------------------
1 | #include <cublas_v2.h>
2 |
3 | #ifdef __cplusplus
4 | extern "C" {
5 | #endif
6 |
7 | void highway_lstm_forward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *x, int *lengths, float*h_data, float *c_data, float *tmp_i, float *tmp_h, float *T, float *bias, float *dropout, float *gates, int is_training, cudaStream_t stream, cublasHandle_t handle);
8 |
9 | void highway_lstm_backward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *out_grad, int *lengths, float *h_data_grad, float *c_data_grad, float *x, float *h_data, float *c_data, float *T, float *gates_out, float *dropout_in, float *h_gates_grad, float *i_gates_grad, float *h_out_grad, float *x_grad, float *T_grad, float *bias_grad, int isTraining, int do_weight_grad, cudaStream_t stream, cublasHandle_t handle);
10 |
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 |
--------------------------------------------------------------------------------
/neural-motifs/lib/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import math
3 | import torch.utils.model_zoo as model_zoo
4 | from torchvision.models.resnet import model_urls, conv3x3, BasicBlock
5 | from torchvision.models.vgg import vgg16
6 | from config import BATCHNORM_MOMENTUM
7 |
8 | class Bottleneck(nn.Module):
9 | expansion = 4
10 |
11 | def __init__(self, inplanes, planes, stride=1, downsample=None, relu_end=True):
12 | super(Bottleneck, self).__init__()
13 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
14 | self.bn1 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM)
15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 | self.bn2 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM)
18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
19 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=BATCHNORM_MOMENTUM)
20 | self.relu = nn.ReLU(inplace=True)
21 | self.downsample = downsample
22 | self.stride = stride
23 | self.relu_end = relu_end
24 |
25 | def forward(self, x):
26 | residual = x
27 |
28 | out = self.conv1(x)
29 | out = self.bn1(out)
30 | out = self.relu(out)
31 |
32 | out = self.conv2(out)
33 | out = self.bn2(out)
34 | out = self.relu(out)
35 |
36 | out = self.conv3(out)
37 | out = self.bn3(out)
38 |
39 | if self.downsample is not None:
40 | residual = self.downsample(x)
41 |
42 | out += residual
43 |
44 | if self.relu_end:
45 | out = self.relu(out)
46 | return out
47 |
48 |
49 | class ResNet(nn.Module):
50 |
51 | def __init__(self, block, layers, num_classes=1000):
52 | self.inplanes = 64
53 | super(ResNet, self).__init__()
54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
55 | bias=False)
56 | self.bn1 = nn.BatchNorm2d(64, momentum=BATCHNORM_MOMENTUM)
57 | self.relu = nn.ReLU(inplace=True)
58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
59 | self.layer1 = self._make_layer(block, 64, layers[0])
60 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) # HACK
63 | self.avgpool = nn.AvgPool2d(7)
64 | self.fc = nn.Linear(512 * block.expansion, num_classes)
65 |
66 | for m in self.modules():
67 | if isinstance(m, nn.Conv2d):
68 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
69 | m.weight.data.normal_(0, math.sqrt(2. / n))
70 | elif isinstance(m, nn.BatchNorm2d):
71 | m.weight.data.fill_(1)
72 | m.bias.data.zero_()
73 |
74 | def _make_layer(self, block, planes, blocks, stride=1):
75 | downsample = None
76 | if stride != 1 or self.inplanes != planes * block.expansion:
77 | downsample = nn.Sequential(
78 | nn.Conv2d(self.inplanes, planes * block.expansion,
79 | kernel_size=1, stride=stride, bias=False),
80 | nn.BatchNorm2d(planes * block.expansion, momentum=BATCHNORM_MOMENTUM),
81 | )
82 |
83 | layers = []
84 | layers.append(block(self.inplanes, planes, stride, downsample))
85 | self.inplanes = planes * block.expansion
86 | for i in range(1, blocks):
87 | layers.append(block(self.inplanes, planes))
88 |
89 | return nn.Sequential(*layers)
90 |
91 | def forward(self, x):
92 | x = self.conv1(x)
93 | x = self.bn1(x)
94 | x = self.relu(x)
95 | x = self.maxpool(x)
96 |
97 | x = self.layer1(x)
98 | x = self.layer2(x)
99 | x = self.layer3(x)
100 | x = self.layer4(x)
101 |
102 | x = self.avgpool(x)
103 | x = x.view(x.size(0), -1)
104 | x = self.fc(x)
105 |
106 | return x
107 |
108 | def resnet101(pretrained=False, **kwargs):
109 | """Constructs a ResNet-101 model.
110 |
111 | Args:
112 | pretrained (bool): If True, returns a model pre-trained on ImageNet
113 | """
114 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
115 | if pretrained:
116 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
117 | return model
118 |
119 | def resnet_l123():
120 | model = resnet101(pretrained=True)
121 | del model.layer4
122 | del model.avgpool
123 | del model.fc
124 | return model
125 |
126 | def resnet_l4(relu_end=True):
127 | model = resnet101(pretrained=True)
128 | l4 = model.layer4
129 | if not relu_end:
130 | l4[-1].relu_end = False
131 | l4[0].conv2.stride = (1, 1)
132 | l4[0].downsample[0].stride = (1, 1)
133 | return l4
134 |
135 | def vgg_fc(relu_end=True, linear_end=True):
136 | model = vgg16(pretrained=True)
137 | vfc = model.classifier
138 | del vfc._modules['6'] # Get rid of the final linear layer
139 | del vfc._modules['5'] # Get rid of the dropout layer
140 | if not relu_end:
141 | del vfc._modules['4'] # Get rid of the ReLU layer
142 | if not linear_end:
143 | del vfc._modules['3']
144 | return vfc
145 |
146 |
147 |
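resnet_l123 and resnet_l4 above split a pretrained ResNet-101 into a conv1-through-layer3 trunk and a standalone layer4 head whose strides are forced to (1, 1), preserving spatial resolution. A minimal sketch of composing the two pieces (hypothetical usage, not from the repository; assumes the torchvision weights are downloadable):

    import torch
    from torch.autograd import Variable

    trunk = resnet_l123()             # conv1 .. layer3; layer4/avgpool/fc removed
    head = resnet_l4(relu_end=False)  # layer4 alone, strides forced to (1, 1)

    x = Variable(torch.randn(1, 3, 224, 224))
    # trunk.forward still references the deleted layer4, so apply the stages manually
    x = trunk.maxpool(trunk.relu(trunk.bn1(trunk.conv1(x))))
    x = trunk.layer3(trunk.layer2(trunk.layer1(x)))
    feats = head(x)                   # [1, 2048, 14, 14] for a 224x224 input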
--------------------------------------------------------------------------------
/neural-motifs/lib/sparse_targets.py:
--------------------------------------------------------------------------------
1 | from lib.word_vectors import obj_edge_vectors
2 | import torch.nn as nn
3 | import torch
4 | from torch.autograd import Variable
5 | import numpy as np
6 | from config import DATA_PATH
7 | import os
8 | from lib.get_dataset_counts import get_counts
9 |
10 |
11 | class FrequencyBias(nn.Module):
12 | """
13 | The goal of this is to provide a simplified way of computing
14 | P(predicate | obj1, obj2, img).
15 | """
16 |
17 | def __init__(self, eps=1e-3):
18 | super(FrequencyBias, self).__init__()
19 |
20 | fg_matrix, bg_matrix = get_counts(must_overlap=True)
21 | bg_matrix += 1
22 | fg_matrix[:, :, 0] = bg_matrix
23 |
24 | pred_dist = np.log(fg_matrix / fg_matrix.sum(2)[:, :, None] + eps)
25 |
26 | self.num_objs = pred_dist.shape[0]
27 | pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2])
28 |
29 | self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1))
30 | self.obj_baseline.weight.data = pred_dist
31 |
32 | def index_with_labels(self, labels):
33 | """
34 | :param labels: [batch_size, 2]
35 | :return:
36 | """
37 | return self.obj_baseline(labels[:, 0] * self.num_objs + labels[:, 1])
38 |
39 | def forward(self, obj_cands0, obj_cands1):
40 | """
41 | :param obj_cands0: [batch_size, 151] prob distribution over cands.
42 | :param obj_cands1: [batch_size, 151] prob distribution over cands.
43 | :return: [batch_size, #predicates] array, which contains potentials for
44 | each possibility
45 | """
46 | # [batch_size, 151, 151] repr of the joint distribution
47 | joint_cands = obj_cands0[:, :, None] * obj_cands1[:, None]
48 |
49 | # multiply by the [151*151, 51] baseline table to get per-predicate potentials
50 | baseline = joint_cands.view(joint_cands.size(0), -1) @ self.obj_baseline.weight
51 |
52 | return baseline
53 |
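index_with_labels flattens an (obj1, obj2) label pair into a single row of the [num_objs**2, num_predicates] embedding table, while forward computes the same lookup softly from two class distributions. A toy sketch of the hard lookup with made-up sizes (3 object classes, 4 predicates), independent of the Visual Genome counts:

    import torch
    from torch import nn
    from torch.autograd import Variable

    num_objs, num_preds = 3, 4
    table = nn.Embedding(num_objs * num_objs, num_preds)   # stands in for obj_baseline

    labels = Variable(torch.LongTensor([[0, 2], [1, 1]]))  # (obj1, obj2) pairs
    rows = labels[:, 0] * num_objs + labels[:, 1]          # flatten pair -> row index
    potentials = table(rows)                               # [2, num_preds]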
--------------------------------------------------------------------------------
/neural-motifs/lib/surgery.py:
--------------------------------------------------------------------------------
1 | # create predictions from the other stuff
2 | """
3 | Go from proposals + scores to relationships.
4 |
5 | pred-cls: No bbox regression, obj dist is exactly known
6 | sg-cls : No bbox regression
7 | sg-det : Bbox regression
8 |
9 | in all cases we'll return:
10 | boxes, objs, rels, pred_scores
11 |
12 | """
13 |
14 | import numpy as np
15 | import torch
16 | from lib.pytorch_misc import unravel_index
17 | from lib.fpn.box_utils import bbox_overlaps
18 | # from ad3 import factor_graph as fg
19 | from time import time
20 |
21 | def filter_dets(boxes, obj_scores, obj_classes, rel_inds, pred_scores):
22 | """
23 | Filters detections and ranks relationships by score.
24 | :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4]
25 | :param obj_scores: [num_box] probabilities for the scores
26 | :param obj_classes: [num_box] class labels for the topk
27 | :param rel_inds: [num_rel, 2] TENSOR consisting of (im_ind0, im_ind1)
28 | :param pred_scores: [num_rel, num_predicates] predicate scores per relation
30 | :return: boxes, objs, rels, pred_scores
31 |
32 | """
33 | if boxes.dim() != 2:
34 | raise ValueError("Boxes needs to be [num_box, 4] but is {}".format(boxes.size()))
35 |
36 | num_box = boxes.size(0)
37 | assert obj_scores.size(0) == num_box
38 |
39 | assert obj_classes.size() == obj_scores.size()
40 | num_rel = rel_inds.size(0)
41 | assert rel_inds.size(1) == 2
42 | assert pred_scores.size(0) == num_rel
43 |
44 | obj_scores0 = obj_scores.data[rel_inds[:,0]]
45 | obj_scores1 = obj_scores.data[rel_inds[:,1]]
46 |
47 | pred_scores_max, pred_classes_argmax = pred_scores.data[:,1:].max(1)
48 | pred_classes_argmax = pred_classes_argmax + 1
49 |
50 | rel_scores_argmaxed = pred_scores_max * obj_scores0 * obj_scores1
51 | rel_scores_vs, rel_scores_idx = torch.sort(rel_scores_argmaxed.view(-1), dim=0, descending=True)
52 |
53 | rels = rel_inds[rel_scores_idx].cpu().numpy()
54 | pred_scores_sorted = pred_scores[rel_scores_idx].data.cpu().numpy()
55 | obj_scores_np = obj_scores.data.cpu().numpy()
56 | objs_np = obj_classes.data.cpu().numpy()
57 | boxes_out = boxes.data.cpu().numpy()
58 |
59 | return boxes_out, objs_np, obj_scores_np, rels, pred_scores_sorted
60 |
61 | # def _get_similar_boxes(boxes, obj_classes_topk, nms_thresh=0.3):
62 | # """
63 | # Assuming bg is NOT A LABEL.
64 | # :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4]
65 | # :param obj_classes: [num_box, topk] class labels
66 | # :return: num_box, topk, num_box, topk array containing similarities.
67 | # """
68 | # topk = obj_classes_topk.size(1)
69 | # num_box = boxes.size(0)
70 | #
71 | # box_flat = boxes.view(-1, 4) if boxes.dim() == 3 else boxes[:, None].expand(
72 | # num_box, topk, 4).contiguous().view(-1, 4)
73 | # jax = bbox_overlaps(box_flat, box_flat).data > nms_thresh
74 | # # Filter out things that are not gonna compete.
75 | # classes_eq = obj_classes_topk.data.view(-1)[:, None] == obj_classes_topk.data.view(-1)[None, :]
76 | # jax &= classes_eq
77 | # boxes_are_similar = jax.view(num_box, topk, num_box, topk)
78 | # return boxes_are_similar.cpu().numpy().astype(np.bool)
79 |
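filter_dets ranks each candidate relationship by the product of its best non-background predicate score and the two object scores. A toy numeric check of that ordering (hypothetical scores):

    import torch

    obj_scores = torch.Tensor([0.9, 0.8, 0.5])
    rel_inds = torch.LongTensor([[0, 1], [1, 2]])
    pred_scores = torch.Tensor([[0.1, 0.2, 0.7],   # columns: bg, pred1, pred2
                                [0.1, 0.6, 0.3]])

    best, _ = pred_scores[:, 1:].max(1)            # drop the background column
    rank = best * obj_scores[rel_inds[:, 0]] * obj_scores[rel_inds[:, 1]]
    # rank = [0.7*0.9*0.8, 0.6*0.8*0.5] = [0.504, 0.240] -> relation 0 ranks first
    _, order = torch.sort(rank, dim=0, descending=True)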
--------------------------------------------------------------------------------
/neural-motifs/lib/word_vectors.py:
--------------------------------------------------------------------------------
1 | """
2 | Adapted from PyTorch's text library.
3 | """
4 |
5 | import array
6 | import os
7 | import zipfile
8 |
9 | import six
10 | import torch
11 | from six.moves.urllib.request import urlretrieve
12 | from tqdm import tqdm
13 |
14 | from config import DATA_PATH
15 |
16 |
17 | def obj_edge_vectors(names, wv_type='glove.6B', wv_dir=DATA_PATH, wv_dim=300):
18 | wv_dict, wv_arr, wv_size = load_word_vectors(wv_dir, wv_type, wv_dim)
19 |
20 | vectors = torch.Tensor(len(names), wv_dim)
21 | vectors.normal_(0,1)
22 |
23 | for i, token in enumerate(names):
24 | wv_index = wv_dict.get(token, None)
25 | if wv_index is not None:
26 | vectors[i] = wv_arr[wv_index]
27 | else:
28 | # Try the longest word (hopefully it won't be a preposition)
29 | lw_token = sorted(token.split(' '), key=lambda x: len(x), reverse=True)[0]
30 | print("{} -> {} ".format(token, lw_token))
31 | wv_index = wv_dict.get(lw_token, None)
32 | if wv_index is not None:
33 | vectors[i] = wv_arr[wv_index]
34 | else:
35 | print("fail on {}".format(token))
36 |
37 | return vectors
38 |
39 | URL = {
40 | 'glove.42B': 'http://nlp.stanford.edu/data/glove.42B.300d.zip',
41 | 'glove.840B': 'http://nlp.stanford.edu/data/glove.840B.300d.zip',
42 | 'glove.twitter.27B': 'http://nlp.stanford.edu/data/glove.twitter.27B.zip',
43 | 'glove.6B': 'http://nlp.stanford.edu/data/glove.6B.zip',
44 | }
45 |
46 |
47 | def load_word_vectors(root, wv_type, dim):
48 | """Load word vectors from a path, trying .pt, .txt, and .zip extensions."""
49 | if isinstance(dim, int):
50 | dim = str(dim) + 'd'
51 | fname = os.path.join(root, wv_type + '.' + dim)
52 | if os.path.isfile(fname + '.pt'):
53 | fname_pt = fname + '.pt'
54 | print('loading word vectors from', fname_pt)
55 | return torch.load(fname_pt)
56 | if os.path.isfile(fname + '.txt'):
57 | fname_txt = fname + '.txt'
58 | cm = open(fname_txt, 'rb')
59 | cm = [line for line in cm]
60 | elif os.path.basename(wv_type) in URL:
61 | url = URL[wv_type]
62 | print('downloading word vectors from {}'.format(url))
63 | filename = os.path.basename(fname)
64 | if not os.path.exists(root):
65 | os.makedirs(root)
66 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
67 | fname, _ = urlretrieve(url, fname, reporthook=reporthook(t))
68 | with zipfile.ZipFile(fname, "r") as zf:
69 | print('extracting word vectors into {}'.format(root))
70 | zf.extractall(root)
71 | if not os.path.isfile(fname + '.txt'):
72 | raise RuntimeError('no word vectors of requested dimension found')
73 | return load_word_vectors(root, wv_type, dim)
74 | else:
75 | raise RuntimeError('unable to load word vectors')
76 |
77 | wv_tokens, wv_arr, wv_size = [], array.array('d'), None
78 | if cm is not None:
79 | for line in tqdm(range(len(cm)), desc="loading word vectors from {}".format(fname_txt)):
80 | entries = cm[line].strip().split(b' ')
81 | word, entries = entries[0], entries[1:]
82 | if wv_size is None:
83 | wv_size = len(entries)
84 | try:
85 | if isinstance(word, six.binary_type):
86 | word = word.decode('utf-8')
87 | except UnicodeDecodeError:
88 | print('non-UTF8 token', repr(word), 'ignored')
89 | continue
90 | wv_arr.extend(float(x) for x in entries)
91 | wv_tokens.append(word)
92 |
93 | wv_dict = {word: i for i, word in enumerate(wv_tokens)}
94 | wv_arr = torch.Tensor(wv_arr).view(-1, wv_size)
95 | ret = (wv_dict, wv_arr, wv_size)
96 | torch.save(ret, fname + '.pt')
97 | return ret
98 |
99 | def reporthook(t):
100 | """https://github.com/tqdm/tqdm"""
101 | last_b = [0]
102 |
103 | def inner(b=1, bsize=1, tsize=None):
104 | """
105 | b: int, optional
106 | Number of blocks just transferred [default: 1].
107 | bsize: int, optional
108 | Size of each block (in tqdm units) [default: 1].
109 | tsize: int, optional
110 | Total size (in tqdm units). If [default: None] remains unchanged.
111 | """
112 | if tsize is not None:
113 | t.total = tsize
114 | t.update((b - last_b[0]) * bsize)
115 | last_b[0] = b
116 | return inner
117 |
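obj_edge_vectors seeds one row per class name from GloVe, falling back to the longest token of a multi-word name and leaving the random N(0, 1) row when nothing matches. A sketch of typical usage (hypothetical names; assumes glove.6B is present in, or downloadable to, wv_dir):

    names = ['person', 'traffic light', 'xyzzy']
    vecs = obj_edge_vectors(names, wv_type='glove.6B', wv_dir='data', wv_dim=300)
    # vecs[0]: GloVe 'person'; vecs[1]: GloVe 'traffic' (longest token);
    # vecs[2]: random init, since 'xyzzy' is out of vocabulary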
--------------------------------------------------------------------------------
/neural-motifs/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/misc/__init__.py
--------------------------------------------------------------------------------
/neural-motifs/models/eval_rels.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 |
4 | import os, sys
5 | script_dir = os.path.dirname(os.path.realpath(__file__))
6 | sys.path.append('{}/../'.format(script_dir))
7 |
8 | from dataloaders.visual_genome import VGDataLoader, VG
9 | import numpy as np
10 | import torch
11 |
12 | from config import ModelConfig
13 | from lib.pytorch_misc import optimistic_restore
14 | from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
15 | from tqdm import tqdm
16 | from config import BOX_SCALE, IM_SCALE
17 | import dill as pkl
18 | import os
19 |
20 | import sys
21 | from IPython.core import ultratb
22 | sys.excepthook = ultratb.FormattedTB(mode='Verbose',
23 | color_scheme='Linux', call_pdb=1)
24 |
25 | conf = ModelConfig()
26 | if conf.model == 'motifnet':
27 | from lib.rel_model import RelModel
28 | elif conf.model == 'stanford':
29 | from lib.rel_model_stanford import RelModelStanford as RelModel
30 | else:
31 | raise ValueError('invalid model: {}'.format(conf.model))
32 |
33 | train, val, test = VG.splits(
34 | num_val_im=conf.val_size, filter_duplicate_rels=True,
35 | use_proposals=conf.use_proposals,
36 | filter_non_overlap=conf.mode == 'sgdet')
37 | if conf.test:
38 | val = test
39 | train_loader, val_loader = VGDataLoader.splits(
40 | train, val, mode='rel',
41 | batch_size=conf.batch_size,
42 | num_workers=conf.num_workers,
43 | num_gpus=conf.num_gpus
44 | )
45 |
46 | detector = RelModel(
47 | classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
48 | num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
49 | use_resnet=conf.use_resnet, order=conf.order,
50 | nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
51 | use_proposals=conf.use_proposals,
52 | pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
53 | pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
54 | pooling_dim=conf.pooling_dim,
55 | rec_dropout=conf.rec_dropout,
56 | use_bias=conf.use_bias,
57 | use_tanh=conf.use_tanh,
58 | limit_vision=conf.limit_vision
59 | )
60 |
61 |
62 | detector.cuda()
63 | ckpt = torch.load(conf.ckpt)
64 |
65 | optimistic_restore(detector, ckpt['state_dict'])
66 | # if conf.mode == 'sgdet':
67 | # det_ckpt = torch.load('checkpoints/new_vgdet/vg-19.tar')['state_dict']
68 | # detector.detector.bbox_fc.weight.data.copy_(det_ckpt['bbox_fc.weight'])
69 | # detector.detector.bbox_fc.bias.data.copy_(det_ckpt['bbox_fc.bias'])
70 | # detector.detector.score_fc.weight.data.copy_(det_ckpt['score_fc.weight'])
71 | # detector.detector.score_fc.bias.data.copy_(det_ckpt['score_fc.bias'])
72 |
73 | all_pred_entries = []
74 | def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)):
75 | result, result_preds = detector[b]
76 | assert conf.num_gpus == 1
77 | # if conf.num_gpus == 1:
78 | # result_preds = [result_preds]
79 |
80 |
81 | for i, (boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i) in enumerate(result_preds):
82 | gt_entry = {
83 | 'gt_classes': val.gt_classes[batch_num + i].copy(),
84 | 'gt_relations': val.relationships[batch_num + i].copy(),
85 | 'gt_boxes': val.gt_boxes[batch_num + i].copy(),
86 | }
87 | assert np.all(objs_i[rels_i[:,0]] > 0) and np.all(objs_i[rels_i[:,1]] > 0)
88 | # assert np.all(rels_i[:,2] > 0)
89 |
90 | pred_entry = {
91 | 'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
92 | 'pred_classes': objs_i,
93 | 'pred_rel_inds': rels_i,
94 | 'obj_scores': obj_scores_i,
95 | 'rel_scores': pred_scores_i,
96 | }
97 | all_pred_entries.append(pred_entry)
98 |
99 | evaluator[conf.mode].evaluate_scene_graph_entry(
100 | gt_entry,
101 | pred_entry,
102 | )
103 |
104 | evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred)
105 | if conf.cache is not None and os.path.exists(conf.cache):
106 | print("Found {}! Loading from it".format(conf.cache))
107 | with open(conf.cache,'rb') as f:
108 | all_pred_entries = pkl.load(f)
109 | for i, pred_entry in enumerate(tqdm(all_pred_entries)):
110 | gt_entry = {
111 | 'gt_classes': val.gt_classes[i].copy(),
112 | 'gt_relations': val.relationships[i].copy(),
113 | 'gt_boxes': val.gt_boxes[i].copy(),
114 | }
115 | evaluator[conf.mode].evaluate_scene_graph_entry(
116 | gt_entry,
117 | pred_entry,
118 | )
119 | evaluator[conf.mode].print_stats()
120 | else:
121 | detector.eval()
122 | for val_b, batch in enumerate(tqdm(val_loader)):
123 | val_batch(conf.num_gpus*val_b, batch, evaluator)
124 |
125 | evaluator[conf.mode].print_stats()
126 |
127 | if conf.cache is not None:
128 | with open(conf.cache,'wb') as f:
129 | pkl.dump(all_pred_entries, f)
130 |
131 | import pickle as pkl
132 | tag = 'test' if conf.test else 'train'
133 | if conf.multi_pred:
134 | tag += '.multi_pred'
135 | for N in [20, 50, 100]:
136 | pkl.dump(evaluator['predcls'].result_dict['predcls_recall'][N],
137 | open('{}.{}.pkl'.format(tag, N), 'wb'))
138 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/eval_models_sgcls.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This is a script that will evaluate all models for SGCLS
4 | export CUDA_VISIBLE_DEVICES=$1
5 |
6 | if [ $1 == "0" ]; then
7 | echo "EVALING THE BASELINE"
8 | python models/eval_rels.py -m sgcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \
9 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar \
10 | -nepoch 50 -use_bias -test -cache baseline_sgcls
11 | python models/eval_rels.py -m predcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \
12 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar \
13 | -nepoch 50 -use_bias -test -cache baseline_predcls
14 | elif [ $1 == "1" ]; then
15 | echo "EVALING MESSAGE PASSING"
16 | python models/eval_rels.py -m sgcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \
17 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -test -cache stanford_sgcls
18 | python models/eval_rels.py -m predcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \
19 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -test -cache stanford_predcls
20 | elif [ $1 == "2" ]; then
21 | echo "EVALING MOTIFNET"
22 | python models/eval_rels.py -m sgcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
23 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -cache motifnet_sgcls
24 | python models/eval_rels.py -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
25 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -cache motifnet_predcls
26 | fi
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/eval_models_sgdet.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This is a script that will evaluate all the models for SGDET
4 | export CUDA_VISIBLE_DEVICES=$1
5 |
6 | if [ $1 == "0" ]; then
7 | echo "EVALING THE BASELINE"
8 | python models/eval_rels.py -m sgdet -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \
9 | -clip 5 -p 100 -pooling_dim 4096 -ngpu 1 -ckpt checkpoints/baseline-sgdet/vgrel-17.tar \
10 | -nepoch 50 -use_bias -cache baseline_sgdet.pkl -test
11 | elif [ $1 == "1" ]; then
12 | echo "EVALING MESSAGE PASSING"
13 |
14 | python models/eval_rels.py -m sgdet -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \
15 | -ckpt checkpoints/stanford-sgdet/vgrel-18.tar -cache stanford_sgdet.pkl -test
16 | elif [ $1 == "2" ]; then
17 | echo "EVALING MOTIFNET"
18 | python models/eval_rels.py -m sgdet -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
19 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/motifnet-sgdet/vgrel-14.tar -nepoch 50 -cache motifnet_sgdet.pkl -use_bias
20 | fi
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/pretrain_detector.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Train the model without COCO pretraining
3 | python models/train_detector.py -b 6 -lr 1e-3 -save_dir checkpoints/vgdet -nepoch 50 -ngpu 3 -nwork 3 -p 100 -clip 5
4 |
5 | # If you want to evaluate on the frequency baseline now, run this command (replace the checkpoint with the
6 | # best checkpoint you found).
7 | #export CUDA_VISIBLE_DEVICES=0
8 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-24.tar -nwork 1 -p 100 -test
9 | #export CUDA_VISIBLE_DEVICES=1
10 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test
11 | #export CUDA_VISIBLE_DEVICES=2
12 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test
13 | #
14 | #
15 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/refine_for_detection.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Refine Motifnet for detection
4 |
5 |
6 | export CUDA_VISIBLE_DEVICES=$1
7 |
8 | if [ $1 == "0" ]; then
9 | echo "TRAINING THE BASELINE"
10 | python models/train_rels.py -m sgdet -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \
11 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar -save_dir checkpoints/baseline-sgdet \
12 | -nepoch 50 -use_bias
13 | elif [ $1 == "1" ]; then
14 | echo "TRAINING STANFORD"
15 | python models/train_rels.py -m sgdet -model stanford -b 6 -p 100 -lr 1e-4 -ngpu 1 -clip 5 \
16 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -save_dir checkpoints/stanford-sgdet
17 | elif [ $1 == "2" ]; then
18 | echo "Refining Motifnet for detection!"
19 | python models/train_rels.py -m sgdet -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/motifnet-sgcls/vgrel-7.tar \
21 | -save_dir checkpoints/motifnet-sgdet -nepoch 10 -use_bias
22 | fi
--------------------------------------------------------------------------------
/neural-motifs/scripts/train_models_sgcls.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This is a script that will train all of the models for scene graph classification and then evaluate them.
4 | # export CUDA_VISIBLE_DEVICES=$1
5 | export CUDA_VISIBLE_DEVICES=1
6 |
7 | if [ $1 == "0" ]; then
8 | echo "TRAINING THE BASELINE"
9 | python models/train_rels.py -m sgcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \
10 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/baseline2 \
11 | -nepoch 50 -use_bias
12 | elif [ $1 == "1" ]; then
13 | echo "TRAINING MESSAGE PASSING"
14 |
15 | python models/train_rels.py -m sgcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \
16 | -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford2
17 | elif [ $1 == "2" ]; then
18 | echo "TRAINING MOTIFNET"
19 |
20 | python models/train_rels.py -m sgcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
21 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \
22 | -save_dir checkpoints/motifnet2 -nepoch 50 -use_bias
23 | fi
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/train_motifnet.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Train Motifnet using different orderings
4 |
5 | export CUDA_VISIBLE_DEVICES=$1
6 |
7 | if [ $1 == "0" ]; then
8 | echo "TRAINING MOTIFNET V1"
9 | python models/train_rels.py -m sgcls -model motifnet -order size -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
10 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \
11 | -save_dir checkpoints/motifnet-size-sgcls -nepoch 50 -use_bias
12 | elif [ $1 == "1" ]; then
13 | echo "TRAINING MOTIFNET V2"
14 | python models/train_rels.py -m sgcls -model motifnet -order random -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
15 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \
16 | -save_dir checkpoints/motifnet-random-sgcls -nepoch 50 -use_bias
17 | elif [ $1 == "2" ]; then
18 | echo "TRAINING MOTIFNET V3"
19 | python models/train_rels.py -m sgcls -model motifnet -order confidence -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \
21 | -save_dir checkpoints/motifnet-conf-sgcls -nepoch 50 -use_bias
22 | fi
--------------------------------------------------------------------------------
/neural-motifs/scripts/train_predcls.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd "$(dirname "$0")/.."
4 |
5 | run_baseline() {
6 | SEED=$1
7 | # No manual seed is set internally, so just give the
8 | # output directory a different name.
9 | python3 models/train_rels.py -m predcls -model motifnet \
10 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
11 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \
12 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \
13 | -save_dir checkpoints/baseline_predcls.$SEED \
14 | -nepoch 30 -use_bias &> /dev/null &
15 | }
16 |
17 | run_lml() {
18 | TOPK=$1
19 | SEED=$2
20 | # No manual seed is set internally, so just give the
21 | # output directory a different name.
22 | python3 models/train_rels.py -m predcls -model motifnet \
23 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
24 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \
25 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \
26 | -save_dir checkpoints/lml_predcls.$TOPK.$SEED \
27 | -nepoch 30 -use_bias -lml_topk $TOPK &> /dev/null &
28 | }
29 |
30 | run_entr() {
31 | TOPK=$1
32 | SEED=$2
33 | # No manual seed is set internally, so just give the
34 | # output directory a different name.
35 | python3 models/train_rels.py -m predcls -model motifnet \
36 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
37 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \
38 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \
39 | -save_dir checkpoints/entr_predcls.$TOPK.$SEED \
40 | -nepoch 30 -use_bias -entr_topk $TOPK &> logs/entr.$TOPK.log &
41 | }
42 |
43 | run_ml() {
44 | SEED=$1
45 | # No manual seed is set internally, so just give the
46 | # output directory a different name.
47 | python3 models/train_rels.py -m predcls -model motifnet \
48 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \
49 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \
50 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \
51 | -save_dir checkpoints/ml_predcls.$SEED \
52 | -nepoch 30 -use_bias -ml_loss &> logs/ml.log &
53 | }
54 |
55 | SEED=0
56 |
57 | # export CUDA_VISIBLE_DEVICES=0
58 | # run_baseline $SEED
59 |
60 | # export CUDA_VISIBLE_DEVICES=1
61 | # run_lml 20 $SEED
62 |
63 | # export CUDA_VISIBLE_DEVICES=0
64 | # run_lml 50 $SEED
65 |
66 | # export CUDA_VISIBLE_DEVICES=1
67 | # run_lml 100 $SEED
68 |
69 | # export CUDA_VISIBLE_DEVICES=0
70 | # run_entr 20 $SEED
71 |
72 | # export CUDA_VISIBLE_DEVICES=1
73 | # run_entr 50 $SEED
74 |
75 | # export CUDA_VISIBLE_DEVICES=2
76 | # run_entr 100 $SEED
77 |
78 | export CUDA_VISIBLE_DEVICES=0
79 | run_ml $SEED
80 |
81 | wait
82 |
--------------------------------------------------------------------------------
/neural-motifs/scripts/train_stanford.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python models/train_rels.py -m sgcls -model stanford -b 4 -p 400 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford -adam
4 |
5 | # To test you can run this command
6 | # python models/eval_rels.py -m sgcls -model stanford -ngpu 1 -ckpt checkpoints/stanford/vgrel-28.tar -test
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | setup(
4 | name='lml',
5 | version='0.0.1',
6 | description="The limited multi-label projection layer.",
7 | author='Brandon Amos',
8 | author_email='brandon.amos.cs@gmail.com',
9 | platforms=['any'],
10 | license="MIT",
11 | url='https://github.com/locuslab/lml',
12 | py_modules=['lml'],
13 | install_requires=[
14 | 'numpy>=1,<2',
15 | 'semantic_version',
16 | ]
17 | )
18 |
--------------------------------------------------------------------------------
/smooth-topk/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # experiments logged
104 | xp/
105 |
--------------------------------------------------------------------------------
/smooth-topk/README.md:
--------------------------------------------------------------------------------
1 | This directory and README are from the
2 | [oval-group/smooth-topk](https://github.com/oval-group/smooth-topk)
3 | repository.
4 | Our modifications remain under the same license.
5 |
6 | ---
7 |
8 | # Smooth Loss Functions for Deep Top-k Classification
9 |
10 | This repository contains the implementation of the paper [Smooth Loss Functions for Deep Top-k Classification](https://openreview.net/forum?id=Hk5elxbRW&noteId=Hk5elxbRW) in pytorch. If you use this work for your research, please cite the paper:
11 |
12 | ```
13 | @Article{berrada2018smooth,
14 | author = {Berrada, Leonard and Zisserman, Andrew and Kumar, M Pawan},
15 | title = {Smooth Loss Functions for Deep Top-k Classification},
16 | journal = {International Conference on Learning Representations},
17 | year = {2018},
18 | }
19 | ```
20 |
21 | The core algorithm to compute the smooth top-k loss function with the elementary symmetric polynomials is contained in the `losses` module. Algorithms for the elementary symmetric polynomials and their gradients are in `losses/polynomial`.
22 |
23 | ## Requirements
24 |
25 | This code has been tested with pytorch 0.3.0. Detailed requirements are available in `requirements.txt`. The code should be compatible with python 2 and 3 (developed in 2.7).
26 |
27 | ## Reproducing the results
28 |
29 | ### CIFAR-100
30 |
31 | To reproduce the experiments with gpu `1`:
32 | * `scripts/cifar100_noise_ce.sh 1`
33 | * `scripts/cifar100_noise_svm.sh 1`
34 |
35 | ### ImageNet
36 |
37 | We use the official validation set of ImageNet as a test set. Therefore we create our own balanced validation set made of 50,000 training images. This can be done with `scripts/imagenet_split.py`.
38 |
39 | To reproduce the experiments:
40 | * `scripts/imagenet_subsets_ce.sh`
41 | * `scripts/imagenet_subsets_svm.sh`
42 |
43 | Warning: these scripts will use all available GPUs. To restrict the devices used, use the environment variable `CUDA_VISIBLE_DEVICES`. For example, to train the SVM models on GPUs `0` and `1`, you can run `CUDA_VISIBLE_DEVICES=0,1 scripts/imagenet_subsets_svm.sh`.
44 |
45 | The performance of the resulting models can then be obtained by executing `python scripts/eval.py`. This script evaluates the performance of the best models and writes the results in a text file.
46 |
47 | ### Algorithms Comparison
48 |
49 | The script `scripts/perf.py` can be used to compare the speed and numerical stability of different algorithms, including the standard algorithm to evaluate the Elementary Symmetric Functions (ESF).
50 |
51 | ## Acknowledgments
52 |
53 | The DenseNet implementation is from [densenet-pytorch](https://github.com/andreasveit/densenet-pytorch).
54 |
55 |
--------------------------------------------------------------------------------
/smooth-topk/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | torch==0.3.0
3 | future
4 | scipy
5 | numpy
6 | git+git://github.com/oval-group/logger#egg=v0.3
7 | torchvision==0.2.0
8 |
--------------------------------------------------------------------------------
/smooth-topk/src/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/data/__init__.py
--------------------------------------------------------------------------------
/smooth-topk/src/data/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.utils.data as data
3 | import torchvision.datasets as datasets
4 |
5 | from collections import defaultdict
6 |
7 |
8 | class LabelNoise(data.Dataset):
9 | def __init__(self, dataset, k, n_labels, p=1):
10 |
11 | assert n_labels % k == 0
12 |
13 | self.dataset = dataset
14 | self.k = k
15 | # random label between 0 and k-1
16 | self.noise = np.random.choice(k, size=len(self.dataset))
17 | # noisy labels are introduced for each sample with probability p
18 | self.p = np.random.binomial(1, p, size=len(self.dataset))
19 |
20 | print('Noisy labels (p={})'.format(p))
21 |
22 | def __getitem__(self, idx):
23 | img, label = self.dataset[idx]
24 | if self.p[idx]:
25 | label = label - label % self.k + self.noise[idx]
26 | return img, label
27 |
28 | def __len__(self):
29 | return len(self.dataset)
30 |
31 |
32 | class Subset(data.Dataset):
33 | def __init__(self, dataset, indices=None):
34 | """
35 | Subset of dataset given by indices.
36 | """
37 | super(Subset, self).__init__()
38 | self.dataset = dataset
39 | self.indices = indices
40 |
41 | if self.indices is None:
42 | self.n_samples = len(self.dataset)
43 | else:
44 | self.n_samples = len(self.indices)
45 | assert self.n_samples >= 0 and \
46 | self.n_samples <= len(self.dataset), \
47 | "length of {} incompatible with dataset of size {}"\
48 | .format(self.n_samples, len(self.dataset))
49 |
50 | def __getitem__(self, idx):
51 | if self.indices is None:
52 | return self.dataset[idx]
53 | else:
54 | return self.dataset[self.indices[idx]]
55 |
56 | def __len__(self):
57 | return self.n_samples
58 |
59 |
60 | def random_subsets(subset_sizes, n_total, seed=None, replace=False):
61 | """
62 | Return subsets of indices, with sizes given by the iterable
63 | subset_sizes, drawn from {0, ..., n_total - 1}.
64 | Subsets are pairwise disjoint unless replace=True.
65 | Optional seed for deterministic draw.
66 | """
67 | # save current random state
68 | state = np.random.get_state()
69 | sum_sizes = sum(subset_sizes)
70 | assert sum_sizes <= n_total
71 |
72 | np.random.seed(seed)
73 |
74 | total_subset = np.random.choice(n_total, size=sum_sizes,
75 | replace=replace)
76 | perm = np.random.permutation(total_subset)
77 | res = []
78 | start = 0
79 | for size in subset_sizes:
80 | res.append(perm[start: start + size])
81 | start += size
82 | # restore initial random state
83 | np.random.set_state(state)
84 | return res
85 |
86 |
87 | def split_dataset(dataset_train, dataset_val, train_size, val_size):
88 | if isinstance(dataset_train, datasets.ImageFolder):
89 | n_classes = len(dataset_train.classes)
90 | if train_size < len(dataset_train):
91 | train_size_per_class = train_size // n_classes
92 | else:
93 | train_size_per_class = float('inf')
94 | assert train_size_per_class > 0
95 | my_dict = defaultdict(list)
96 | for path, label in dataset_train.imgs: my_dict[label].append(path)
97 | train_imgs = []
98 | for k in my_dict.keys():
99 | imgs = my_dict[k]
100 | adapted_train_size = min(train_size_per_class, len(imgs))
101 | train_indices, = random_subsets((adapted_train_size,),
102 | len(imgs),
103 | seed=1234 + int(k))
104 | train_imgs += [(imgs[idx], int(k)) for idx in train_indices]
105 | np.random.shuffle(train_imgs)
106 | dataset_train.imgs = train_imgs
107 | dataset_train.samples = train_imgs
108 | else:
109 | train_indices, val_indices = random_subsets((train_size, val_size),
110 | len(dataset_train),
111 | seed=1234)
112 |
113 | dataset_train = Subset(dataset_train, train_indices)
114 | dataset_val = Subset(dataset_val, val_indices)
115 |
116 | return dataset_train, dataset_val
117 |
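LabelNoise replaces a label, with probability p, by a uniformly random label inside its block of k consecutive classes, so a top-k prediction can still cover the true class. A worked check of the arithmetic in __getitem__ (toy values):

    k, label = 5, 13                 # blocks are {0..4}, {5..9}, {10..14}, ...
    for noise in range(k):
        noisy = label - label % k + noise
        assert 10 <= noisy <= 14     # stays inside the block containing 13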
--------------------------------------------------------------------------------
/smooth-topk/src/epoch.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.autograd as ag
3 |
4 | from utils import regularization, accuracy, print_stats, dump_results
5 | from tqdm import tqdm
6 |
7 |
8 | def data_to_var(data, target, cuda, volatile=False):
9 |
10 | if cuda:
11 | data = data.cuda()
12 | target = target.cuda()
13 | data = ag.Variable(data, volatile=volatile)
14 | target = ag.Variable(target)
15 |
16 | return data, target
17 |
18 |
19 | def train(model, loss, optimizer, loader, xp, args):
20 |
21 | if args.use_dali:
22 | loader.reset()
23 | loader_len = loader._size // loader.batch_size
24 | else:
25 | loader_len = len(loader)
26 |
27 | if not loader_len:
28 | return 0
29 |
30 | model.train()
31 |
32 | xp.Parent_Train.reset()
33 |
34 | for batch_idx, batch in tqdm(enumerate(loader), desc='Train Epoch',
35 | leave=False, total=loader_len):
36 | if args.use_dali:
37 | data = batch[0]['data']
38 | target = batch[0]['label'].squeeze().cuda().long()
39 | else:
40 | data, target = batch
41 | data, target = data_to_var(data, target, args.cuda)
42 |
43 | output = model(data)
44 | obj = loss(output, target)
45 | if obj.item() != obj.item():  # NaN is the only value not equal to itself
46 | print('NaN Error')
47 | import sys
48 | sys.exit(-1)
49 |
50 | optimizer.zero_grad()
51 | obj.backward()
52 | optimizer.step()
53 |
54 | prec1 = accuracy(output.data, target.data, topk=1)
55 | preck = accuracy(output.data, target.data, topk=xp.config['topk'])
56 | xp.Parent_Train.update(loss=obj.data.item(), acck=preck, acc1=prec1, n=data.size(0))
57 |
58 | # compute objective function (including regularization)
59 | obj = xp.Loss_Train.get() + regularization(model, xp.mu)
60 | xp.Obj_Train.update(obj)
61 | # measure elapsed time
62 | xp.Timer_Train.update()
63 |
64 | xp.log_with_tag('train')
65 |
66 | if args.verbosity:
67 | print_stats(xp, 'train')
68 |
69 |
70 | def test(model, loss, loader, xp, args):
71 | if 'dali' in loader.__module__:
72 | loader.reset()
73 | loader_len = loader._size // loader.batch_size
74 | else:
75 | loader_len = len(loader)
76 | if not loader_len:
77 | return 0
78 |
79 | model.eval()
80 |
81 | metrics = xp.get_metric(tag=loader.tag, name='parent')
82 | timer = xp.get_metric(tag=loader.tag, name='timer')
83 |
84 | metrics.reset()
85 |
86 | if args.multiple_crops:
87 | epoch_test_multiple_crops(model, loader, xp, args.cuda)
88 | else:
89 | epoch_test(model, loader, xp, args.cuda)
90 |
91 | # measure elapsed time
92 | timer.update()
93 | xp.log_with_tag(loader.tag)
94 |
95 | if loader.tag == 'val':
96 | xp.Acc1_Val_Best.update(xp.acc1_val).log()
97 | xp.Acck_Val_Best.update(xp.acck_val).log()
98 |
99 | if args.verbosity:
100 | print_stats(xp, loader.tag)
101 |
102 | if args.eval:
103 | dump_results(xp, args)
104 |
105 |
106 | def epoch_test(model, loader, xp, cuda):
107 | if 'dali' in loader.__module__:
108 | loader_len = loader._size // loader.batch_size
109 | else:
110 | loader_len = len(loader)
111 | metrics = xp.get_metric(tag=loader.tag, name='parent')
112 | for batch_idx, batch in tqdm(enumerate(loader), desc='Test Epoch',
113 | leave=False, total=loader_len):
114 | if 'dali' in loader.__module__:
115 | data = batch[0]['data']
116 | target = batch[0]['label'].squeeze().cuda().long()
117 | else:
118 | data, target = batch
119 | data, target = data_to_var(data, target, cuda, volatile=True)
120 | output = model(data)
121 |
122 | prec1 = accuracy(output.data, target.data, topk=1)
123 | preck = accuracy(output.data, target.data, topk=xp.config['topk'])
124 | metrics.update(acck=preck, acc1=prec1, n=data.size(0))
125 |
126 |
127 | def epoch_test_multiple_crops(model, loader, xp, cuda):
128 | metrics = xp.get_metric(tag=loader.tag, name='parent')
129 | xp.Temperature.update()
130 | for batch_idx, (data, target) in tqdm(enumerate(loader), desc='Test Epoch',
131 | leave=False, total=len(loader)):
132 |
133 | target = ag.Variable(target.cuda())
134 | avg = 0
135 | for img in data:
136 | img = ag.Variable(img.cuda(), volatile=True)
137 | output = model(img)
138 | # cross-entropy
139 | if xp.temperature == -1:
140 | avg += nn.functional.softmax(output).data
141 | # smooth-svm
142 | else:
143 | avg += output.data
144 | # avg += torch.exp(output.data / xp.temperature)
145 |
146 | prec1 = accuracy(avg, target.data, topk=1)
147 | preck = accuracy(avg, target.data, topk=xp.config['topk'])
148 | metrics.update(acck=preck, acc1=prec1, n=target.size(0))
149 |
--------------------------------------------------------------------------------
/smooth-topk/src/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/losses/__init__.py
--------------------------------------------------------------------------------
/smooth-topk/src/losses/entr.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | class EntrLoss(nn.Module):
5 | def __init__(self, n_classes, k=5, tau=1.0):
6 | super(EntrLoss, self).__init__()
7 | self.n_classes = n_classes
8 | self.k = k
9 | self.tau = tau
10 |
11 | def forward(self, x, y):
12 | n_batch = x.shape[0]
13 |
14 | x = x/self.tau
15 | x_sorted, I = x.sort(dim=1, descending=True)
16 | x_sorted_last = x_sorted[:,self.k:]
17 | I_last = I[:,self.k:]
18 |
19 | fy = x.gather(1, y.unsqueeze(1))
20 | J = (I_last != y.unsqueeze(1)).type_as(x)
21 |
22 | # Could potentially be improved numerically by using
23 | # \log\sum_i \exp(x_i) = c + \log\sum_i \exp(x_i - c) with c = \max_i x_i
24 | safe_z = torch.clamp(x_sorted_last-fy, max=80)
25 | losses = torch.log(1.+torch.sum(safe_z.exp()*J, dim=1))
26 |
27 | return losses.mean()
28 |
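The clamp at 80 only guards against overflow; the comment above suggests the standard log-sum-exp shift instead. A sketch of that variant under the same inputs (an assumption-level rewrite, not part of the original loss):

    import torch

    def stable_entr_terms(x_sorted_last, fy, J):
        # log(1 + sum_j exp(z_j) * J_j) with z_j = x_sorted_last_j - fy,
        # shifted by c = max(0, max_j z_j) for numerical stability
        z = x_sorted_last - fy
        c = torch.clamp(z.max(dim=1, keepdim=True)[0], min=0)
        inner = torch.exp(-c) + torch.sum(torch.exp(z - c) * J, dim=1, keepdim=True)
        return (c + torch.log(inner)).squeeze(1)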
--------------------------------------------------------------------------------
/smooth-topk/src/losses/functional.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.autograd as ag
3 |
4 | from losses.polynomial.sp import log_sum_exp, LogSumExp
5 | from losses.logarithm import LogTensor
6 | from losses.utils import delta, split
7 |
8 |
9 | def Top1_Hard_SVM(labels, alpha=1.):
10 | def fun(x, y):
11 | # max oracle
12 | max_, _ = (x + delta(y, labels, alpha)).max(1)
13 | # subtract ground truth
14 | loss = max_ - x.gather(1, y[:, None]).squeeze()
15 | return loss
16 | return fun
17 |
18 |
19 | def Topk_Hard_SVM(labels, k, alpha=1.):
20 | def fun(x, y):
21 | x_1, x_2 = split(x, y, labels)
22 |
23 | max_1, _ = (x_1 + alpha).topk(k, dim=1)
24 | max_1 = max_1.mean(1)
25 |
26 | max_2, _ = x_1.topk(k - 1, dim=1)
27 | max_2 = (max_2.sum(1) + x_2) / k
28 |
29 | loss = torch.clamp(max_1 - max_2, min=0)
30 |
31 | return loss
32 | return fun
33 |
34 |
35 | def Top1_Smooth_SVM(labels, tau, alpha=1.):
36 | def fun(x, y):
37 | # add loss term and subtract ground truth score
38 | x = x + delta(y, labels, alpha) - x.gather(1, y[:, None])
39 | # compute loss
40 | loss = tau * log_sum_exp(x / tau)
41 |
42 | return loss
43 | return fun
44 |
45 |
46 | def Topk_Smooth_SVM(labels, k, tau, alpha=1.):
47 |
48 | lsp = LogSumExp(k)
49 |
50 | def fun(x, y):
51 | x_1, x_2 = split(x, y, labels)
52 | # all scores are divided by (k * tau)
53 | x_1.div_(k * tau)
54 | x_2.div_(k * tau)
55 |
56 | # term 1: all terms that will *not* include the ground truth score
57 | # term 2: all terms that will include the ground truth score
58 | res = lsp(x_1)
59 | term_1, term_2 = res[1], res[0]
60 | term_1, term_2 = LogTensor(term_1), LogTensor(term_2)
61 |
62 | X_2 = LogTensor(x_2)
63 | cst = x_2.data.new(1).fill_(float(alpha) / tau)
64 | One_by_tau = LogTensor(ag.Variable(cst, requires_grad=False))
65 | Loss_ = term_2 * X_2
66 |
67 | loss_pos = (term_1 * One_by_tau + Loss_).torch()
68 | loss_neg = Loss_.torch()
69 | loss = tau * (loss_pos - loss_neg)
70 |
71 | return loss
72 | return fun
73 |
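For reference, Top1_Smooth_SVM computes a temperature-smoothed maximum of the hinge terms; assuming delta(y, labels, alpha) encodes the margin $\alpha \cdot 1[j \neq y]$ (as in losses/utils), the loss is

$$L_\tau(x, y) = \tau \log \sum_j \exp\left(\frac{x_j + \alpha\,1[j \neq y] - x_y}{\tau}\right),$$

which recovers the hard max-margin loss of Top1_Hard_SVM as $\tau \to 0$.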
--------------------------------------------------------------------------------
/smooth-topk/src/losses/lml_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | import sys
5 | sys.path.append('../../') # TODO
6 |
7 |
8 | from lml import LML
9 |
10 | class LMLLoss(nn.Module):
11 | def __init__(self, n_classes, k=5, tau=1.0):
12 | super(LMLLoss, self).__init__()
13 | self.n_classes = n_classes
14 | self.k = k
15 | self.tau = tau
16 |
17 | def forward(self, x, y):
18 | n_batch = x.shape[0]
19 |
20 | p = LML(N=self.k, eps=1e-4)(x/self.tau)
21 | losses = -torch.log(p.gather(1, y.unsqueeze(1)) + 1e-8)
22 | return losses.mean()
23 |
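LMLLoss reads the LML projection's output as the probability that the true class lies in the top-k and takes its negative log. A small usage sketch with random logits (shapes are what matter; values are arbitrary):

    import torch
    from torch.autograd import Variable

    logits = Variable(torch.randn(4, 10), requires_grad=True)  # 4 samples, 10 classes
    y = Variable(torch.LongTensor([0, 3, 7, 9]))
    loss = LMLLoss(n_classes=10, k=5, tau=1.0)(logits, y)
    loss.backward()  # gradients flow back through the LML projection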
--------------------------------------------------------------------------------
/smooth-topk/src/losses/logarithm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.autograd as ag
3 |
4 | from numbers import Number
5 |
6 |
7 | def log(x, like):
8 | """
9 | Get log-value of x.
10 | If x is a LogTensor, simply access its stored data
11 | If x is a Number, transform it to a tensor / variable,
12 | in the log space, with the same type and size as like.
13 | """
14 | if isinstance(x, LogTensor):
15 | return x.torch()
16 |
17 | if not isinstance(x, Number):
18 | raise TypeError('Not supported type: received {}, '
19 | 'was expected LogTensor or Number'
20 | .format(type(x)))
21 |
22 | # transform x to variable / tensor of
23 | # same type and size as like
24 | like_is_var = isinstance(like, ag.Variable)
25 | data = like.data if like_is_var else like
26 | new = data.new(1).fill_(x).log_().expand_as(data)
27 | new = ag.Variable(new) if like_is_var else new
28 | return new
29 |
30 |
31 | def _imul_inplace(x1, x2):
32 | return x1.add_(x2)
33 |
34 |
35 | def _imul_outofplace(x1, x2):
36 | return x1 + x2
37 |
38 |
39 | def _add_inplace(x1, x2):
40 | M = torch.max(x1, x2)
41 | M.add_(((x1 - M).exp_().add_((x2 - M).exp_())).log_())
42 | return M
43 |
44 |
45 | def _add_outofplace(x1, x2):
46 | M = torch.max(x1, x2)
47 | return M + torch.log(torch.exp(x1 - M) + torch.exp(x2 - M))
48 |
49 |
50 | class LogTensor(object):
51 | """
52 | Stable log-representation for torch tensors
53 | _x stores the value in the log space
54 | """
55 | def __init__(self, x):
56 | super(LogTensor, self).__init__()
57 |
58 | self.var = isinstance(x, ag.Variable)
59 | self._x = x
60 | self.add = _add_outofplace if self.var else _add_inplace
61 | self.imul = _imul_outofplace if self.var else _imul_inplace
62 |
63 | def __add__(self, other):
64 | other_x = log(other, like=self._x)
65 | return LogTensor(self.add(self._x, other_x))
66 |
67 | def __imul__(self, other):
68 | other_x = log(other, like=self._x)
69 | self._x = self.imul(self._x, other_x)
70 | return self
71 |
72 | def __iadd__(self, other):
73 | other_x = log(other, like=self._x)
74 | self._x = self.add(self._x, other_x)
75 | return self
76 |
77 | def __radd__(self, other):
78 | """
79 | Addition is commutative.
80 | """
81 | return self.__add__(other)
82 |
83 | def __sub__(self, other):
84 | """
85 | NB: assumes self - other > 0.
86 | Will return nan otherwise.
87 | """
88 | other_x = log(other, like=self._x)
89 | diff = other_x - self._x
90 | x = self._x + log1mexp(diff)
91 | return LogTensor(x)
92 |
93 | def __pow__(self, power):
94 | return LogTensor(self._x * power)
95 |
96 | def __mul__(self, other):
97 | other_x = log(other, like=self._x)
98 | x = self._x + other_x
99 | return LogTensor(x)
100 |
101 | def __rmul__(self, other):
102 | """
103 | Multiplication is commutative.
104 | """
105 | return self.__mul__(other)
106 |
107 | def __div__(self, other):
108 | """
109 | Division (python 2)
110 | """
111 | other_x = log(other, like=self._x)
112 | x = self._x - other_x
113 | return LogTensor(x)
114 |
115 | def __truediv__(self, other):
116 | """
117 | Division (python 3)
118 | """
119 | return self.__div__(other)
120 |
121 | def torch(self):
122 | """
123 | Returns value of tensor in torch format (either variable or tensor)
124 | """
125 | return self._x
126 |
127 | def __repr__(self):
128 | tensor = self._x.data if self.var else self._x
129 | s = 'Log Tensor with value:\n{}'.format(tensor)
130 | return s
131 |
132 |
133 | def log1mexp(U, eps=1e-3):
134 | """
135 | Compute log(1 - exp(u)) for u <= 0.
136 | """
137 | res = torch.log1p(-torch.exp(U))
138 |
139 | # |U| << 1 requires care for numerical stability:
140 | # 1 - exp(U) = -U + o(U)
141 | small = torch.lt(U.abs(), eps)
142 | res[small] = torch.log(-U[small])
143 |
144 | return res
145 |
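LogTensor stores log(x) and maps arithmetic on the underlying values into log space: `*` becomes addition of logs, `+` a stable log-add-exp, and `-` uses log1mexp (valid only when the result is positive). A quick numeric check (plain tensors):

    import torch

    a = LogTensor(torch.log(torch.Tensor([2.0])))  # represents 2
    b = LogTensor(torch.log(torch.Tensor([3.0])))  # represents 3
    print((a * b).torch().exp())  # ~6: log(2) + log(3)
    print((a + b).torch().exp())  # ~5: log-add-exp
    print((b - a).torch().exp())  # ~1: requires b > a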
--------------------------------------------------------------------------------
/smooth-topk/src/losses/main.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from losses.svm import SmoothSVM
3 | from losses.lml_loss import LMLLoss
4 | from losses.ml import MLLoss
5 | from losses.entr import EntrLoss
6 |
7 | def get_loss(xp, args):
8 | if args.loss == "svm":
9 | print("Using SVM loss")
10 | loss = SmoothSVM(n_classes=args.num_classes, k=args.topk, tau=args.tau, alpha=args.alpha)
11 | elif args.loss == 'ce':
12 | print("Using CE loss")
13 | loss = nn.CrossEntropyLoss()
14 | loss.tau = -1
15 | elif args.loss == 'lml':
16 | print("Using LML loss")
17 | loss = LMLLoss(n_classes=args.num_classes, k=args.topk, tau=args.tau)
18 | elif args.loss == 'ml':
19 | loss = MLLoss(n_classes=args.num_classes)
20 | elif args.loss == 'entr':
21 | print("Using truncated entr (Lapin) loss")
22 | loss = EntrLoss(n_classes=args.num_classes, k=args.topk, tau=args.tau)
23 | else:
24 | raise ValueError('Invalid choice of loss ({})'.format(args.loss))
25 |
26 | xp.Temperature.set_fun(lambda: loss.tau)
27 |
28 | return loss
29 |
--------------------------------------------------------------------------------
/smooth-topk/src/losses/ml.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | class MLLoss(nn.Module):
5 | def __init__(self, n_classes):
6 | super(MLLoss, self).__init__()
7 | self.n_classes = n_classes
8 | self.tau = 1.0
9 |
10 | def forward(self, x, y):
11 | n_batch = x.shape[0]
12 | y_onehot = torch.zeros(n_batch, self.n_classes).type_as(x)
13 | y_onehot.scatter_(1, y.unsqueeze(1), 1)
14 | loss = nn.BCEWithLogitsLoss()(x, y_onehot)
15 | return loss
16 |
--------------------------------------------------------------------------------
/smooth-topk/src/losses/polynomial/__init__.py:
--------------------------------------------------------------------------------
1 | from losses.polynomial.sp import LogSumExp, log_sum_exp
2 |
--------------------------------------------------------------------------------
/smooth-topk/src/losses/polynomial/divide_conquer.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def divide_and_conquer(x, k, mul):
5 | """
6 | Divide and conquer method for polynomial expansion
7 | x is a 2d tensor of size (n_classes, n_roots)
8 | The objective is to obtain the first k coefficients of the expanded
9 | polynomial
10 | """
11 |
12 | to_merge = []
13 |
14 | while x[0].dim() > 1 and x[0].size(0) > 1:
15 | size = x[0].size(0)
16 | half = size // 2
17 | if 2 * half < size:
18 | to_merge.append([t[-1] for t in x])
19 | x = mul([t[:half] for t in x],
20 | [t[half: 2 * half] for t in x])
21 |
22 | for row in to_merge:
23 | x = mul(x, row)
24 | x = torch.cat(x)
25 | return x
26 |
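divide_and_conquer merges the factors pairwise, so only O(log n) rounds of multiplications are needed and every intermediate is truncated to degree k. The same scheme in plain NumPy, with np.convolve standing in for the log-space mul (illustration only):

    import numpy as np

    def esp_coeffs(roots, k):
        # coefficients of prod_i (1 + r_i * t), truncated to degree k
        polys = [np.array([1.0, r]) for r in roots]
        while len(polys) > 1:
            odd = polys.pop() if len(polys) % 2 else None
            polys = [np.convolve(polys[i], polys[i + 1])[:k + 1]
                     for i in range(0, len(polys), 2)]
            if odd is not None:
                polys.append(odd)
        return polys[0]

    print(esp_coeffs([1.0, 2.0, 3.0], k=2))  # [1. 6. 11.] = e_0, e_1, e_2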
--------------------------------------------------------------------------------
/smooth-topk/src/losses/polynomial/grad.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from future.builtins import range
4 | from losses.logarithm import LogTensor
5 |
6 |
7 | def recursion(S, X, j):
8 | """
9 | Apply recursive formula to compute the gradient
10 | for coefficient of degree j.
11 | d S[j] / d X = S[j-1] - X * (S[j-2] - X * (S[j-3] - ...) ... )
12 | = S[j-1] + X ** 2 * S[j-3] + ...
13 | - (X * S[j-2] + X ** 3 * S[j-4] + ...)
14 | """
15 |
16 | # Compute positive and negative parts separately
17 | _P_ = sum(S[i] * X ** (j - 1 - i) for i in range(j - 1, -1, -2))
18 | _N_ = sum(S[i] * X ** (j - 1 - i) for i in range(j - 2, -1, -2))
19 |
20 | return _N_, _P_
21 |
22 |
23 | def approximation(S, X, j, p):
24 | """
25 | Compute p-th order approximation for d S[j] / d X:
26 | d S[j] / d X ~ S[j] / X - S[j + 1] / X ** 2 + ...
27 | + (-1) ** (p - 1) * S[j + p - 1] / X ** p
28 | """
29 |
30 | # Compute positive and negative parts separately
31 | _P_ = sum(S[j + i] / X ** (i + 1) for i in range(0, p, 2))
32 | _N_ = sum(S[j + i] / X ** (i + 1) for i in range(1, p, 2))
33 |
34 | return _N_, _P_
35 |
36 |
37 | def d_logS_d_expX(S, X, j, p, grad, thresh, eps=1e-5):
38 | """
39 | Compute the gradient of log S[j] w.r.t. exp(X).
40 | For unstable cases, use the p-th order approximation.
41 | """
42 |
43 | # ------------------------------------------------------------------------
44 | # Detect instabilities
45 | # ------------------------------------------------------------------------
46 |
47 | _X_ = LogTensor(X)
48 | _S_ = [LogTensor(S[i]) for i in range(S.size(0))]
49 |
50 | # recursion of gradient formula (separate terms for stability)
51 | _N_, _P_ = recursion(_S_, _X_, j)
52 |
53 | # detect instability: small relative difference in log-space
54 | P, N = _P_.torch(), _N_.torch()
55 | diff = (P - N) / (N.abs() + eps)
56 |
57 | # split into stable and unstable indices
58 | u_indices = torch.lt(diff, thresh) # unstable
59 | s_indices = u_indices.eq(0) # stable
60 |
61 | # ------------------------------------------------------------------------
62 | # Compute d S[j] / d X
63 | # ------------------------------------------------------------------------
64 |
65 | # make grad match size and type of X
66 | grad = grad.type_as(X).resize_as_(X)
67 |
68 | # exact gradient for s_indices (stable) elements
69 | if s_indices.sum():
70 | # re-use positive and negative parts of recursion (separate for stability)
71 | _N_ = LogTensor(_N_.torch()[s_indices])
72 | _P_ = LogTensor(_P_.torch()[s_indices])
73 | _X_ = LogTensor(X[s_indices])
74 | _S_ = [LogTensor(S[i][s_indices]) for i in range(S.size(0))]
75 |
76 | # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j]
77 | _SG_ = (_P_ - _N_) * _X_ / _S_[j]
78 | grad.masked_scatter_(s_indices, _SG_.torch().exp())
79 |
80 | # approximate gradients for u_indices (unstable) elements
81 | if u_indices.sum():
82 | _X_ = LogTensor(X[u_indices])
83 | _S_ = [LogTensor(S[i][u_indices]) for i in range(S.size(0))]
84 |
85 | # positive and negative parts of approximation (separate for stability)
86 | _N_, _P_ = approximation(_S_, _X_, j, p)
87 |
88 | # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j]
89 | _UG_ = (_P_ - _N_) * _X_ / _S_[j]
90 | grad.masked_scatter_(u_indices, _UG_.torch().exp())
91 |
92 | return grad
93 |
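The recursion implemented above is the standard identity for elementary symmetric polynomials; spelled out (a reconstruction, writing sigma_j for S[j] in standard space and sigma_j^{\setminus i} for the same sum with root z_i removed):

    \sigma_j = \sigma_j^{\setminus i} + z_i \, \sigma_{j-1}^{\setminus i}
    \quad\Longrightarrow\quad
    \frac{\partial \sigma_j}{\partial z_i}
    = \sigma_{j-1}^{\setminus i}
    = \sigma_{j-1} - z_i \, \sigma_{j-2}^{\setminus i}
    = \sigma_{j-1} - z_i \bigl( \sigma_{j-2} - z_i ( \sigma_{j-3} - \dots ) \bigr)

Unrolling the alternating signs gives exactly the even-offset terms accumulated in _P_ and the odd-offset terms in _N_.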
--------------------------------------------------------------------------------
/smooth-topk/src/losses/polynomial/multiplication.py:
--------------------------------------------------------------------------------
1 | import operator
2 | import itertools
3 |
4 | from future.builtins import range
5 | from functools import reduce
6 | from losses.logarithm import LogTensor
7 |
8 |
9 | def Multiplication(k):
10 | """
11 | Generate a function that performs a polynomial multiplication and returns the coefficients up to degree k
12 | """
13 | assert isinstance(k, int) and k > 0
14 |
15 | def isum(factors):
16 | init = next(factors)
17 | return reduce(operator.iadd, factors, init)
18 |
19 | def mul_function(x1, x2):
20 |
21 | # prepare indices for convolution
22 | l1, l2 = len(x1), len(x2)
23 | M = min(k + 1, l1 + l2 - 1)
24 | indices = [[] for _ in range(M)]
25 | for (i, j) in itertools.product(range(l1), range(l2)):
26 | if i + j >= M:
27 | continue
28 | indices[i + j].append((i, j))
29 |
30 | # wrap with log-tensors for stability
31 | X1 = [LogTensor(x1[i]) for i in range(l1)]
32 | X2 = [LogTensor(x2[i]) for i in range(l2)]
33 |
34 | # perform convolution
35 | coeff = []
36 | for c in range(M):
37 | coeff.append(isum(X1[i] * X2[j] for (i, j) in indices[c]).torch())
38 | return coeff
39 |
40 | return mul_function
41 |
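A toy sketch (hypothetical values, not from the repo) of the generated multiplication: the inputs are lists of log-space coefficients indexed by degree, and the output is the truncated convolution, also in log-space. It assumes LogTensor accepts plain one-element tensors.

    import torch
    from losses.polynomial.multiplication import Multiplication

    # p(t) = 1 + 2t and q(t) = 3 + 4t, one 'sample' each, as log-coefficients
    p = [torch.tensor([1.]).log(), torch.tensor([2.]).log()]
    q = [torch.tensor([3.]).log(), torch.tensor([4.]).log()]

    mul = Multiplication(2)               # keep coefficients up to degree 2
    r = mul(p, q)                         # log-space coefficients of p * q
    print([c.exp().item() for c in r])    # ~[3.0, 10.0, 8.0]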
--------------------------------------------------------------------------------
/smooth-topk/src/losses/polynomial/sp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.autograd as ag
4 |
5 | from losses.polynomial.divide_conquer import divide_and_conquer
6 | from losses.polynomial.multiplication import Multiplication
7 | from losses.polynomial.grad import d_logS_d_expX
8 |
9 |
10 | class LogSumExp(nn.Module):
11 | def __init__(self, k, p=None, thresh=1e-5):
12 | super(LogSumExp, self).__init__()
13 | self.k = k
14 | self.p = int(1 + 0.2 * k) if p is None else p
15 | self.mul = Multiplication(self.k + self.p - 1)
16 | self.thresh = thresh
17 |
18 | self.register_buffer('grad_k', torch.Tensor(0))
19 | self.register_buffer('grad_km1', torch.Tensor(0))
20 |
21 | self.buffers = (self.grad_km1, self.grad_k)
22 |
23 | def forward(self, x):
24 | f = LogSumExp_F(self.k, self.p, self.thresh, self.mul, self.buffers)
25 | return f(x)
26 |
27 |
28 | class LogSumExp_F(ag.Function):
29 | def __init__(self, k, p, thresh, mul, buffers):
30 | self.k = k
31 | self.p = p
32 | self.mul = mul
33 | self.thresh = thresh
34 |
35 | # unpack buffers
36 | self.grad_km1, self.grad_k = buffers
37 |
38 | def forward(self, x):
39 | """
40 | Returns a matrix of size (2, n_samples) with sigma_{k-1} and sigma_{k}
41 | for each sample of the mini-batch.
42 | """
43 | self.save_for_backward(x)
44 |
45 | # number of samples and number of coefficients to compute
46 | n_s = x.size(0)
47 | kp = self.k + self.p - 1
48 |
49 | assert kp <= x.size(1)
50 |
51 | # clone to allow in-place operations
52 | x = x.clone()
53 |
54 | # pre-compute normalization
55 | x_summed = x.sum(1)
56 |
57 | # invert in log-space
58 | x.t_().mul_(-1)
59 |
60 | # initialize polynomials (in log-space)
61 | x = [x, x.clone().fill_(0)]
62 |
63 | # polynomial multiplications
64 | log_res = divide_and_conquer(x, kp, mul=self.mul)
65 |
66 | # re-normalize
67 | coeff = log_res + x_summed[None, :]
68 |
69 | # avoid broadcasting issues (in particular if n_s = 1)
70 | coeff = coeff.view(kp + 1, n_s)
71 |
72 | # save all coeff for backward
73 | self.saved_coeff = coeff
74 |
75 | return coeff[self.k - 1: self.k + 1]
76 |
77 | def backward(self, grad_sk):
78 | """
79 | Compute backward pass of LogSumExp.
80 | Python variables with an upper case first letter are in
81 | log-space, other are in standard space.
82 | """
83 |
84 | # tensors from forward pass
85 | X, = self.saved_tensors
86 | S = self.saved_coeff
87 |
88 | # extend to shape (k + p, n_samples, n_classes) for backward
89 | S = S.unsqueeze(2).expand(S.size(0), X.size(0), X.size(1))
90 |
91 | # compute gradients for coeff of degree k and k - 1
92 | self.grad_km1 = d_logS_d_expX(S, X, self.k - 1, self.p, self.grad_km1, self.thresh)
93 | self.grad_k = d_logS_d_expX(S, X, self.k, self.p, self.grad_k, self.thresh)
94 |
95 | # chain rule: combine with incoming gradients (broadcast to all classes on third dim)
96 | grad_x = grad_sk[0, :, None] * self.grad_km1 + grad_sk[1, :, None] * self.grad_k
97 |
98 | return grad_x
99 |
100 |
101 | def log_sum_exp(x):
102 | """
103 | Compute log(sum(exp(x), 1)) in a numerically stable way.
104 | Assumes x is 2d.
105 | """
106 | max_score, _ = x.max(1)
107 | return max_score + torch.log(torch.sum(torch.exp(x - max_score[:, None]), 1))
108 |
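A quick sanity check (my addition) for the standalone helper above: it matches torch.logsumexp on 2d input. Note that LogSumExp itself uses the legacy stateful autograd.Function API (__init__ plus a non-static forward), so exercising it requires the older PyTorch release this repository targets.

    import torch
    from losses.polynomial.sp import log_sum_exp

    x = torch.randn(4, 10)
    print(torch.allclose(log_sum_exp(x), torch.logsumexp(x, 1)))  # True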
--------------------------------------------------------------------------------
/smooth-topk/src/losses/svm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import losses.functional as F
5 |
6 | from losses.utils import detect_large
7 |
8 |
9 | def SmoothSVM(n_classes, alpha=None, tau=1., k=5):
10 | if k == 1:
11 | return SmoothTop1SVM(n_classes, alpha, tau)
12 | else:
13 | return SmoothTopkSVM(n_classes, alpha, tau, k)
14 |
15 |
16 | class _SVMLoss(nn.Module):
17 |
18 | def __init__(self, n_classes, alpha):
19 |
20 | assert isinstance(n_classes, int)
21 |
22 | assert n_classes > 0
23 | assert alpha is None or alpha >= 0
24 |
25 | super(_SVMLoss, self).__init__()
26 | self.alpha = alpha if alpha is not None else 1
27 | self.register_buffer('labels', torch.from_numpy(np.arange(n_classes)))
28 | self.n_classes = n_classes
29 | self._tau = None
30 |
31 | def forward(self, x, y):
32 |
33 | raise NotImplementedError("Forward needs to be re-implemented for each loss")
34 |
35 | @property
36 | def tau(self):
37 | return self._tau
38 |
39 | @tau.setter
40 | def tau(self, tau):
41 | if self._tau != tau:
42 | print("Setting tau to {}".format(tau))
43 | self._tau = float(tau)
44 | self.get_losses()
45 |
46 | def cuda(self, device=None):
47 | nn.Module.cuda(self, device)
48 | self.get_losses()
49 |
50 | def cpu(self):
51 | nn.Module.cpu(self)
52 | self.get_losses()
53 |
54 |
55 | class MaxTop1SVM(_SVMLoss):
56 |
57 | def __init__(self, n_classes, alpha=None):
58 |
59 | super(MaxTop1SVM, self).__init__(n_classes=n_classes,
60 | alpha=alpha)
61 | self.get_losses()
62 |
63 | def forward(self, x, y):
64 | return self.F(x, y).mean()
65 |
66 | def get_losses(self):
67 | self.F = F.Top1_Hard_SVM(self.labels, self.alpha)
68 |
69 |
70 | class MaxTopkSVM(_SVMLoss):
71 |
72 | def __init__(self, n_classes, alpha=None, k=5):
73 |
74 | super(MaxTopkSVM, self).__init__(n_classes=n_classes,
75 | alpha=alpha)
76 | self.k = k
77 | self.get_losses()
78 |
79 | def forward(self, x, y):
80 | return self.F(x, y).mean()
81 |
82 | def get_losses(self):
83 | self.F = F.Topk_Hard_SVM(self.labels, self.k, self.alpha)
84 |
85 |
86 | class SmoothTop1SVM(_SVMLoss):
87 | def __init__(self, n_classes, alpha=None, tau=1.):
88 | super(SmoothTop1SVM, self).__init__(n_classes=n_classes,
89 | alpha=alpha)
90 | self.tau = tau
91 | self.thresh = 1e3
92 | self.get_losses()
93 |
94 | def forward(self, x, y):
95 | smooth, hard = detect_large(x, 1, self.tau, self.thresh)
96 |
97 | loss = 0
98 | if smooth.data.sum():
99 | x_s, y_s = x[smooth[:, None]], y[smooth]
100 | x_s = x_s.view(-1, x.size(1))
101 | loss += self.F_s(x_s, y_s).sum() / x.size(0)
102 | if hard.data.sum():
103 | x_h, y_h = x[hard[:, None]], y[hard]
104 | x_h = x_h.view(-1, x.size(1))
105 | loss += self.F_h(x_h, y_h).sum() / x.size(0)
106 |
107 | return loss
108 |
109 | def get_losses(self):
110 | self.F_h = F.Top1_Hard_SVM(self.labels, self.alpha)
111 | self.F_s = F.Top1_Smooth_SVM(self.labels, self.tau, self.alpha)
112 |
113 |
114 | class SmoothTopkSVM(_SVMLoss):
115 |
116 | def __init__(self, n_classes, alpha=None, tau=1., k=5):
117 | super(SmoothTopkSVM, self).__init__(n_classes=n_classes,
118 | alpha=alpha)
119 | self.k = k
120 | self.tau = tau
121 | self.thresh = 1e3
122 | self.get_losses()
123 |
124 | def forward(self, x, y):
125 | smooth, hard = detect_large(x, self.k, self.tau, self.thresh)
126 |
127 | loss = 0
128 | if smooth.data.sum():
129 | x_s, y_s = x[smooth], y[smooth]
130 | x_s = x_s.view(-1, x.size(1))
131 | loss += self.F_s(x_s, y_s).sum() / x.size(0)
132 | if hard.data.sum():
133 | x_h, y_h = x[hard], y[hard]
134 | x_h = x_h.view(-1, x.size(1))
135 | loss += self.F_h(x_h, y_h).sum() / x.size(0)
136 |
137 | return loss
138 |
139 | def get_losses(self):
140 | self.F_h = F.Topk_Hard_SVM(self.labels, self.k, self.alpha)
141 | self.F_s = F.Topk_Smooth_SVM(self.labels, self.k, self.tau, self.alpha)
142 |
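A minimal usage sketch (hypothetical sizes; it assumes the PyTorch version this repo targets, since the smooth branch ultimately calls the legacy LogSumExp Function via losses.functional):

    import torch
    from losses.svm import SmoothSVM

    torch.manual_seed(0)
    loss_fn = SmoothSVM(n_classes=10, tau=1., k=5)  # dispatches to SmoothTopkSVM
    x = torch.randn(8, 10)                          # logits
    y = torch.randint(0, 10, (8,))                  # ground-truth labels
    print(loss_fn(x, y))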
--------------------------------------------------------------------------------
/smooth-topk/src/losses/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 |
4 | import torch.autograd as ag
5 |
6 |
7 | def delta(y, labels, alpha=None):
8 | """
9 | Compute zero-one loss matrix for a vector of ground truth y
10 | """
11 |
12 | if isinstance(y, ag.Variable):
13 | labels = ag.Variable(labels, requires_grad=False)
14 |
15 | delta = torch.ne(y[:, None], labels[None, :]).float()
16 |
17 | if alpha is not None:
18 | delta = alpha * delta
19 | return delta
20 |
21 |
22 | def split(x, y, labels):
23 | labels = ag.Variable(labels, requires_grad=False)
24 | mask = torch.ne(labels[None, :], y[:, None])
25 |
26 | # gather result:
27 | # x_1: all scores that do not correspond to the ground truth
28 | x_1 = x[mask].view(x.size(0), -1)
29 | # x_2: scores of the ground truth
30 | x_2 = x.gather(1, y[:, None]).view(-1)
31 | return x_1, x_2
32 |
33 |
34 | def detect_large(x, k, tau, thresh):
35 | top, _ = x.topk(k + 1, 1)
36 | # switch to hard top-k if (k+1)-largest element is much smaller
37 | # than k-largest element
38 | hard = torch.ge(top[:, k - 1] - top[:, k], k * tau * math.log(thresh)).detach()
39 | smooth = hard.eq(0)
40 | return smooth, hard
41 |
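A small illustration (hypothetical values) of delta: each row is the zero-one loss of its sample against every label, i.e. 1 everywhere except at the ground-truth class.

    import torch
    from losses.utils import delta

    labels = torch.arange(4)
    y = torch.tensor([2, 0])
    print(delta(y, labels))
    # tensor([[1., 1., 0., 1.],
    #         [0., 1., 1., 1.]])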
--------------------------------------------------------------------------------
/smooth-topk/src/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # import waitGPU
4 | # waitGPU.wait(interval=10, nproc=0, ngpu=1)
5 | # import setGPU
6 |
7 | import argparse
8 |
9 | from cli import add_all_parsers, set_defaults
10 | import time
11 | import torch
12 | import logger
13 |
14 | from utils import create_experiment, get_optimizer, set_seed,\
15 | update_optimizer, load_optimizer
16 | from data.main import get_loaders
17 | from epoch import train, test
18 |
19 | from losses.main import get_loss
20 | from models.main import get_model, load_model
21 |
22 | from setproctitle import setproctitle
23 |
24 | import sys
25 | from IPython.core import ultratb
26 | sys.excepthook = ultratb.FormattedTB(mode='Verbose',
27 | color_scheme='Linux', call_pdb=1)
28 |
29 | def run(args):
30 | tag = 'bamos.smooth-topk.seed={}.{}'.format(args.seed, args.dataset)
31 | if args.dataset == 'cifar100':
32 | tag += '.noise={}'.format(args.noise_labels)
33 | elif args.dataset == 'imagenet':
34 | tag += '-{}'.format(args.train_size)
35 | setproctitle(tag)
36 |
37 | set_seed(args.seed)
38 | xp = create_experiment(args)
39 | train_loader, val_loader, test_loader = get_loaders(args)
40 | loss = get_loss(xp, args)
41 |
42 | model = get_model(args)
43 | if args.load_model:
44 | load_model(model, args.load_model)
45 |
46 | if args.cuda:
47 | if args.parallel_gpu:
48 | model = torch.nn.DataParallel(model).cuda()
49 | else:
50 | torch.cuda.set_device(args.device)
51 | model.cuda()
52 | loss.cuda()
53 |
54 | optimizer = get_optimizer(model, args.mu, args.lr_0, xp)
55 | if args.load_optimizer:
56 | load_optimizer(optimizer, args.load_optimizer, args.lr_0)
57 |
58 | with logger.stdout_to("{}_log.txt".format(args.out_name)):
59 | clock = -time.time()
60 | for _ in range(args.epochs):
61 |
62 | xp.Epoch.update(1).log()
63 | optimizer = update_optimizer(args.lr_schedule, optimizer,
64 | model, loss, xp)
65 |
66 | xp.Learning_Rate.update().log()
67 | xp.Mu.update().log()
68 | xp.Temperature.update().log()
69 |
70 | train(model, loss, optimizer, train_loader, xp, args)
71 | test(model, loss, val_loader, xp, args)
72 |
73 | test(model, loss, test_loader, xp, args)
74 | clock += time.time()
75 |
76 | print("\nEvaluation time: \t {0:.2g} min".format(clock * 1. / 60))
77 |
78 |
79 | if __name__ == '__main__':
80 | parser = argparse.ArgumentParser()
81 | add_all_parsers(parser)
82 | args = parser.parse_args()
83 | set_defaults(args)
84 | run(args)
85 |
--------------------------------------------------------------------------------
/smooth-topk/src/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/models/__init__.py
--------------------------------------------------------------------------------
/smooth-topk/src/models/cifar.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn as nn
3 |
4 |
5 | from models.densenet import DenseNet3
6 |
7 |
8 | class BasicConvNet(nn.Module):
9 |
10 | def __init__(self, dataset, planes=16):
11 |
12 | super(BasicConvNet, self).__init__()
13 |
14 | n_classes = 10 if dataset == 'cifar10' else 100
15 |
16 | self.p = planes
17 |
18 | conv1_1 = nn.Conv2d(3, self.p, 3, padding=1, bias=False)
19 | bn1_1 = nn.BatchNorm2d(self.p)
20 | conv1_2 = nn.Conv2d(self.p, self.p, 3, padding=1, bias=False)
21 | bn1_2 = nn.BatchNorm2d(self.p)
22 |
23 | conv2_1 = nn.Conv2d(self.p, self.p * 2, 3, padding=1, bias=False)
24 | bn2_1 = nn.BatchNorm2d(self.p * 2)
25 | conv2_2 = nn.Conv2d(self.p * 2, self.p * 2, 3, padding=1, bias=False)
26 | bn2_2 = nn.BatchNorm2d(self.p * 2)
27 |
28 | conv3_1 = nn.Conv2d(self.p * 2, self.p * 4, 3, padding=1, bias=False)
29 | bn3_1 = nn.BatchNorm2d(self.p * 4)
30 | conv3_2 = nn.Conv2d(self.p * 4, self.p * 4, 3, padding=1, bias=False)
31 | bn3_2 = nn.BatchNorm2d(self.p * 4)
32 |
33 | relu = nn.ReLU(inplace=True)
34 | maxpool = nn.MaxPool2d(2)
35 | avgpool = nn.AvgPool2d(4)
36 |
37 | self.base = nn.Sequential(conv1_1, bn1_1, relu,
38 | conv1_2, bn1_2, relu, maxpool,
39 | conv2_1, bn2_1, relu,
40 | conv2_2, bn2_2, relu, maxpool,
41 | conv3_1, bn3_1, relu,
42 | conv3_2, bn3_2, relu, maxpool,
43 | avgpool)
44 |
45 | self.fc = nn.Linear(self.p * 4, n_classes)
46 |
47 | for m in self.base.modules():
48 | if isinstance(m, nn.Conv2d):
49 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
50 | m.weight.data.normal_(0, math.sqrt(2. / n))
51 | elif isinstance(m, nn.BatchNorm2d):
52 | m.weight.data.fill_(1)
53 | m.bias.data.zero_()
54 |
55 | self.fc.bias.data.zero_()
56 |
57 | def forward(self, x):
58 |
59 | x = self.base(x)
60 | x = x.view(x.size(0), -1)
61 | x = self.fc(x)
62 |
63 | return x
64 |
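A shape check (my sketch) for the network above: the three conv blocks each halve the 32x32 CIFAR resolution, the 4x4 average pool collapses what remains, and the linear layer maps the planes * 4 features to the class scores.

    import torch
    from models.cifar import BasicConvNet

    net = BasicConvNet('cifar10', planes=16)
    x = torch.randn(2, 3, 32, 32)   # a CIFAR-sized batch
    print(net(x).shape)             # torch.Size([2, 10])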
--------------------------------------------------------------------------------
/smooth-topk/src/models/main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import models.cifar as cifar_models
3 |
4 | from models.parser import parse_model
5 |
6 | import torchvision.models as torch_models
7 |
8 | from collections import OrderedDict
9 |
10 |
11 | def get_model(args):
12 | parse_model(args)
13 |
14 | if args.dataset == 'imagenet':
15 | model = torch_models.__dict__[args.model]()
16 | args.model_name = args.model
17 | elif args.basic_model:
18 | model = cifar_models.BasicConvNet(args.dataset, args.planes)
19 | args.model_name = 'convnet_{}'.format(args.planes)
20 | else:
21 | model = cifar_models.DenseNet3(args.depth, args.num_classes, args.growth)
22 | args.model_name = 'densenet_{}_{}'.format(args.depth, args.growth)
23 |
24 | # Print the number of model parameters
25 | nparams = sum([p.data.nelement() for p in model.parameters()])
26 | print('Number of model parameters: \t {}'.format(nparams))
27 |
28 | return model
29 |
30 |
31 | def load_model(model, filename):
32 | # map location allows to load on CPU weights originally on GPU
33 | state_dict = torch.load(filename, map_location=lambda storage, loc: storage)
34 | # map from DataParallel to simple module if needed
35 | if 'DataParallel' in state_dict['model_repr']:
36 | new_state_dict = OrderedDict()
37 | for k, v in state_dict['model'].items():
38 | name = k.replace("module.", "")
39 | new_state_dict[name] = v
40 | state_dict['model'] = new_state_dict
41 | model.load_state_dict(state_dict['model'])
42 |
--------------------------------------------------------------------------------
/smooth-topk/src/models/parser.py:
--------------------------------------------------------------------------------
1 | def parse_model(args):
2 | if args.dataset == 'imagenet':
3 | pass
4 | elif 'basic' in args.model:
5 | parse_basic_convnet(args)
6 | elif 'densenet' in args.model:
7 | parse_densenet(args)
8 |
9 |
10 | def parse_basic_convnet(args):
11 | args.basic_model = 1
12 | args.densenet_model = 0
13 |
14 | param_str = args.model.replace("basic", "")
15 | param_str = param_str.replace("_", "-")
16 | args.planes = [int(p) for p in param_str.split("-") if p != ''].pop(0)
17 |
18 |
19 | def parse_densenet(args):
20 | args.densenet_model = 1
21 | args.basic_model = 0
22 |
23 | param_str = args.model.replace("densenet", "")
24 | param_str = param_str.replace("_", "-")
25 | args.depth, args.growth = \
26 | [int(p) for p in param_str.split("-") if p != '']
27 |
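For example (a hypothetical invocation), a model string like 'densenet40-40' is split into its depth and growth-rate parameters:

    from argparse import Namespace
    from models.parser import parse_model

    args = Namespace(dataset='cifar100', model='densenet40-40')
    parse_model(args)
    print(args.depth, args.growth, args.basic_model)  # 40 40 0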
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/cifar100_noise_ce.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | device=$1
4 |
5 | echo "Using device" $device
6 |
7 | for p in 0 0.2 0.4 0.6 0.8 1
8 | do
9 | python main.py --dataset cifar100 --model densenet40-40 --device $device\
10 | --out-name ../xp/cifar100/cifar100_${p}_ce --loss ce --noise $p --no-visdom;
11 | done
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/cifar100_noise_entr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # echo "Using device" $device
4 |
5 | mkdir -p logs
6 |
7 | export CUDA_VISIBLE_DEVICES=0
8 | seed=2
9 | for p in 0.4 0.6; do
10 | # for p in 0.0 0.2; do
11 | python3 main.py --dataset cifar100 --model densenet40-40 \
12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_entr \
13 | --loss entr --noise $p --seed $seed \
14 | --no-visdom --test-batch-size 64 &> /dev/null &
15 | export CUDA_VISIBLE_DEVICES=$(((CUDA_VISIBLE_DEVICES + 1) % 4))
16 | done
17 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/cifar100_noise_lml.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # echo "Using device" $device
4 |
5 | mkdir -p logs
6 |
7 | for seed in 0 1 2 3; do
8 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do
9 | export CUDA_VISIBLE_DEVICES=$seed
10 | for p in 0.4; do
11 | python3 main.py --dataset cifar100 --model densenet40-40 \
12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_lml_v2 \
13 | --loss lml --noise $p --seed $seed \
14 | --no-visdom --test-batch-size 64 &> /dev/null &
15 | done
16 | done
17 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/cifar100_noise_ml.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # echo "Using device" $device
4 |
5 | mkdir -p logs
6 |
7 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do
8 | # for p in 0.2 0.4 0.6 0.8 1.0; do
9 | for p in 0.2 0.4; do
10 | for seed in 0 3; do
11 | export CUDA_VISIBLE_DEVICES=$seed
12 | python3 main.py --dataset cifar100 --model densenet40-40 \
13 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_ml \
14 | --loss ml --noise $p --seed $seed \
15 | --no-visdom --test-batch-size 64 &> /dev/null &
16 | done
17 | wait
18 | done
19 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/cifar100_noise_svm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # echo "Using device" $device
4 |
5 | mkdir -p logs
6 |
7 | for seed in 0 1 2 3; do
8 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do
9 | export CUDA_VISIBLE_DEVICES=$seed
10 | for p in 0.4 0.6; do
11 | python3 main.py --dataset cifar100 --model densenet40-40 \
12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_svm \
13 | --loss svm --noise $p --seed $seed \
14 | --no-visdom --test-batch-size 64 &> /dev/null &
15 | done
16 | done
17 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import subprocess
5 | import pprint
6 |
7 | dataset = 'cifar100'
8 | xp_dir = '../xp/{}'.format(dataset)
9 |
10 | # multiple crops option
11 | if dataset == 'imagenet':
12 | crops_opt = '--multiple-crops'
13 | elif dataset == 'cifar100':
14 | crops_opt = ''
15 | else:
16 | raise ValueError
17 |
18 | for _, _, files in os.walk(xp_dir):
19 | to_analyze = sorted(filter(
20 | lambda x: 'best' in x and x.endswith('.pkl'),
21 | files
22 | ))
23 | n_analyze = len(to_analyze)
24 | print("Found {} files to evaluate:".format(n_analyze))
25 | pp = pprint.PrettyPrinter(indent=4)
26 | msg = pp.pformat(to_analyze)
27 | print(msg)
28 |
29 | for idx, xp_file in enumerate(to_analyze):
30 |
31 | print('-' * 80)
32 | print('Evaluating {} ({} out of {})'.format(xp_file, idx + 1, n_analyze))
33 |
34 | # find loss used for training
35 | if 'svm' in xp_file:
36 | loss = 'svm'
37 | elif 'ce' in xp_file:
38 | loss = 'ce'
39 | elif 'lml' in xp_file:
40 | loss = 'lml'
41 | elif '_ml' in xp_file:
42 | loss = 'ml'
43 | elif 'entr' in xp_file:
44 | loss = 'entr'
45 | else:
46 | raise ValueError('Could not parse loss name from filename')
47 |
48 | filename = os.path.join(xp_dir, xp_file)
49 | cmd = "python main.py --loss {} --load-model {} --dataset {} --eval --parallel-gpu {}"\
50 | .format(loss, filename, dataset, crops_opt)
51 | cmd = cmd.split()
52 | subprocess.call(cmd)
53 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_split.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import shutil
5 | import torchvision.datasets as datasets
6 |
7 | import sys
8 | sys.path.append('/nethome/bamos/2018-intel/smooth-topk/src')
9 |
10 | from collections import defaultdict
11 | from data.utils import random_subsets
12 |
13 |
14 | data_root = os.environ['VISION_DATA_SSD']
15 |
16 |
17 | train_root = '{}/ILSVRC2012-prepr-split/images/train'.format(data_root)
18 | val_root = '{}/ILSVRC2012-prepr-split/images/val'.format(data_root)
19 | dataset_train = datasets.ImageFolder(train_root)
20 |
21 | if not os.path.exists(val_root):
22 | os.makedirs(val_root)
23 | else:
24 | assert len(os.listdir(val_root)) == 0, \
25 | "{} is not empty: split already performed?".format(val_root)
26 | print("{} initially empty".format(val_root))
27 |
28 | n_classes = len(dataset_train.classes)
29 | val_size_per_class = 50
30 | assert val_size_per_class > 0
31 | my_dict = defaultdict(list)
32 | [my_dict[e[1]].append(e[0]) for e in dataset_train.imgs]
33 | val_imgs = []
34 | for k in my_dict.keys():
35 | imgs = sorted(my_dict[k])
36 | val_indices, = random_subsets((val_size_per_class,),
37 | len(imgs),
38 | seed=1234 + int(k))
39 | val_imgs += [imgs[idx] for idx in val_indices]
40 |
41 | counter = dict()
42 | for img in val_imgs:
43 | id_ = img.split('/')[-2]
44 | if id_ in counter.keys():
45 | counter[id_] += 1
46 | else:
47 | counter[id_] = 1
48 |
49 | balanced = len(set(counter.values())) == 1
50 | if balanced:
51 | print("data set is properly balanced")
52 | else:
53 | raise ValueError("data set should be balanced")
54 |
55 | print("Number of labels: {}".format(len(counter)))
56 | print("Number of images per label: {}".format(list(counter.values())[0]))
57 |
58 | print("Creating directories...")
59 | for new_dir in os.listdir(train_root):
60 | os.makedirs(os.path.join(val_root, new_dir))
61 |
62 | for img in val_imgs:
63 | new_img = img.replace("train", "val")
64 | print("Moving {} to {}".format(img, new_img))
65 | shutil.move(img, new_img)
66 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_subsets.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | SRC_DIR=$(dirname $0)/..
4 |
5 | function run_imagenet() {
6 | cd $SRC_DIR
7 |
8 | local GPUS=$1
9 | local LOSS=$2
10 | local SIZE_TAG=$3
11 | local lr_0=$4
12 | local tau=$5
13 | local mu=$6
14 | local seed=$7
15 |
16 | ARGS=""
17 | case "$SIZE_TAG" in
18 | "64k") ARGS+="--train-size 64000";;
19 | "128k") ARGS="--train-size 128000";;
20 | "320k") ARGS="--train-size 320000";;
21 | "640k") ARGS="--train-size 640000";;
22 | "all") ;;
23 | *) echo "Unrecognized size."; exit 1;;
24 | esac
25 |
26 | export CUDA_VISIBLE_DEVICES=$GPUS
27 | # setopt shwordsplit  # zsh-only; bash already word-splits the unquoted $ARGS
28 | ./main.py --dataset imagenet --loss $LOSS \
29 | --out-name ../xp/imagenet/im${SIZE_TAG}_${LOSS}_lr=${lr_0}_mu=${mu}_tau=${tau} \
30 | --parallel-gpu $ARGS --no-visdom \
31 | --lr_0 $lr_0 --tau $tau --mu $mu --seed $seed \
32 | --use_dali
33 | }
34 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_subsets_ce.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # lr_0=0.1
4 | lr_0=1.0
5 | mu=2.5e-4
6 |
7 | cd $(dirname $0)/..
8 |
9 | export CUDA_VISIBLE_DEVICES=3
10 |
11 | # python main.py --dataset imagenet --loss ce \
12 | # --out-name ../xp/imagenet/im64k_ce_lr=${lr_0}_mu=$mu \
13 | # --parallel-gpu --train-size 64000 --lr_0 $lr_0 --mu=$mu --no-visdom;
14 |
15 | # python main.py --dataset imagenet --loss ce \
16 | # --out-name ../xp/imagenet/im128k_ce_lr=${lr_0}_mu=$mu \
17 | # --parallel-gpu --train-size 128000 --lr_0 $lr_0 --mu=$mu --no-visdom;
18 |
19 | # python main.py --dataset imagenet --loss ce \
20 | # --out-name ../xp/imagenet/im320k_ce_lr=${lr_0}_mu=$mu \
21 | # --parallel-gpu --train-size 320000 --lr_0 $lr_0 --mu=$mu --no-visdom;
22 |
23 | # python main.py --dataset imagenet --loss ce \
24 | # --out-name ../xp/imagenet/im640k_ce_lr=${lr_0}_mu=$mu \
25 | # --parallel-gpu --train-size 640000 --lr_0 $lr_0 --mu=$mu \
26 | # --no-visdom --use_dali
27 |
28 | # python main.py --dataset imagenet --loss ce \
29 | # --out-name ../xp/imagenet/imall_ce_lr=${lr_0}_mu=$mu \
30 | # --parallel-gpu --lr_0 $lr_0 --mu=$mu --no-visdom;
31 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_subsets_entr.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | lr_0=1
4 | tau=1.0
5 | mu=0.00025
6 |
7 | cd $(dirname $0)/..
8 |
9 | source ~/imagenet-fast.sh
10 | export CUDA_VISIBLE_DEVICES=3
11 |
12 | # ./main.py --dataset imagenet --loss entr \
13 | # --out-name ../xp/imagenet/im64k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \
14 | # --parallel-gpu --train-size 64000 --no-visdom \
15 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
16 |
17 | # ./main.py --dataset imagenet --loss entr \
18 | # --out-name ../xp/imagenet/im128k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \
19 | # --parallel-gpu --train-size 128000 --no-visdom \
20 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
21 |
22 | # ./main.py --dataset imagenet --loss entr \
23 | # --out-name ../xp/imagenet/im320k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \
24 | # --parallel-gpu --train-size 320000 --no-visdom \
25 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
26 |
27 | # ./main.py --dataset imagenet --loss entr \
28 | # --out-name ../xp/imagenet/im640k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \
29 | # --parallel-gpu --train-size 640000 --no-visdom \
30 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
31 |
32 | # ./main.py --dataset imagenet --loss entr \
33 | # --out-name ../xp/imagenet/imall_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \
34 | # --parallel-gpu --no-visdom \
35 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
36 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_subsets_lml.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | lr_0=1
4 | tau=1.
5 | mu=0.00025
6 |
7 | cd $(dirname $0)/..
8 |
9 | # source ~/imagenet-fast.sh
10 | # source ~/.private
11 | export CUDA_VISIBLE_DEVICES=2
12 |
13 | # ./main.py --dataset imagenet --loss lml \
14 | # --out-name ../xp/imagenet/im64k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \
15 | # --parallel-gpu --train-size 64000 --no-visdom \
16 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
17 |
18 | # ./main.py --dataset imagenet --loss lml \
19 | # --out-name ../xp/imagenet/im128k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \
20 | # --parallel-gpu --train-size 128000 --no-visdom \
21 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
22 |
23 | # ./main.py --dataset imagenet --loss lml \
24 | # --out-name ../xp/imagenet/im320k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \
25 | # --parallel-gpu --train-size 320000 --no-visdom \
26 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
27 |
28 | # ./main.py --dataset imagenet --loss lml \
29 | # --out-name ../xp/imagenet/im640k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \
30 | # --parallel-gpu --train-size 640000 --no-visdom \
31 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
32 |
33 | # ./main.py --dataset imagenet --loss lml \
34 | # --out-name ../xp/imagenet/imall_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \
35 | # --parallel-gpu --no-visdom \
36 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali
37 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/imagenet_subsets_svm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | lr_0=1
4 | tau=0.1
5 | mu=2.5e-4
6 |
7 | cd $(dirname $0)/..
8 | export CUDA_VISIBLE_DEVICES=0
9 |
10 | source ~/imagenet-fast.sh
11 |
12 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im64k_svm \
13 | # --parallel-gpu --train-size 64000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom;
14 |
15 | python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im128k_svm \
16 | --parallel-gpu --train-size 128000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom;
17 |
18 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im320k_svm \
19 | # --parallel-gpu --train-size 320000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom;
20 |
21 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im640k_svm \
22 | # --parallel-gpu --train-size 640000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom;
23 |
24 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/imall_svm \
25 | # --parallel-gpu --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom;
26 |
--------------------------------------------------------------------------------
/smooth-topk/src/scripts/perf-all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd $(dirname $0)/..
4 |
5 | export CUDA_VISIBLE_DEVICES=0
6 | export OMP_NUM_THREADS=1
7 |
8 | for NCLS in 1000 10000; do
9 | for K in 5 50 100; do
10 | ./scripts/perf.py --n_classes $NCLS --k $K --n_trials 50
11 | ./scripts/perf.py --n_classes $NCLS --k $K --n_trials 50 --no-cuda
12 | done
13 | done
14 |
--------------------------------------------------------------------------------
/smooth-topk/src/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/tests/__init__.py
--------------------------------------------------------------------------------
/smooth-topk/src/tests/py_ref.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.special as sp
3 | import itertools
4 |
5 | from tests.utils import to_numpy
6 |
7 |
8 | def log1mexp_py(x):
9 | x = to_numpy(x).astype(np.float128)
10 | res = np.log(-np.expm1(x))
11 | return res
12 |
13 |
14 | def max_svm_py(scores, y_truth, alpha=1.):
15 |
16 | scores = scores.data.numpy()
17 | y_truth = y_truth.data.numpy()
18 |
19 | objective = 0
20 | n_samples = scores.shape[0]
21 | n_classes = scores.shape[1]
22 | for i in range(n_samples):
23 | # find maximally violated constraint
24 | loss_augmented = np.array([scores[i, y] + alpha * int(y != y_truth[i])
25 | for y in range(n_classes)])
26 | y_star = np.argmax(loss_augmented)
27 |
28 | # update metrics
29 | delta = int(y_truth[i] != y_star) * alpha
30 | objective += max(delta + scores[i, y_star] - scores[i, y_truth[i]], 0)
31 |
32 | objective *= 1. / n_samples
33 |
34 | return objective
35 |
36 |
37 | def svm_topk_max_py(scores, y_truth, k):
38 |
39 | assert k > 1
40 |
41 | scores = scores.data.numpy()
42 | y_truth = y_truth.data.numpy()
43 |
44 | objective = 0
45 | n_samples = scores.shape[0]
46 | n_classes = scores.shape[1]
47 | for i in range(n_samples):
48 | # all scores for sample i except ground truth score
49 | scores_ = np.array([scores[i, y] for y in range(n_classes)
50 | if y != y_truth[i]])
51 |
52 | # k maximal scores excluding y_truth + loss of 1
53 | obj_1 = np.mean(np.sort(scores_)[-k:]) + 1.
54 |
55 | # k - 1 maximal scores excluding y_truth + score of y_truth
56 | obj_2 = (np.sum(np.sort(scores_)[-k + 1:]) + scores[i, y_truth[i]]) / k
57 |
58 | # update metrics
59 | objective += max(obj_1, obj_2) - obj_2
60 |
61 | objective *= 1. / n_samples
62 |
63 | return objective
64 |
65 |
66 | def smooth_svm_py(x, y, tau):
67 | x, y = to_numpy(x), to_numpy(y)
68 | n_samples, n_classes = x.shape
69 | scores = x + np.not_equal(np.arange(n_classes)[None, :], y[:, None]) - \
70 | x[np.arange(n_samples), y][:, None]
71 | loss = tau * np.mean(sp.logsumexp(scores / tau, axis=1))
72 | return loss
73 |
74 |
75 | def sum_product_py(x, k):
76 | x = to_numpy(x)
77 | n_samples, n_classes = x.shape
78 | res = np.zeros(n_samples)
79 | for indices in itertools.combinations(range(n_classes), k):
80 | res += np.prod(x[:, indices], axis=1)
81 | return res
82 |
83 |
84 | def svm_topk_smooth_py_1(x, y, tau, k):
85 | x, y = to_numpy(x), to_numpy(y)
86 | x = x.astype(np.float128)
87 | tau = float(tau)
88 | n_samples, n_classes = x.shape
89 | exp = np.exp(x * 1. / (k * tau))
90 |
91 | term_1 = np.zeros(n_samples)
92 | for indices in itertools.combinations(range(n_classes), k):
93 | delta = 1. - np.sum(indices == y[:, None], axis=1)
94 | term_1 += np.prod(exp[:, indices], axis=1) * np.exp(delta / tau)
95 |
96 | term_2 = np.zeros(n_samples)
97 | for i in range(n_samples):
98 | all_but_y = [j for j in range(n_classes) if j != y[i]]
99 | for indices in itertools.combinations(all_but_y, k - 1):
100 | term_2[i] += np.prod(exp[i, indices]) * exp[i, y[i]]
101 |
102 | loss = tau * (np.log(term_1) - np.log(term_2))
103 |
104 | return loss
105 |
106 |
107 | def svm_topk_smooth_py_2(x, y, tau, k):
108 | x, y = to_numpy(x), to_numpy(y)
109 | n_samples, n_classes = x.shape
110 | exp = np.exp(x * 1. / (k * tau))
111 |
112 | term_1 = np.zeros(n_samples)
113 | for i in range(n_samples):
114 | all_but_y = [j for j in range(n_classes) if j != y[i]]
115 | for indices in itertools.combinations(all_but_y, k - 1):
116 | term_1[i] += np.prod(exp[i, indices])
117 |
118 | term_2 = np.zeros(n_samples)
119 | for i in range(n_samples):
120 | all_but_y = [j for j in range(n_classes) if j != y[i]]
121 | for indices in itertools.combinations(all_but_y, k):
122 | term_2[i] += np.prod(exp[i, indices])
123 |
124 | all_ = np.arange(n_samples)
125 | loss = tau * (np.log(term_1 * exp[all_, y] + np.exp(1. / tau) * term_2) -
126 | np.log(term_1 * exp[all_, y]))
127 | return loss
128 |
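In equation form, the two reference implementations svm_topk_smooth_py_1/2 above compute the same smoothed top-k hinge loss (a reconstruction from the code, summing over subsets S of k classes and writing \Delta(S, y) = 1 - \mathbf{1}[y \in S]):

    L(x, y) = \tau \left[
        \log \sum_{|S| = k} \exp\!\left( \frac{\Delta(S, y) + \frac{1}{k} \sum_{i \in S} x_i}{\tau} \right)
        - \log \sum_{|S| = k,\; y \in S} \exp\!\left( \frac{\frac{1}{k} \sum_{i \in S} x_i}{\tau} \right)
    \right]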
--------------------------------------------------------------------------------
/smooth-topk/src/tests/test_log.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import unittest
4 | import numpy as np
5 |
6 | from losses.logarithm import LogTensor, log1mexp
7 | from tests.utils import assert_all_close, V
8 | from tests.py_ref import log1mexp_py
9 |
10 |
11 | class TestLogTensor(unittest.TestCase):
12 |
13 | def setUp(self):
14 |
15 | torch.manual_seed(1234)
16 |
17 | self.n_element = 50
18 | self.x = torch.randn(self.n_element).abs()
19 | self.y = torch.randn(self.n_element).abs()
20 | self.nonzero_const = np.random.rand()
21 |
22 | def testSumTensors(self):
23 |
24 | sum_ = LogTensor(V(self.x)) + LogTensor(V(self.y))
25 | res_sb = sum_.torch()
26 | res_th = torch.log(torch.exp(self.x.double()) +
27 | torch.exp(self.y.double()))
28 |
29 | assert_all_close(res_th, res_sb)
30 |
31 | def testSumNonZero(self):
32 |
33 | sum_ = LogTensor(V(self.x)) + self.nonzero_const
34 | res_sb = sum_.torch()
35 | res_th = torch.log(torch.exp(self.x.double()) +
36 | self.nonzero_const)
37 |
38 | assert_all_close(res_th, res_sb)
39 |
40 | def testSumZero(self):
41 |
42 | sum_ = LogTensor(V(self.x)) + 0
43 | res_sb = sum_.torch()
44 | res_th = self.x
45 |
46 | assert_all_close(res_th, res_sb)
47 |
48 | def testMulTensors(self):
49 |
50 | sum_ = LogTensor(V(self.x)) * LogTensor(V(self.y))
51 | res_sb = sum_.torch()
52 | res_th = self.x.double() + self.y.double()
53 |
54 | assert_all_close(res_th, res_sb)
55 |
56 | def testMulNonZero(self):
57 |
58 | sum_ = LogTensor(V(self.x)) * self.nonzero_const
59 | res_sb = sum_.torch()
60 | res_th = self.x.double() + math.log(self.nonzero_const)
61 |
62 | assert_all_close(res_th, res_sb)
63 |
64 | def testMulZero(self):
65 |
66 | sum_ = LogTensor(V(self.x)) * 0
67 | res_sb = sum_.torch()
68 | res_th = -np.inf * np.ones(res_sb.size())
69 |
70 | assert_all_close(res_th, res_sb)
71 |
72 |
73 | class Test1MExp(unittest.TestCase):
74 |
75 | def setUp(self):
76 | torch.manual_seed(1234)
77 | shape = (100, 100)
78 | self.x = -torch.randn(shape).abs()
79 |
80 |
81 | def gen_test_exp1m(scale):
82 | def test(cls):
83 | x = cls.x * 10 ** scale
84 | res_th = log1mexp(x)
85 | res_py = log1mexp_py(x)
86 | assert_all_close(res_th, res_py, rtol=1e-4, atol=1e-5)
87 | return test
88 |
89 |
90 | def add_scale_tests_1mexp():
91 | for scale in (-3, -2, -1, 0, 1, 2, 3, 4):
92 | test = gen_test_exp1m(scale)
93 | test_name = 'test_scale_{}'.format(str(scale))
94 | setattr(Test1MExp, test_name, test)
95 |
96 |
97 | add_scale_tests_1mexp()
98 |
--------------------------------------------------------------------------------
/smooth-topk/src/tests/test_losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import unittest
3 | import numpy as np
4 |
5 | from torch.autograd import Variable
6 | from losses.svm import SmoothTop1SVM, SmoothTopkSVM, MaxTop1SVM, MaxTopkSVM
7 | from losses.functional import Topk_Smooth_SVM
8 | from tests.utils import assert_all_close, V
9 | from tests.py_ref import svm_topk_smooth_py_1, svm_topk_smooth_py_2,\
10 | smooth_svm_py, max_svm_py, svm_topk_max_py
11 |
12 | from torch.autograd.gradcheck import gradcheck
13 |
14 |
15 | class TestMaxSVM(unittest.TestCase):
16 |
17 | def setUp(self):
18 |
19 | torch.manual_seed(1234)
20 | np.random.seed(1234)
21 |
22 | self.n_samples = 20
23 | self.n_classes = 7
24 | self.alpha = 1.
25 | self.x = torch.randn(self.n_samples, self.n_classes)
26 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes,
27 | size=self.n_samples))
28 | self.k = 3
29 |
30 | def testMaxSVM(self):
31 |
32 | max_svm_th = MaxTop1SVM(self.n_classes, alpha=self.alpha)
33 | res_th = max_svm_th(V(self.x), V(self.y))
34 | res_py = max_svm_py(V(self.x), V(self.y), alpha=self.alpha)
35 |
36 | assert_all_close(res_th, res_py)
37 |
38 | def testMaxSVMtopk(self):
39 |
40 | max_svm_th = MaxTopkSVM(self.n_classes, k=self.k)
41 | res_th = max_svm_th(V(self.x), V(self.y))
42 | res_py = svm_topk_max_py(V(self.x), V(self.y), k=self.k)
43 |
44 | assert_all_close(res_th, res_py)
45 |
46 |
47 | class TestSmoothSVM(unittest.TestCase):
48 |
49 | def setUp(self):
50 |
51 | torch.manual_seed(1234)
52 | np.random.seed(1234)
53 |
54 | self.n_samples = 20
55 | self.n_classes = 7
56 | self.tau = float(2.)
57 | self.x = torch.randn(self.n_samples, self.n_classes)
58 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes,
59 | size=self.n_samples))
60 |
61 | def testSmoothSVM(self):
62 |
63 | smooth_svm_th = SmoothTop1SVM(self.n_classes, tau=self.tau)
64 | res_th = smooth_svm_th(V(self.x), V(self.y))
65 | res_py = smooth_svm_py(V(self.x), V(self.y), self.tau)
66 |
67 | assert_all_close(res_th, res_py)
68 |
69 |
70 | class TestSmoothSVMTopk(unittest.TestCase):
71 |
72 | def setUp(self):
73 |
74 | torch.manual_seed(1234)
75 | np.random.seed(1234)
76 |
77 | self.n_samples = 2
78 | self.n_classes = 7
79 | self.k = 5
80 | self.tau = float(2.)
81 | self.x = torch.randn(self.n_samples, self.n_classes)
82 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes,
83 | size=self.n_samples))
84 | self.labels = torch.from_numpy(np.arange(self.n_classes))
85 |
86 | def testSmoothSVMpy(self):
87 |
88 | res_py_1 = svm_topk_smooth_py_1(V(self.x), V(self.y), self.tau, self.k)
89 | res_py_2 = svm_topk_smooth_py_2(V(self.x), V(self.y), self.tau, self.k)
90 |
91 | assert_all_close(res_py_1, res_py_2)
92 |
93 | def testSmoothSVMth_functional(self):
94 |
95 | F = Topk_Smooth_SVM(self.labels, self.k, self.tau)
96 | res_th = F(V(self.x), V(self.y))
97 | res_py = svm_topk_smooth_py_1(V(self.x), V(self.y), self.tau, self.k)
98 |
99 | assert_all_close(res_th, res_py)
100 |
101 | def testSmoothSVMth_loss(self):
102 |
103 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau,
104 | k=self.k)
105 | res_th = svm_topk_smooth_th(V(self.x), V(self.y))
106 | res_py = svm_topk_smooth_py_1(V(self.x),
107 | V(self.y),
108 | self.tau, self.k).mean()
109 |
110 | assert_all_close(res_th, res_py)
111 |
112 | def testSmoothSVMth_loss_scales(self):
113 |
114 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, k=self.k)
115 | for scale in (1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3):
116 | x = self.x * scale
117 | res_th = svm_topk_smooth_th(V(x), V(self.y))
118 | res_py = svm_topk_smooth_py_1(V(x), V(self.y), self.tau, self.k).mean()
119 | assert_all_close(res_th, res_py)
120 |
121 | def testGradSmoothSVMth_loss(self):
122 |
123 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, k=self.k)
124 | for scale in (1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4):
125 | x = self.x * scale
126 | x = Variable(x, requires_grad=True)
127 | assert gradcheck(lambda x: svm_topk_smooth_th(x, V(self.y)),
128 | (x,), atol=1e-2, rtol=1e-3, eps=max(1e-4 * scale, 1e-2)), \
129 | "failed with scale {}".format(scale)
130 |
--------------------------------------------------------------------------------
/smooth-topk/src/tests/test_sum_product.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import unittest
4 | import numpy as np
5 |
6 | from losses.polynomial import LogSumExp
7 | from tests.utils import assert_all_close, V
8 | from tests.py_ref import sum_product_py
9 | from tests.th_ref import log_sum_exp_k
10 | from torch.autograd import Variable, gradcheck
11 |
12 |
13 | class TestSumProduct(unittest.TestCase):
14 |
15 | def setUp(self):
16 |
17 | torch.set_printoptions(linewidth=160, threshold=1e3)
18 |
19 | seed = 7
20 | np.random.seed(1234)
21 | seed = np.random.randint(1e5)
22 | torch.manual_seed(seed)
23 |
24 | self.eps = 1e-4
25 |
26 | def testLogSumProductExp(self):
27 |
28 | self.n_samples = 25
29 | self.n_classes = 20
30 | self.k = 7
31 | self.x = torch.randn(self.n_samples, self.n_classes)
32 |
33 | res_th = LogSumExp(self.k, p=1)(V(self.x)).squeeze()
34 | res1_th, res2_th = res_th[0], res_th[1]
35 | res1_py = np.log(sum_product_py(V(torch.exp(self.x)), self.k - 1))
36 | res2_py = np.log(sum_product_py(V(torch.exp(self.x)), self.k))
37 |
38 | assert_all_close(res1_th, res1_py)
39 | assert_all_close(res2_th, res2_py)
40 |
41 | def test_backward(self):
42 |
43 | self.n_samples = 25
44 | self.n_classes = 1000
45 | self.k = 100
46 | self.k = 20
47 | self.x = torch.randn(self.n_samples, self.n_classes)
48 | self.x, _ = torch.sort(self.x, dim=1, descending=True)
49 |
50 | for tau in (5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 1e1, 5e2, 1e3):
51 | x = self.x / (tau * self.k)
52 | top, _ = x.topk(self.k + 1, 1)
53 | thresh = 1e2
54 | hard = torch.ge(top[:, self.k - 1] - top[:, self.k],
55 | math.log(thresh))
56 | smooth = hard.eq(0)
57 |
58 | x = x[smooth.unsqueeze(1).expand_as(x)].view(-1, x.size(1))
59 | if not x.size():
60 | print('empty tensor')
61 | return
62 |
63 | X_auto = Variable(x.double(), requires_grad=True)
64 | X_man = Variable(x, requires_grad=True)
65 |
66 | res1_auto, res2_auto = log_sum_exp_k(X_auto, self.k)
67 | res1_auto, res2_auto = res1_auto.squeeze(), res2_auto.squeeze()
68 |
69 | res_man = LogSumExp(self.k)(X_man).squeeze()
70 | res1_man = res_man[0]
71 | res2_man = res_man[1]
72 |
73 | proj1 = torch.randn(res1_auto.size()).fill_(1)
74 | proj2 = torch.randn(res2_auto.size()).fill_(1)
75 |
76 | proj_auto = torch.dot(V(proj1.double()), res1_auto) +\
77 | torch.dot(V(proj2.double()), res2_auto)
78 | proj_man = torch.dot(V(proj1), res1_man) +\
79 | torch.dot(V(proj2), res2_man)
80 | proj_auto.backward()
81 | proj_man.backward()
82 |
83 | # check forward
84 | assert_all_close(res1_auto, res1_man, atol=1e0, rtol=1e-3)
85 | assert_all_close(res2_auto, res2_man, atol=1e0, rtol=1e-3)
86 |
87 | # check backward
88 | assert_all_close(X_auto.grad, X_man.grad, atol=0.05, rtol=1e-2)
--------------------------------------------------------------------------------
/smooth-topk/src/tests/th_ref.py:
--------------------------------------------------------------------------------
1 | from losses.polynomial.multiplication import Multiplication
2 | from losses.polynomial.divide_conquer import divide_and_conquer
3 |
4 |
5 | def log_sum_exp_k(x, k):
6 | # number of samples and number of coefficients to compute
7 | n_s = x.size(0)
8 |
9 | assert k <= x.size(1)
10 |
11 | # clone to allow in-place operations
12 | x = x.clone()
13 |
14 | # pre-compute normalization
15 | x_summed = x.sum(1)
16 |
17 | # invert in log-space
18 | x.t_().mul_(-1)
19 |
20 | # initialize polynomials (in log-space)
21 | x = [x, x.clone().fill_(0)]
22 |
23 | # polynomial multiplications
24 | log_res = divide_and_conquer(x, k, mul=Multiplication(k))
25 |
26 | # re-normalize
27 | coeff = log_res + x_summed[None, :]
28 |
29 | # avoid broadcasting issues (in particular if n_s = 1)
30 | coeff = coeff.view(k + 1, n_s)
31 |
32 | return coeff[k - 1: k + 1]
33 |
--------------------------------------------------------------------------------
/smooth-topk/src/tests/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from torch.autograd import Variable
5 |
6 |
7 | def V(x, requires_grad=False):
8 | """
9 | returns clone of tensor x wrapped in a Variable
10 | Avoids issue of inplace operations if x used in several functions
11 | """
12 | assert torch.is_tensor(x)
13 | return Variable(x.clone(), requires_grad=requires_grad)
14 |
15 |
16 | def to_numpy(tensor):
17 | if isinstance(tensor, Variable):
18 | tensor = tensor.data
19 | if torch.is_tensor(tensor):
20 | tensor = tensor.clone().cpu().numpy()
21 | if not hasattr(tensor, '__len__'):
22 | tensor = np.array([tensor])
23 | assert isinstance(tensor, np.ndarray)
24 | tensor = tensor.squeeze()
25 | return tensor
26 |
27 |
28 | def assert_all_close(tensor_1, tensor_2, rtol=1e-4, atol=1e-4):
29 | tensor_1 = to_numpy(tensor_1).astype(np.float64)
30 | tensor_2 = to_numpy(tensor_2).astype(np.float64)
31 | np.testing.assert_equal(np.isposinf(tensor_1),
32 | np.isposinf(tensor_2))
33 | np.testing.assert_equal(np.isneginf(tensor_1),
34 | np.isneginf(tensor_2))
35 | indices = np.isfinite(tensor_1)
36 | if indices.sum():
37 | tensor_1 = tensor_1[indices]
38 | tensor_2 = tensor_2[indices]
39 | err = np.max(np.abs(tensor_1 - tensor_2))
40 | err_msg = "Max abs error: {0:.3g}".format(err)
41 | np.testing.assert_allclose(tensor_1, tensor_2, rtol=rtol, atol=atol,
42 | err_msg=err_msg)
43 |
--------------------------------------------------------------------------------