├── .gitignore ├── LICENSE.mit ├── README.md ├── images ├── lml.png └── polytope.png ├── lml.py ├── neural-motifs ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── config.py ├── dataloaders │ ├── __init__.py │ ├── blob.py │ ├── image_transforms.py │ ├── mscoco.py │ └── visual_genome.py ├── docs │ ├── LICENSE.md │ ├── _config.yaml │ ├── _includes │ │ └── image.html │ ├── _layouts │ │ └── default.html │ ├── index.md │ ├── teaser.png │ └── upload.sh ├── lib │ ├── __init__.py │ ├── draw_rectangles │ │ ├── draw_rectangles.c │ │ ├── draw_rectangles.pyx │ │ └── setup.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── sg_eval.py │ │ ├── sg_eval_all_rel_cates.py │ │ ├── sg_eval_slow.py │ │ └── test_sg_eval.py │ ├── fpn │ │ ├── anchor_targets.py │ │ ├── box_intersections_cpu │ │ │ ├── bbox.c │ │ │ ├── bbox.pyx │ │ │ └── setup.py │ │ ├── box_utils.py │ │ ├── generate_anchors.py │ │ ├── make.sh │ │ ├── nms │ │ │ ├── Makefile │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ └── nms.py │ │ │ └── src │ │ │ │ ├── cuda │ │ │ │ ├── Makefile │ │ │ │ ├── nms_kernel.cu │ │ │ │ └── nms_kernel.h │ │ │ │ ├── nms_cuda.c │ │ │ │ └── nms_cuda.h │ │ ├── proposal_assignments │ │ │ ├── proposal_assignments_det.py │ │ │ ├── proposal_assignments_gtbox.py │ │ │ ├── proposal_assignments_postnms.py │ │ │ ├── proposal_assignments_rel.py │ │ │ └── rel_assignments.py │ │ └── roi_align │ │ │ ├── Makefile │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ │ └── src │ │ │ ├── cuda │ │ │ ├── Makefile │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ │ │ ├── roi_align_cuda.c │ │ │ └── roi_align_cuda.h │ ├── get_dataset_counts.py │ ├── get_union_boxes.py │ ├── lstm │ │ ├── __init__.py │ │ ├── decoder_rnn.py │ │ └── highway_lstm_cuda │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── highway_lstm_layer │ │ │ │ └── __init__.py │ │ │ ├── alternating_highway_lstm.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ └── src │ │ │ ├── highway_lstm_cuda.c │ │ │ ├── highway_lstm_cuda.h │ │ │ ├── highway_lstm_kernel.cu │ │ │ └── highway_lstm_kernel.h │ ├── object_detector.py │ ├── pytorch_misc.py │ ├── rel_model.py │ ├── rel_model_stanford.py │ ├── resnet.py │ ├── sparse_targets.py │ ├── surgery.py │ └── word_vectors.py ├── misc │ ├── __init__.py │ └── motifs.py ├── models │ ├── _visualize.py │ ├── eval_rel_count.py │ ├── eval_rels.py │ ├── eval_vis.py │ ├── train_detector.py │ └── train_rels.py └── scripts │ ├── eval_models_sgcls.sh │ ├── eval_models_sgdet.sh │ ├── pretrain_detector.sh │ ├── refine_for_detection.sh │ ├── train_models_sgcls.sh │ ├── train_motifnet.sh │ ├── train_predcls.sh │ └── train_stanford.sh ├── setup.py └── smooth-topk ├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt └── src ├── cli.py ├── data ├── __init__.py ├── main.py └── utils.py ├── epoch.py ├── losses ├── __init__.py ├── entr.py ├── functional.py ├── lml_loss.py ├── logarithm.py ├── main.py ├── ml.py ├── polynomial │ ├── __init__.py │ ├── divide_conquer.py │ ├── grad.py │ ├── multiplication.py │ └── sp.py ├── svm.py └── utils.py ├── main.py ├── models ├── __init__.py ├── cifar.py ├── densenet.py ├── main.py └── parser.py ├── scripts ├── cifar100_noise_ce.sh ├── cifar100_noise_entr.sh ├── cifar100_noise_lml.sh ├── cifar100_noise_ml.sh ├── cifar100_noise_svm.sh ├── eval.py ├── imagenet_split.py ├── imagenet_subsets.sh ├── 
imagenet_subsets_ce.sh ├── imagenet_subsets_entr.sh ├── imagenet_subsets_lml.sh ├── imagenet_subsets_svm.sh ├── perf-all.sh └── perf.py ├── tests ├── __init__.py ├── py_ref.py ├── test_log.py ├── test_losses.py ├── test_sum_product.py ├── th_ref.py └── utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints 2 | data* 3 | 4 | _ext 5 | 6 | *.o 7 | *.lprof 8 | *.pkl 9 | *.egg-info 10 | dist 11 | build 12 | 13 | neural-motifs/motifnet_* -------------------------------------------------------------------------------- /LICENSE.mit: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright 2019 Intel AI, CMU, Bosch AI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Limited Multi-Label Projection Layer 2 | 3 | This repository is by 4 | [Brandon Amos](http://bamos.github.io), 5 | [Vladlen Koltun](http://vladlen.info/), 6 | and 7 | [J. Zico Kolter](http://zicokolter.com/) and 8 | contains the PyTorch library and source code to reproduce the 9 | experiments in our tech report on 10 | [The Limited Multi-Label Projection Layer](https://arxiv.org/abs/1906.08707). 11 | 12 | ![](./images/polytope.png) 13 | ![](./images/lml.png) 14 | 15 | --- 16 | 17 | We provide the LML layer as a PyTorch module in `lml.py`. 18 | You can install it with: 19 | 20 | ``` 21 | pip install git+git://github.com/locuslab/lml.git 22 | ``` 23 | 24 | A simple usage example to project a 5-dimensional vector 25 | onto the LML polytope with two active elements is: 26 | 27 | ```python 28 | import torch 29 | from lml import LML 30 | 31 | x = 10.*torch.randn(5) # tensor([ -4.0695, 10.8666, 13.0867, -7.1431, -14.7220]) 32 | y = LML(N=2)(x) # tensor([5.8745e-04, 9.9945e-01, 9.9994e-01, 2.7187e-05, 1.3897e-08])) 33 | ``` 34 | 35 | # Top-k Image Classification 36 | In the `smooth-topk` directory, we have connected the LML layer to the 37 | PyTorch experiments in the 38 | [oval-group/smooth-topk](https://github.com/oval-group/smooth-topk) 39 | repository. 40 | We ran these experiments with PyTorch 1.0. 
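For intuition, here is a minimal sketch of how an LML projection can be turned
into a top-k classification loss. This is a simplified, hypothetical sketch
rather than the exact `losses/lml_loss.py` implementation, and it assumes the
`LML` layer broadcasts over a leading batch dimension as in these experiments:

```python
import torch
from lml import LML

def topk_nll(scores, target, k=5, eps=1e-8):
    # scores: [batch, num_classes] logits; target: [batch] class indices.
    # Project each row onto the LML polytope: each entry of p is the
    # probability that the corresponding class is in the top-k.
    p = LML(N=k)(scores)
    # Penalize the model when the true class is unlikely to be in the top-k.
    p_true = p.gather(1, target.unsqueeze(1)).squeeze(1)
    return -(p_true + eps).log().mean()
```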
41 | 
42 | A single LML training run can be done from the `smooth-topk/src` directory with
43 | 
44 | ```
45 | ./main.py --dataset cifar100 --model densenet40-40 --out-name /tmp/lml-cifar --loss lml --noise 0.0 --seed 0 --no-visdom
46 | ```
47 | 
48 | Coordinating all of the CIFAR-100 experiments can be done with
49 | the `./scripts/cifar100_noise_*.sh` scripts.
50 | 
51 | We have also added an option to use the
52 | [NVIDIA/DALI](https://github.com/NVIDIA/DALI)
53 | library for pre-processing ImageNet images on the GPU,
54 | but [DALI currently has known memory leaks](https://github.com/NVIDIA/DALI/issues/344)
55 | that cause the experiments to run out of memory
56 | and crash.
57 | 
58 | # Neural Motifs: Scene Graph Generation
59 | 
60 | In the `neural-motifs` directory, we have connected the LML layer to the
61 | PyTorch experiments in the
62 | [rowanz/neural-motifs](https://github.com/rowanz/neural-motifs)
63 | repository.
64 | The `README` in this directory provides more details about
65 | setting up and running the experiments.
66 | The original code has not been updated to the latest version of
67 | PyTorch, so these experiments should be run with PyTorch 0.3.
68 | 
69 | A single LML training run can be done from the `neural-motifs` directory with
70 | 
71 | ```
72 | python3 models/train_rels.py -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar -save_dir /tmp/lml-nm -nepoch 50 -use_bias --lml_topk 20
73 | ```
74 | 
75 | Coordinating all of the experiments can be done with
76 | `./scripts/train_predcls.sh`.
77 | 
78 | # Licensing and Citations
79 | 
80 | Our LML layer in `lml.py` is licensed under the MIT license.
81 | All other code in this repository remains under the
82 | original licensing.
83 | 
84 | If you find this repository helpful in your publications,
85 | please consider citing our paper.
86 | 
87 | ```
88 | @article{amos2019limited,
89 |   title={{The Limited Multi-Label Projection Layer}},
90 |   author={Brandon Amos and Vladlen Koltun and J. Zico Kolter},
91 |   journal={arXiv preprint arXiv:1906.08707},
92 |   year={2019}
93 | }
94 | ```
95 | 
-------------------------------------------------------------------------------- /images/lml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/images/lml.png -------------------------------------------------------------------------------- /images/polytope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/images/polytope.png -------------------------------------------------------------------------------- /neural-motifs/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | -------------------------------------------------------------------------------- /neural-motifs/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Rowan Zellers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /neural-motifs/Makefile: -------------------------------------------------------------------------------- 1 | export PATH := /usr/local/cuda-9.1/bin:$(PATH) 2 | 3 | all: draw_rectangles box_intersections nms roi_align lstm 4 | 5 | draw_rectangles: 6 | cd lib/draw_rectangles; python setup.py build_ext --inplace 7 | box_intersections: 8 | cd lib/fpn/box_intersections_cpu; python setup.py build_ext --inplace 9 | nms: 10 | cd lib/fpn/nms; make 11 | roi_align: 12 | cd lib/fpn/roi_align; make 13 | lstm: 14 | cd lib/lstm/highway_lstm_cuda; ./make.sh 15 | -------------------------------------------------------------------------------- /neural-motifs/README.md: -------------------------------------------------------------------------------- 1 | This directory and README are from the 2 | [rowanz/neural-motifs](https://github.com/rowanz/neural-motifs) 3 | repository. 4 | Our modifications remain under the same license. 
5 | 
6 | 
7 | ---
8 | 
9 | # neural-motifs
10 | Code for Neural Motifs: Scene Graph Parsing with Global Context (CVPR 2018)
11 | 
12 | This repository contains data and code for the paper [Neural Motifs: Scene Graph Parsing with Global Context](https://arxiv.org/abs/1711.06640v2) - now updated for the CVPR camera ready! This should be the same as what's available via the CVPR site when that appears, except that it has the supplemental section rolled into the same PDF. For the project page (as well as links to the baseline checkpoints), check out [rowanzellers.com/neuralmotifs](https://rowanzellers.com/neuralmotifs). If the paper significantly inspires you, we request that you cite our work:
13 | 
14 | ### Bibtex
15 | 
16 | ```
17 | @inproceedings{zellers2018scenegraphs,
18 |   title={Neural Motifs: Scene Graph Parsing with Global Context},
19 |   author={Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin},
20 |   booktitle = "Conference on Computer Vision and Pattern Recognition",
21 |   year={2018}
22 | }
23 | ```
24 | # Setup
25 | 
26 | 
27 | 0. Install Python 3.6 and PyTorch 0.3. I recommend the [Anaconda distribution](https://repo.continuum.io/archive/). To install PyTorch if you haven't already, use
28 | ```conda install pytorch=0.3.0 torchvision=0.2.0 cuda90 -c pytorch```.
29 | 
30 | 1. Update the config file with the dataset paths. Specifically:
31 |     - Visual Genome (the VG_100K folder, image_data.json, VG-SGG.h5, and VG-SGG-dicts.json). See data/stanford_filtered/README.md for the steps I used to download these.
32 |     - You'll also need to fix your PYTHONPATH: ```export PYTHONPATH=/home/rowan/code/scene-graph```
33 | 
34 | 2. Compile everything. Run ```make``` in the main directory: this compiles the Bilinear Interpolation operation for the RoIs as well as the Highway LSTM.
35 | 
36 | 3. Pretrain VG detection. The old version involved pretraining COCO as well, but we got rid of that for simplicity. Run ./scripts/pretrain_detector.sh.
Note: You might have to modify the learning rate and batch size, particularly if you don't have 3 Titan X GPUs (which is what I used). [You can also download the pretrained detector checkpoint here.](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX)
38 | 
39 | 4. Train VG scene graph classification: run ./scripts/train_models_sgcls.sh 2 (will run on GPU 2). OR, download the MotifNet-cls checkpoint here: [Motifnet-SGCls/PredCls](https://drive.google.com/open?id=12qziGKYjFD3LAnoy4zDT3bcg5QLC0qN6).
40 | 5. Refine for detection: run ./scripts/refine_for_detection.sh 2 or download the [Motifnet-SGDet](https://drive.google.com/open?id=1thd_5uSamJQaXAPVGVOUZGAOfGCYZYmb) checkpoint.
41 | 6. Evaluate: refer to the scripts ./scripts/eval_models_sg[cls/det].sh.
42 | 
43 | # help
44 | 
45 | Feel free to open an issue if you encounter trouble getting it to work!
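As a quick reference, the setup steps above can be strung together as the
following shell session. The PYTHONPATH and the GPU id are placeholders;
check each script for the exact arguments it expects:

```
export PYTHONPATH=/path/to/neural-motifs   # step 1: point at your checkout
make                                       # step 2: compile the RoI / LSTM ops
./scripts/pretrain_detector.sh             # step 3: pretrain VG detection
./scripts/train_models_sgcls.sh 2          # step 4: SGCls (on GPU 2)
./scripts/refine_for_detection.sh 2        # step 5: refine for SGDet
./scripts/eval_models_sgcls.sh             # step 6: evaluate
```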
46 | 
-------------------------------------------------------------------------------- /neural-motifs/dataloaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/dataloaders/__init__.py -------------------------------------------------------------------------------- /neural-motifs/dataloaders/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Some image transforms
2 | 
3 | from PIL import Image, ImageOps, ImageFilter, ImageEnhance
4 | import numpy as np
5 | from random import randint
6 | # All of these need to be called on PIL images
7 | 
8 | class SquarePad(object):
9 |     def __call__(self, img):
10 |         w, h = img.size
11 |         img_padded = ImageOps.expand(img, border=(0, 0, max(h - w, 0), max(w - h, 0)),
12 |                                      fill=(int(0.485 * 256), int(0.456 * 256), int(0.406 * 256)))
13 |         return img_padded
14 | 
15 | 
16 | class Grayscale(object):
17 |     """
18 |     Randomly desaturates the image (not always, sometimes).
19 |     """
20 |     def __call__(self, img):
21 |         factor = np.sqrt(np.sqrt(np.random.rand(1)))
22 |         # print("gray {}".format(factor))
23 |         enhancer = ImageEnhance.Color(img)
24 |         return enhancer.enhance(factor)
25 | 
26 | 
27 | class Brightness(object):
28 |     """
29 |     Randomly adjusts the brightness.
30 |     """
31 |     def __call__(self, img):
32 |         factor = np.random.randn(1)/6+1
33 |         factor = min(max(factor, 0.5), 1.5)
34 |         # print("brightness {}".format(factor))
35 | 
36 |         enhancer = ImageEnhance.Brightness(img)
37 |         return enhancer.enhance(factor)
38 | 
39 | 
40 | class Contrast(object):
41 |     """
42 |     Randomly adjusts the contrast.
43 |     """
44 |     def __call__(self, img):
45 |         factor = np.random.randn(1)/8+1.0
46 |         factor = min(max(factor, 0.5), 1.5)
47 |         # print("contrast {}".format(factor))
48 | 
49 |         enhancer = ImageEnhance.Contrast(img)
50 |         return enhancer.enhance(factor)
51 | 
52 | 
53 | class Hue(object):
54 |     """
55 |     Randomly shifts the hue.
56 |     """
57 |     def __call__(self, img):
58 |         # 30 seems good
59 |         factor = int(np.random.randn(1)*8)
60 |         factor = min(max(factor, -30), 30)
61 |         factor = np.array(factor, dtype=np.uint8)
62 | 
63 |         hsv = np.array(img.convert('HSV'))
64 |         hsv[:,:,0] += factor
65 |         new_img = Image.fromarray(hsv, 'HSV').convert('RGB')
66 | 
67 |         return new_img
68 | 
69 | 
70 | class Sharpness(object):
71 |     """
72 |     Randomly adjusts the sharpness.
73 |     """
74 |     def __call__(self, img):
75 |         factor = 1.0 + np.random.randn(1)/5
76 |         # print("sharpness {}".format(factor))
77 |         enhancer = ImageEnhance.Sharpness(img)
78 |         return enhancer.enhance(factor)
79 | 
80 | 
81 | def random_crop(img, boxes, box_scale, round_boxes=True, max_crop_fraction=0.1):
82 |     """
83 |     Randomly crops the image
84 |     :param img: PIL image
85 |     :param boxes: Ground truth boxes
86 |     :param box_scale: This is the scale that the boxes are at (e.g. 1024 wide). We'll preserve that ratio
87 |     :param round_boxes: Set this to true if we're going to round the boxes to ints
88 |     :return: Cropped image, new boxes
89 |     """
90 | 
91 |     w, h = img.size
92 | 
93 |     max_crop_w = int(w*max_crop_fraction)
94 |     max_crop_h = int(h*max_crop_fraction)
95 |     boxes_scaled = boxes * max(w,h) / box_scale
96 |     max_to_crop_top = min(int(boxes_scaled[:, 1].min()), max_crop_h)
97 |     max_to_crop_left = min(int(boxes_scaled[:, 0].min()), max_crop_w)
98 |     max_to_crop_right = min(int(w - boxes_scaled[:, 2].max()), max_crop_w)
99 |     max_to_crop_bottom = min(int(h - boxes_scaled[:, 3].max()), max_crop_h)
100 | 
101 |     crop_top = randint(0, max(max_to_crop_top, 0))
102 |     crop_left = randint(0, max(max_to_crop_left, 0))
103 |     crop_right = randint(0, max(max_to_crop_right, 0))
104 |     crop_bottom = randint(0, max(max_to_crop_bottom, 0))
105 |     img_cropped = img.crop((crop_left, crop_top, w - crop_right, h - crop_bottom))
106 | 
107 |     new_boxes = box_scale / max(img_cropped.size) * np.column_stack(
108 |         (boxes_scaled[:,0]-crop_left, boxes_scaled[:,1]-crop_top, boxes_scaled[:,2]-crop_left, boxes_scaled[:,3]-crop_top))
109 | 
110 |     if round_boxes:
111 |         new_boxes = np.round(new_boxes).astype(np.int32)
112 |     return img_cropped, new_boxes
113 | 
114 | 
115 | class RandomOrder(object):
116 |     """ Composes several transforms together in random order - or not at all!
117 |     """
118 | 
119 |     def __init__(self, transforms):
120 |         self.transforms = transforms
121 | 
122 |     def __call__(self, img):
123 |         if self.transforms is None:
124 |             return img
125 |         num_to_pick = np.random.choice(len(self.transforms))
126 |         if num_to_pick == 0:
127 |             return img
128 | 
129 |         order = np.random.choice(len(self.transforms), size=num_to_pick, replace=False)
130 |         for i in order:
131 |             img = self.transforms[i](img)
132 |         return img -------------------------------------------------------------------------------- /neural-motifs/docs/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License
2 | 
3 | Copyright (c) 2017 Heiswayi Nrird
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /neural-motifs/docs/_config.yaml: -------------------------------------------------------------------------------- 1 | exclude: [README.md, LICENSE.md] 2 | 3 | defaults: 4 | - values: 5 | layout: default 6 | -------------------------------------------------------------------------------- /neural-motifs/docs/_includes/image.html: -------------------------------------------------------------------------------- 1 |
2 | {{ include.description }}
3 | [remaining HTML markup lost in extraction]
-------------------------------------------------------------------------------- /neural-motifs/docs/_layouts/default.html: -------------------------------------------------------------------------------- [HTML markup lost in extraction; the recoverable content is the Liquid variables {{ page.title }} in the document head and {{ content }} in the body] -------------------------------------------------------------------------------- /neural-motifs/docs/index.md: -------------------------------------------------------------------------------- 1 | ---
2 | permalink: /
3 | title: Neural Motifs
4 | author: Rowan Zellers
5 | description: Scene Graph Parsing with Global Context (CVPR 2018)
6 | google_analytics_id: UA-84290243-3
7 | ---
8 | # Neural Motifs: Scene Graph Parsing with Global Context (CVPR 2018)
9 | 
10 | ### by [Rowan Zellers](https://rowanzellers.com), [Mark Yatskar](https://homes.cs.washington.edu/~my89/), [Sam Thomson](http://samthomson.com/), [Yejin Choi](https://homes.cs.washington.edu/~yejin/)
11 | 
12 | 
13 | {% include image.html url="teaser.png" description="teaser" %}
14 | 
15 | # Overview
16 | 
17 | * In this work, we investigate the problem of producing structured graph representations of visual scenes. Similar to object detection, we must predict a box around each object. Here, we also need to predict an edge (with one of several labels, possibly `background`) between every ordered pair of boxes, producing a directed graph where the edges hopefully represent the semantics and interactions present in the scene.
18 | * We present an analysis of the [Visual Genome Scene Graphs dataset](http://visualgenome.org/). In particular:
19 |     * Object labels (e.g. person, shirt) are highly predictive of edge labels (e.g. wearing), but **not vice versa**.
20 |     * Over 90% of the edges in the dataset are non-semantic.
21 |     * There is a significant amount of structure in the dataset, in the form of graph motifs (regularly appearing substructures).
22 | * Motivated by our analysis, we present a simple baseline that outperforms previous approaches.
23 | * We introduce Stacked Motif Networks (MotifNet), a novel architecture designed to capture higher-order motifs in scene graphs. In doing so, it achieves a sizeable performance gain over the prior state-of-the-art.
24 | 
25 | # Read the paper!
26 | The old version of the paper is available at [arxiv link](https://arxiv.org/abs/1711.06640) - camera ready version coming soon!
27 | 
28 | # Bibtex
29 | ```
30 | @inproceedings{zellers2018scenegraphs,
31 |   title={Neural Motifs: Scene Graph Parsing with Global Context},
32 |   author={Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin},
33 |   booktitle = "Conference on Computer Vision and Pattern Recognition",
34 |   year={2018}
35 | }
36 | ```
37 | 
38 | # View some examples!
39 | 
40 | Check out [this tool](https://rowanzellers.com/scenegraph2/) I made to visualize the scene graph predictions. Disclaimer: the predictions are from an earlier version of the model, but hopefully they're still helpful!
41 | 
42 | # Code
43 | 
44 | Visit the [`neural-motifs` GitHub repository](https://github.com/rowanz/neural-motifs) for our reference implementation and instructions for running our code.
45 | 
46 | It is released under the MIT license.
47 | 
48 | # Checkpoints available for download
49 | * [Pretrained Detector](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX)
50 | * [Motifnet-SGDet](https://drive.google.com/open?id=1thd_5uSamJQaXAPVGVOUZGAOfGCYZYmb)
51 | * [Motifnet-SGCls/PredCls](https://drive.google.com/open?id=12qziGKYjFD3LAnoy4zDT3bcg5QLC0qN6)
52 | 
53 | # questions?
54 | 
55 | Feel free to get in touch! My main website is at [rowanzellers.com](https://rowanzellers.com)
56 | 
-------------------------------------------------------------------------------- /neural-motifs/docs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/docs/teaser.png -------------------------------------------------------------------------------- /neural-motifs/docs/upload.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash
2 | 
3 | scp -r _site/* USERNAME@SITE:~/rowanzellers.com/neuralmotifs -------------------------------------------------------------------------------- /neural-motifs/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/draw_rectangles/draw_rectangles.pyx: -------------------------------------------------------------------------------- 1 | ######
2 | # Draws rectangles
3 | ######
4 | 
5 | cimport cython
6 | import numpy as np
7 | cimport numpy as np
8 | 
9 | DTYPE = np.float32
10 | ctypedef np.float32_t DTYPE_t
11 | 
12 | def draw_union_boxes(bbox_pairs, pooling_size, padding=0):
13 |     """
14 |     Draws union boxes for the image.
15 |     :param bbox_pairs: [num_pairs, 8] array; columns 0-3 and 4-7 hold the
16 |                        two boxes of each pair, in shared image coordinates
17 |     :param pooling_size: resize everything to this size
18 |     :param padding: only padding=0 is supported for now
19 |     :return: [num_pairs, 2, pooling_size, pooling_size] array
20 |     """
21 |     assert padding == 0, "Padding>0 not supported yet"
22 |     return draw_union_boxes_c(bbox_pairs, pooling_size)
23 | 
24 | cdef DTYPE_t minmax(DTYPE_t x):
25 |     return min(max(x, 0), 1)
26 | 
27 | cdef np.ndarray[DTYPE_t, ndim=4] draw_union_boxes_c(
28 |     np.ndarray[DTYPE_t, ndim=2] box_pairs, unsigned int pooling_size):
29 |     """
30 |     Parameters
31 |     ----------
32 |     box_pairs: (N, 8) ndarray of float; two boxes per row
33 |     pooling_size: resolution of the output rasterization grid
34 |     Returns
35 |     -------
36 |     uboxes: (N, 2, pooling_size, pooling_size) ndarray softly rasterizing each box of a pair in the frame of the pair's union box
37 |     """
38 |     cdef unsigned int N = box_pairs.shape[0]
39 | 
40 |     cdef np.ndarray[DTYPE_t, ndim = 4] uboxes = np.zeros(
41 |         (N, 2, pooling_size, pooling_size), dtype=DTYPE)
42 |     cdef DTYPE_t x1_union, y1_union, x2_union, y2_union, w, h, x1_box, y1_box, x2_box, y2_box, y_contrib, x_contrib
43 |     cdef unsigned int n, i, j, k
44 | 
45 |     for n in range(N):
46 |         x1_union = min(box_pairs[n, 0], box_pairs[n, 4])
47 |         y1_union = min(box_pairs[n, 1], box_pairs[n, 5])
48 |         x2_union = max(box_pairs[n, 2], box_pairs[n, 6])
49 |         y2_union = max(box_pairs[n, 3], box_pairs[n, 7])
50 | 
51 |         w = x2_union - x1_union
52 |         h = y2_union - y1_union
53 | 
54 |         for i in range(2):
55 |             # Rescale this box into the union box's frame so its coordinates lie in [0, pooling_size].
56 | x1_box = (box_pairs[n, 0+4*i] - x1_union)*pooling_size / w 57 | y1_box = (box_pairs[n, 1+4*i] - y1_union)*pooling_size / h 58 | x2_box = (box_pairs[n, 2+4*i] - x1_union)*pooling_size / w 59 | y2_box = (box_pairs[n, 3+4*i] - y1_union)*pooling_size / h 60 | # print("{:.3f}, {:.3f}, {:.3f}, {:.3f}".format(x1_box, y1_box, x2_box, y2_box)) 61 | for j in range(pooling_size): 62 | y_contrib = minmax(j+1-y1_box)*minmax(y2_box-j) 63 | for k in range(pooling_size): 64 | x_contrib = minmax(k+1-x1_box)*minmax(x2_box-k) 65 | # print("j {} yc {} k {} xc {}".format(j, y_contrib, k, x_contrib)) 66 | uboxes[n,i,j,k] = x_contrib*y_contrib 67 | return uboxes 68 | -------------------------------------------------------------------------------- /neural-motifs/lib/draw_rectangles/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="draw_rectangles_cython", ext_modules=cythonize('draw_rectangles.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /neural-motifs/lib/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/evaluation/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/anchor_targets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates anchor targets to train the detector. Does this during the collate step in training 3 | as it's much cheaper to do this on a separate thread. 4 | 5 | Heavily adapted from faster_rcnn/rpn_msr/anchor_target_layer.py. 6 | """ 7 | import numpy as np 8 | import numpy.random as npr 9 | 10 | from config import IM_SCALE, RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, \ 11 | RPN_BATCHSIZE, RPN_FG_FRACTION, ANCHOR_SIZE, ANCHOR_SCALES, ANCHOR_RATIOS 12 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 13 | from lib.fpn.generate_anchors import generate_anchors 14 | 15 | 16 | def anchor_target_layer(gt_boxes, im_size, 17 | allowed_border=0): 18 | """ 19 | Assign anchors to ground-truth targets. Produces anchor classification 20 | labels and bounding-box regression targets. 21 | 22 | for each (H, W) location i 23 | generate 3 anchor boxes centered on cell i 24 | filter out-of-image anchors 25 | measure GT overlap 26 | 27 | :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as 28 | the image (IM_SCALE) 29 | :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE 30 | """ 31 | if max(im_size) != IM_SCALE: 32 | raise ValueError("im size is {}".format(im_size)) 33 | h, w = im_size 34 | 35 | # Get the indices of the anchors in the feature map. 
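    # generate_anchors tiles the A base anchors (one per scale/ratio pair)
    # over every feat_stride-spaced cell of the IM_SCALE image, giving an
    # (h, w, A, 4) array of candidate boxes.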
36 | # h, w, A, 4 37 | ans_np = generate_anchors(base_size=ANCHOR_SIZE, 38 | feat_stride=16, 39 | anchor_scales=ANCHOR_SCALES, 40 | anchor_ratios=ANCHOR_RATIOS, 41 | ) 42 | ans_np_flat = ans_np.reshape((-1, 4)) 43 | inds_inside = np.where( 44 | (ans_np_flat[:, 0] >= -allowed_border) & 45 | (ans_np_flat[:, 1] >= -allowed_border) & 46 | (ans_np_flat[:, 2] < w + allowed_border) & # width 47 | (ans_np_flat[:, 3] < h + allowed_border) # height 48 | )[0] 49 | good_ans_flat = ans_np_flat[inds_inside] 50 | if good_ans_flat.size == 0: 51 | raise ValueError("There were no good anchors for an image of size {} with boxes {}".format(im_size, gt_boxes)) 52 | 53 | # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes] 54 | overlaps = bbox_overlaps(good_ans_flat, gt_boxes) 55 | anchor_to_gtbox = overlaps.argmax(axis=1) 56 | max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox] 57 | gtbox_to_anchor = overlaps.argmax(axis=0) 58 | gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])] 59 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 60 | 61 | # Good anchors are those that match SOMEWHERE within a decent tolerance 62 | # label: 1 is positive, 0 is negative, -1 is dont care. 63 | # assign bg labels first so that positive labels can clobber them 64 | labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64) 65 | labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0 66 | labels[gt_argmax_overlaps] = 1 67 | labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1 74 | 75 | # subsample negative labels if we have too many 76 | num_bg = RPN_BATCHSIZE - np.sum(labels == 1) 77 | bg_inds = np.where(labels == 0)[0] 78 | if len(bg_inds) > num_bg: 79 | labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1 80 | # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0])) 81 | 82 | 83 | # Get the labels at the original size 84 | labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64) 85 | labels_unmap[inds_inside] = labels 86 | 87 | # h, w, A 88 | labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1]) 89 | anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0)) 90 | 91 | # These ought to be in the same order 92 | anchor_inds_flat = np.where(labels >= 0)[0] 93 | anchors = good_ans_flat[anchor_inds_flat] 94 | bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]] 95 | labels = labels[anchor_inds_flat] 96 | 97 | assert np.all(labels >= 0) 98 | 99 | 100 | # Anchors: [num_used, 4] 101 | # Anchor_inds: [num_used, 3] (h, w, A) 102 | # bbox_targets: [num_used, 4] 103 | # labels: [num_used] 104 | 105 | return anchors, anchor_inds, bbox_targets, labels 106 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/box_intersections_cpu/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import 
numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(boxes, query_boxes): 16 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 17 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 18 | 19 | return bbox_overlaps_c(boxes_contig, query_contig) 20 | 21 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 22 | np.ndarray[DTYPE_t, ndim=2] boxes, 23 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 24 | """ 25 | Parameters 26 | ---------- 27 | boxes: (N, 4) ndarray of float 28 | query_boxes: (K, 4) ndarray of float 29 | Returns 30 | ------- 31 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 32 | """ 33 | cdef unsigned int N = boxes.shape[0] 34 | cdef unsigned int K = query_boxes.shape[0] 35 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 36 | cdef DTYPE_t iw, ih, box_area 37 | cdef DTYPE_t ua 38 | cdef unsigned int k, n 39 | for k in range(K): 40 | box_area = ( 41 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 42 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 43 | ) 44 | for n in range(N): 45 | iw = ( 46 | min(boxes[n, 2], query_boxes[k, 2]) - 47 | max(boxes[n, 0], query_boxes[k, 0]) + 1 48 | ) 49 | if iw > 0: 50 | ih = ( 51 | min(boxes[n, 3], query_boxes[k, 3]) - 52 | max(boxes[n, 1], query_boxes[k, 1]) + 1 53 | ) 54 | if ih > 0: 55 | ua = float( 56 | (boxes[n, 2] - boxes[n, 0] + 1) * 57 | (boxes[n, 3] - boxes[n, 1] + 1) + 58 | box_area - iw * ih 59 | ) 60 | overlaps[n, k] = iw * ih / ua 61 | return overlaps 62 | 63 | 64 | def bbox_intersections(boxes, query_boxes): 65 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 66 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 67 | 68 | return bbox_intersections_c(boxes_contig, query_contig) 69 | 70 | 71 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 72 | np.ndarray[DTYPE_t, ndim=2] boxes, 73 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 74 | """ 75 | For each query box compute the intersection ratio covered by boxes 76 | ---------- 77 | Parameters 78 | ---------- 79 | boxes: (N, 4) ndarray of float 80 | query_boxes: (K, 4) ndarray of float 81 | Returns 82 | ------- 83 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 84 | """ 85 | cdef unsigned int N = boxes.shape[0] 86 | cdef unsigned int K = query_boxes.shape[0] 87 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 88 | cdef DTYPE_t iw, ih, box_area 89 | cdef DTYPE_t ua 90 | cdef unsigned int k, n 91 | for k in range(K): 92 | box_area = ( 93 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 94 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 95 | ) 96 | for n in range(N): 97 | iw = ( 98 | min(boxes[n, 2], query_boxes[k, 2]) - 99 | max(boxes[n, 0], query_boxes[k, 0]) + 1 100 | ) 101 | if iw > 0: 102 | ih = ( 103 | min(boxes[n, 3], query_boxes[k, 3]) - 104 | max(boxes[n, 1], query_boxes[k, 1]) + 1 105 | ) 106 | if ih > 0: 107 | intersec[n, k] = iw * ih / box_area 108 | return intersec -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/box_intersections_cpu/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="bbox_cython", ext_modules=cythonize('bbox.pyx'), 
include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch
2 | import numpy as np
3 | from torch.nn import functional as F
4 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps as bbox_overlaps_np
5 | from lib.fpn.box_intersections_cpu.bbox import bbox_intersections as bbox_intersections_np
6 | 
7 | 
8 | def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1):
9 |     """
10 |     Computes the loss for predicting the GT boxes from prior boxes
11 |     :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2)
12 |     :param deltas: [num_boxes, 4] (tx, ty, tw, th)
13 |     :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2)
14 |     :return: summed smooth-L1 loss between deltas and encoded targets, divided by the number of boxes
15 |     """
16 |     prior_centers = center_size(prior_boxes) #(cx, cy, w, h)
17 |     gt_centers = center_size(gt_boxes) #(cx, cy, w, h)
18 | 
19 |     center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:]
20 |     size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:])
21 |     all_targets = torch.cat((center_targets, size_targets), 1)
22 | 
23 |     loss = F.smooth_l1_loss(deltas, all_targets, size_average=False)/(eps + prior_centers.size(0))
24 | 
25 |     return loss
26 | 
27 | 
28 | def bbox_preds(boxes, deltas):
29 |     """
30 |     Converts "deltas" (predicted by the network) along with prior boxes
31 |     into (x1, y1, x2, y2) representation.
32 |     :param boxes: Prior boxes, represented as (x1, y1, x2, y2)
33 |     :param deltas: Offsets (tx, ty, tw, th)
34 |                    The new centers are cx + w*tx and cy + h*ty, and the new
35 |                    sizes are w*exp(tw) and h*exp(th), where (cx, cy, w, h)
36 |                    is the center-size form of the prior boxes.
37 |     :return: Transformed boxes
38 |     """
39 | 
40 |     if boxes.size(0) == 0:
41 |         return boxes
42 |     prior_centers = center_size(boxes)
43 | 
44 |     xys = prior_centers[:, :2] + prior_centers[:, 2:] * deltas[:, :2]
45 | 
46 |     whs = torch.exp(deltas[:, 2:]) * prior_centers[:, 2:]
47 | 
48 |     return point_form(torch.cat((xys, whs), 1))
49 | 
50 | 
51 | def center_size(boxes):
52 |     """ Convert prior_boxes to (cx, cy, w, h)
53 |     representation for comparison to center-size form ground truth data.
54 |     Args:
55 |         boxes: (tensor) point_form boxes
56 |     Return:
57 |         boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
58 |     """
59 |     wh = boxes[:, 2:] - boxes[:, :2] + 1.0
60 | 
61 |     if isinstance(boxes, np.ndarray):
62 |         return np.column_stack((boxes[:, :2] + 0.5 * wh, wh))
63 |     return torch.cat((boxes[:, :2] + 0.5 * wh, wh), 1)
64 | 
65 | 
66 | def point_form(boxes):
67 |     """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
68 |     representation for comparison to point form ground truth data.
69 |     Args:
70 |         boxes: (tensor) center-size default boxes from priorbox layers.
71 |     Return:
72 |         boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
73 | """ 74 | if isinstance(boxes, np.ndarray): 75 | return np.column_stack((boxes[:, :2] - 0.5 * boxes[:, 2:], 76 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0))) 77 | return torch.cat((boxes[:, :2] - 0.5 * boxes[:, 2:], 78 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)), 1) # xmax, ymax 79 | 80 | 81 | ########################################################################### 82 | ### Torch Utils, creds to Max de Groot 83 | ########################################################################### 84 | 85 | def bbox_intersections(box_a, box_b): 86 | """ We resize both tensors to [A,B,2] without new malloc: 87 | [A,2] -> [A,1,2] -> [A,B,2] 88 | [B,2] -> [1,B,2] -> [A,B,2] 89 | Then we compute the area of intersect between box_a and box_b. 90 | Args: 91 | box_a: (tensor) bounding boxes, Shape: [A,4]. 92 | box_b: (tensor) bounding boxes, Shape: [B,4]. 93 | Return: 94 | (tensor) intersection area, Shape: [A,B]. 95 | """ 96 | if isinstance(box_a, np.ndarray): 97 | assert isinstance(box_b, np.ndarray) 98 | return bbox_intersections_np(box_a, box_b) 99 | A = box_a.size(0) 100 | B = box_b.size(0) 101 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 102 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 103 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 104 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 105 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 106 | return inter[:, :, 0] * inter[:, :, 1] 107 | 108 | 109 | def bbox_overlaps(box_a, box_b): 110 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 111 | is simply the intersection over union of two boxes. Here we operate on 112 | ground truth boxes and default boxes. 113 | E.g.: 114 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 115 | Args: 116 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 117 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 118 | Return: 119 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 120 | """ 121 | if isinstance(box_a, np.ndarray): 122 | assert isinstance(box_b, np.ndarray) 123 | return bbox_overlaps_np(box_a, box_b) 124 | 125 | inter = bbox_intersections(box_a, box_b) 126 | area_a = ((box_a[:, 2] - box_a[:, 0] + 1.0) * 127 | (box_a[:, 3] - box_a[:, 1] + 1.0)).unsqueeze(1).expand_as(inter) # [A,B] 128 | area_b = ((box_b[:, 2] - box_b[:, 0] + 1.0) * 129 | (box_b[:, 3] - box_b[:, 1] + 1.0)).unsqueeze(0).expand_as(inter) # [A,B] 130 | union = area_a + area_b - inter 131 | return inter / union # [A,B] 132 | 133 | 134 | def nms_overlaps(boxes): 135 | """ get overlaps for each channel""" 136 | assert boxes.dim() == 3 137 | N = boxes.size(0) 138 | nc = boxes.size(1) 139 | max_xy = torch.min(boxes[:, None, :, 2:].expand(N, N, nc, 2), 140 | boxes[None, :, :, 2:].expand(N, N, nc, 2)) 141 | 142 | min_xy = torch.max(boxes[:, None, :, :2].expand(N, N, nc, 2), 143 | boxes[None, :, :, :2].expand(N, N, nc, 2)) 144 | 145 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 146 | 147 | # n, n, 151 148 | inters = inter[:,:,:,0]*inter[:,:,:,1] 149 | boxes_flat = boxes.view(-1, 4) 150 | areas_flat = (boxes_flat[:,2]- boxes_flat[:,0]+1.0)*( 151 | boxes_flat[:,3]- boxes_flat[:,1]+1.0) 152 | areas = areas_flat.view(boxes.size(0), boxes.size(1)) 153 | union = -inters + areas[None] + areas[:, None] 154 | return inters / union 155 | 156 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/generate_anchors.py: 
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from config import IM_SCALE 8 | 9 | import numpy as np 10 | 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | # array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | def generate_anchors(base_size=16, feat_stride=16, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 40 | """ A wrapper function to generate anchors given different scales 41 | Also return the number of anchors in variable 'length' 42 | """ 43 | anchors = generate_base_anchors(base_size=base_size, 44 | ratios=np.array(anchor_ratios), 45 | scales=np.array(anchor_scales)) 46 | A = anchors.shape[0] 47 | shift_x = np.arange(0, IM_SCALE // feat_stride) * feat_stride # Same as shift_x 48 | shift_x, shift_y = np.meshgrid(shift_x, shift_x) 49 | 50 | shifts = np.stack([shift_x, shift_y, shift_x, shift_y], -1) # h, w, 4 51 | all_anchors = shifts[:, :, None] + anchors[None, None] #h, w, A, 4 52 | return all_anchors 53 | 54 | # shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 55 | # K = shifts.shape[0] 56 | # # width changes faster, so here it is H, W, C 57 | # anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 58 | # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 59 | # length = np.int32(anchors.shape[0]) 60 | 61 | 62 | def generate_base_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6)): 63 | """ 64 | Generate anchor (reference) windows by enumerating aspect ratios X 65 | scales wrt a reference (0, 0, 15, 15) window. 66 | """ 67 | 68 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 69 | ratio_anchors = _ratio_enum(base_anchor, ratios) 70 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 71 | for i in range(ratio_anchors.shape[0])]) 72 | return anchors 73 | 74 | 75 | def _whctrs(anchor): 76 | """ 77 | Return width, height, x center, and y center for an anchor (window). 78 | """ 79 | 80 | w = anchor[2] - anchor[0] + 1 81 | h = anchor[3] - anchor[1] + 1 82 | x_ctr = anchor[0] + 0.5 * (w - 1) 83 | y_ctr = anchor[1] + 0.5 * (h - 1) 84 | return w, h, x_ctr, y_ctr 85 | 86 | 87 | def _mkanchors(ws, hs, x_ctr, y_ctr): 88 | """ 89 | Given a vector of widths (ws) and heights (hs) around a center 90 | (x_ctr, y_ctr), output a set of anchors (windows). 
91 | """ 92 | 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 96 | y_ctr - 0.5 * (hs - 1), 97 | x_ctr + 0.5 * (ws - 1), 98 | y_ctr + 0.5 * (hs - 1))) 99 | return anchors 100 | 101 | 102 | def _ratio_enum(anchor, ratios): 103 | """ 104 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 105 | """ 106 | 107 | w, h, x_ctr, y_ctr = _whctrs(anchor) 108 | size = w * h 109 | size_ratios = size / ratios 110 | # NOTE: CHANGED TO NOT HAVE ROUNDING 111 | ws = np.sqrt(size_ratios) 112 | hs = ws * ratios 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """ 119 | Enumerate a set of anchors for each scale wrt an anchor. 120 | """ 121 | 122 | w, h, x_ctr, y_ctr = _whctrs(anchor) 123 | ws = w * scales 124 | hs = h * scales 125 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 126 | return anchors 127 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd anchors 4 | python setup.py build_ext --inplace 5 | cd .. 6 | 7 | cd box_intersections_cpu 8 | python setup.py build_ext --inplace 9 | cd .. 10 | 11 | cd cpu_nms 12 | python build.py 13 | cd .. 14 | 15 | cd roi_align 16 | python build.py -C src/cuda clean 17 | python build.py -C src/cuda clean 18 | cd .. 19 | 20 | echo "Done compiling hopefully" 21 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/Makefile: -------------------------------------------------------------------------------- 1 | all: src/cuda/nms.cu.o 2 | python build.py 3 | 4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/functions/nms.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a 
variable. 12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 -gencode arch=compute_61,code=sm_61 3 | # /usr/local/cuda/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 4 | clean: 5 | rm nms.cu.o 6 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if 
(threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 4 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 0]; 49 | block_boxes[threadIdx.x * 4 + 1] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 1]; 51 | block_boxes[threadIdx.x * 4 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 2]; 53 | block_boxes[threadIdx.x * 4 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 3]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size) { 59 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 60 | const float *cur_box = dev_boxes + cur_box_idx * 4; 61 | int i = 0; 62 | unsigned long long t = 0; 63 | int start = 0; 64 | if (row_start == col_start) { 65 | start = threadIdx.x + 1; 66 | } 67 | for (i = start; i < col_size; i++) { 68 | if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) { 69 | t |= 1ULL << i; 70 | } 71 | } 72 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 73 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 74 | } 75 | } 76 | 77 | void _set_device(int device_id) { 78 | int current_device; 79 | CUDA_CHECK(cudaGetDevice(&current_device)); 80 | if (current_device == device_id) { 81 | return; 82 | } 83 | // The call to cudaSetDevice must come before any calls to Get, which 84 | // may perform initialization using the GPU. 85 | CUDA_CHECK(cudaSetDevice(device_id)); 86 | } 87 | 88 | extern "C" int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 89 | float nms_overlap_thresh, int device_id) { 90 | _set_device(device_id); 91 | 92 | unsigned long long* mask_dev = NULL; 93 | 94 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 95 | 96 | CUDA_CHECK(cudaMalloc(&mask_dev, 97 | boxes_num * col_blocks * sizeof(unsigned long long))); 98 | 99 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 100 | DIVUP(boxes_num, threadsPerBlock)); 101 | dim3 threads(threadsPerBlock); 102 | nms_kernel<<<blocks, threads>>>(boxes_num, 103 | nms_overlap_thresh, 104 | boxes_dev, 105 | mask_dev); 106 | 107 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 108 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 109 | mask_dev, 110 | sizeof(unsigned long long) * boxes_num * col_blocks, 111 | cudaMemcpyDeviceToHost)); 112 | 113 | std::vector<unsigned long long> remv(col_blocks); 114 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | CUDA_CHECK(cudaFree(mask_dev)); 131 | return num_to_keep; 132 | } 133 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 2 | float nms_overlap_thresh, int device_id); 3 | 4 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <THC/THC.h> 3 | #include "cuda/nms_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int nms_apply(THIntTensor* keep, THCudaTensor*
boxes_sorted, const float nms_thresh) 8 | { 9 | int* keep_data = THIntTensor_data(keep); 10 | const float* boxes_sorted_data = THCudaTensor_data(state, boxes_sorted); 11 | 12 | const int boxes_num = THCudaTensor_size(state, boxes_sorted, 0); 13 | 14 | const int devId = THCudaTensor_getDevice(state, boxes_sorted); 15 | 16 | int numTotalKeep = ApplyNMSGPU(keep_data, boxes_sorted_data, boxes_num, nms_thresh, devId); 17 | return numTotalKeep; 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh); -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_det.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.random as npr 4 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION, ROIS_PER_IMG 5 | from lib.fpn.box_utils import bbox_overlaps 6 | from lib.pytorch_misc import to_variable 7 | import torch 8 | 9 | ############################################################# 10 | # The following is only for object detection 11 | @to_variable 12 | def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5): 13 | """ 14 | Assign object detection proposals to ground-truth targets. Produces proposal 15 | classification labels and bounding-box regression targets. 16 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 17 | :param gt_boxes: [num_boxes, 4] array of [x0, y0, x1, y1] 18 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 19 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets: [num_rois, 4] array of targets for the labels.
24 | """ 25 | fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION)) 26 | 27 | gt_img_inds = gt_classes[:, 0] - image_offset 28 | 29 | all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0) 30 | 31 | ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0) 32 | 33 | im_sorted, idx = torch.sort(ims_per_box, 0) 34 | all_boxes = all_boxes[idx] 35 | 36 | # Assume that the GT boxes are already sorted in terms of image id 37 | num_images = int(im_sorted[-1]) + 1 38 | 39 | labels = [] 40 | rois = [] 41 | bbox_targets = [] 42 | for im_ind in range(num_images): 43 | g_inds = (gt_img_inds == im_ind).nonzero() 44 | 45 | if g_inds.dim() == 0: 46 | continue 47 | g_inds = g_inds.squeeze(1) 48 | g_start = g_inds[0] 49 | g_end = g_inds[-1] + 1 50 | 51 | t_inds = (im_sorted == im_ind).nonzero().squeeze(1) 52 | t_start = t_inds[0] 53 | t_end = t_inds[-1] + 1 54 | 55 | # Max overlaps: for each predicted box, get the max ROI 56 | # Get the indices into the GT boxes too (must offset by the box start) 57 | ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end]) 58 | max_overlaps, gt_assignment = ious.max(1) 59 | max_overlaps = max_overlaps.cpu().numpy() 60 | # print("Best overlap is {}".format(max_overlaps.max())) 61 | # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start)) 62 | gt_assignment += g_start 63 | 64 | keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image, 65 | ROIS_PER_IMG) 66 | 67 | if keep_inds_np.size == 0: 68 | continue 69 | 70 | keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device()) 71 | 72 | labels_ = gt_classes[:, 1][gt_assignment[keep_inds]] 73 | bbox_target_ = gt_boxes[gt_assignment[keep_inds]] 74 | 75 | # Clamp labels_ for the background RoIs to 0 76 | if num_fg < labels_.size(0): 77 | labels_[num_fg:] = 0 78 | 79 | rois_ = torch.cat(( 80 | im_sorted[t_start:t_end, None][keep_inds].float(), 81 | all_boxes[t_start:t_end][keep_inds], 82 | ), 1) 83 | 84 | labels.append(labels_) 85 | rois.append(rois_) 86 | bbox_targets.append(bbox_target_) 87 | 88 | rois = torch.cat(rois, 0) 89 | labels = torch.cat(labels, 0) 90 | bbox_targets = torch.cat(bbox_targets, 0) 91 | return rois, labels, bbox_targets 92 | 93 | 94 | def _sel_inds(max_overlaps, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256): 95 | # Select foreground RoIs as those with >= FG_THRESH overlap 96 | fg_inds = np.where(max_overlaps >= fg_thresh)[0] 97 | 98 | # Guard against the case when an image has fewer than fg_rois_per_image 99 | # foreground RoIs 100 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.shape[0]) 101 | # Sample foreground regions without replacement 102 | if fg_inds.size > 0: 103 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 104 | 105 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 106 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 107 | 108 | # Compute number of background RoIs to take from this image (guarding 109 | # against there being fewer than desired) 110 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 111 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 112 | # Sample background regions without replacement 113 | if bg_inds.size > 0: 114 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 115 | 116 | return np.append(fg_inds, bg_inds), fg_rois_per_this_image 117 | 118 | 
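As a quick, hedged illustration of how `_sel_inds` above balances a sampled RoI batch (this sketch is not part of the repo; the toy `max_overlaps` values and the small caps are invented, and the background window still depends on the `BG_THRESH_LO`/`BG_THRESH_HI` values imported from `config.py`):

```python
import numpy as np

# Toy max-IoU of eight proposals against their best-matching GT boxes.
max_overlaps = np.array([0.9, 0.7, 0.6, 0.4, 0.3, 0.2, 0.1, 0.05])

# With fg_thresh=0.5, proposals 0-2 are foreground candidates; at most
# fg_rois_per_image of them are kept, and the batch is filled up to
# rois_per_image with background proposals whose overlap falls inside
# [BG_THRESH_LO, BG_THRESH_HI).
keep_inds, num_fg = _sel_inds(max_overlaps, fg_thresh=0.5,
                              fg_rois_per_image=2, rois_per_image=4)
# keep_inds lists the num_fg sampled foreground indices first, then the
# background ones; the exact indices vary because both groups are drawn
# randomly without replacement.
```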
-------------------------------------------------------------------------------- /neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_gtbox.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of [x0, y0, x1, y1]. Seemingly not needed here. 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 18 | Note, the img_inds here start at image_offset 19 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets: [num_rois, 4] array of targets for the labels. 24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs haven't been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per-image basis 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel.
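# (Editor's note, not in the repo.) The composite key below encodes
# (img_ind, box0, box1) lexicographically: with N = gt_boxes.size(0) and box
# indices < N, img*N^2 + box0*N + box1 is unique per triple, so a single
# torch.sort orders the rels by image and then by both box indices.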
81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/proposal_assignments/proposal_assignments_postnms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from .proposal_assignments_rel import _sel_rels 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable 11 | import torch 12 | 13 | 14 | @to_variable 15 | def proposal_assignments_postnms( 16 | rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5, 17 | max_objs=100, max_rels=100, rand_val=0.01): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of [x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type) 30 | """ 31 | pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64) 32 | pred_boxes_np = rois[:, 1:].cpu().numpy() 33 | nms_inds_np = nms_inds.cpu().numpy() 34 | sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np) 35 | 36 | # split into chosen and suppressed 37 | chosen_inds_np = pred_inds_np[nms_inds_np] 38 | chosen_boxes_np = pred_boxes_np[nms_inds_np] 39 | 40 | suppre_inds_np = pred_inds_np[sup_inds_np] 41 | suppre_boxes_np = pred_boxes_np[sup_inds_np] 42 | 43 | gt_boxes_np = gt_boxes.cpu().numpy() 44 | gt_classes_np = gt_classes.cpu().numpy() 45 | gt_rels_np = gt_rels.cpu().numpy() 46 | 47 | gt_classes_np[:, 0] -= image_offset 48 | gt_rels_np[:, 0] -= image_offset 49 | 50 | num_im = gt_classes_np[:, 0].max()+1 51 | 52 | rois = [] 53 | obj_labels = [] 54 | rel_labels = [] 55 | num_box_seen = 0 56 | 57 | for im_ind in range(num_im): 58 | chosen_ind = np.where(chosen_inds_np == im_ind)[0] 59 | suppre_ind = np.where(suppre_inds_np == im_ind)[0] 60 | 61 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 62 | gt_boxes_i = gt_boxes_np[gt_ind] 63 | gt_classes_i = gt_classes_np[gt_ind, 1] 64 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 65 | 66 | # Get IoUs between chosen and GT boxes, and if needed we'll add more in 67 | 68 | chosen_boxes_i = chosen_boxes_np[chosen_ind] 69 | suppre_boxes_i = suppre_boxes_np[suppre_ind] 70 | 71 | n_chosen = chosen_boxes_i.shape[0] 72 | n_suppre = suppre_boxes_i.shape[0] 73 | n_gt_box = gt_boxes_i.shape[0] 74 | 75 | # add a teensy bit of random noise because some GT boxes might be duplicated, etc.
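# (Editor's note, not in the repo.) The noise added below is uniform in
# [-rand_val/2, +rand_val/2) per entry: enough to break argmax ties between
# (near-)duplicate boxes, while the is_hit threshold test is only affected
# for IoUs that were already within rand_val/2 of fg_thresh.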
76 | pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0) 77 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*( 78 | np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5) 79 | 80 | # Let's say that a box can only be assigned ONCE for now because we've already done 81 | # the NMS and stuff. 82 | is_hit = ious > fg_thresh 83 | 84 | obj_assignments_i = is_hit.argmax(1) 85 | obj_assignments_i[~is_hit.any(1)] = -1 86 | 87 | vals, first_occurrence_ind = np.unique(obj_assignments_i, return_index=True) 88 | obj_assignments_i[np.setdiff1d( 89 | np.arange(obj_assignments_i.shape[0]), first_occurrence_ind)] = -1 90 | 91 | extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen 92 | 93 | # Add them in somewhere at random 94 | num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0]) 95 | boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32) 96 | labels_i = np.zeros(num_inds_to_have, dtype=np.int64) 97 | 98 | inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False)) 99 | inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms) 100 | 101 | boxes_i[inds_from_nms] = chosen_boxes_i 102 | labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]] 103 | 104 | boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add] 105 | labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]] 106 | 107 | # Now, we do the relationships. Same as for rels. 108 | all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i), 109 | boxes_i, 110 | labels_i, 111 | gt_classes_i, 112 | gt_rels_i, 113 | fg_thresh=fg_thresh, 114 | fg_rels_per_image=100) 115 | all_rels_i[:,0:2] += num_box_seen 116 | 117 | rois.append(np.column_stack(( 118 | im_ind * np.ones(boxes_i.shape[0], dtype=np.float32), 119 | boxes_i, 120 | ))) 121 | obj_labels.append(labels_i) 122 | rel_labels.append(np.column_stack(( 123 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 124 | all_rels_i, 125 | ))) 126 | num_box_seen += boxes_i.shape[0] # count boxes (rows), not array elements 127 | 128 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), async=True) 129 | labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), async=True) 130 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(), 131 | async=True) 132 | 133 | return rois, labels, rel_labels 134 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/proposal_assignments/rel_assignments.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. Produces proposal 19 | classification labels and bounding-box regression targets.
20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of [x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels: [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param fg_thresh: Overlap threshold for a ROI to be considered foreground (if >= fg_thresh) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets: [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1 ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | num_box_seen = 0 51 | for im_ind in range(num_im): 52 | pred_ind = np.where(pred_inds_np == im_ind)[0] 53 | 54 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 55 | gt_boxes_i = gt_boxes_np[gt_ind] 56 | gt_classes_i = gt_classes_np[gt_ind, 1] 57 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 58 | 59 | # [num_pred, num_gt] 60 | pred_boxes_i = pred_boxes_np[pred_ind] 61 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 62 | 63 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 64 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 65 | 66 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 67 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 68 | if filter_non_overlap: 69 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 70 | rels_intersect = rel_possibilities 71 | else: 72 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 73 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 74 | dtype=np.int64) 75 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 76 | 77 | # ONLY select relations between ground truth because otherwise we get useless data 78 | rel_possibilities[pred_boxlabels_i == 0] = 0 79 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 80 | 81 | # Sample the GT relationships.
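# (Editor's note, not in the repo.) For each GT relation
# (from_gtind, to_gtind, rel_id), every ordered pair of proposals matched to
# its two endpoints is a candidate; candidates are weighted by the product of
# their endpoint IoUs, and at most num_sample_per_gt of them are drawn per GT
# relation in the loop below.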
82 | fg_rels = [] 83 | p_size = [] 84 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 85 | fg_rels_i = [] 86 | fg_scores_i = [] 87 | 88 | for from_ind in np.where(is_match[:, from_gtind])[0]: 89 | for to_ind in np.where(is_match[:, to_gtind])[0]: 90 | if from_ind != to_ind: 91 | fg_rels_i.append((from_ind, to_ind, rel_id)) 92 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 93 | rel_possibilities[from_ind, to_ind] = 0 94 | if len(fg_rels_i) == 0: 95 | continue 96 | p = np.array(fg_scores_i) 97 | p = p / p.sum() 98 | p_size.append(p.shape[0]) 99 | num_to_add = min(p.shape[0], num_sample_per_gt) 100 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 101 | fg_rels.append(fg_rels_i[rel_to_add]) 102 | 103 | fg_rels = np.array(fg_rels, dtype=np.int64) 104 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 105 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 106 | elif fg_rels.size == 0: 107 | fg_rels = np.zeros((0, 3), dtype=np.int64) 108 | 109 | bg_rels = np.column_stack(np.where(rel_possibilities)) 110 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 111 | 112 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 113 | if bg_rels.size > 0: 114 | # Sample 4x as many intersecting relationships as non-intersecting. 115 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 116 | # p = bg_rels_intersect.astype(np.float32) 117 | # p[bg_rels_intersect == 0] = 0.2 118 | # p[bg_rels_intersect == 1] = 0.8 119 | # p /= p.sum() 120 | bg_rels = bg_rels[ 121 | np.random.choice(bg_rels.shape[0], 122 | #p=p, 123 | size=num_bg_rel, replace=False)] 124 | else: 125 | bg_rels = np.zeros((0, 3), dtype=np.int64) 126 | 127 | if fg_rels.size == 0 and bg_rels.size == 0: 128 | # Just put something here 129 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 130 | 131 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 132 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 133 | all_rels_i[:,0:2] += num_box_seen 134 | 135 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 136 | 137 | rel_labels.append(np.column_stack(( 138 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 139 | all_rels_i, 140 | ))) 141 | 142 | num_box_seen += pred_boxes_i.shape[0] 143 | rel_labels = torch.LongTensor( 144 | np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), async=True 145 | ) 146 | return rel_labels 147 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/Makefile: -------------------------------------------------------------------------------- 1 | all: src/cuda/roi_align.cu.o 2 | python build.py 3 | 4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/cuda/roi_align.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | """ 2 | performs ROI aligning 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Function 7 | from .._ext import roi_align 8 | 9 | class RoIAlignFunction(Function): 10 | def __init__(self, aligned_height, aligned_width, spatial_scale): 11 | self.aligned_width = int(aligned_width) 12 | self.aligned_height = int(aligned_height) 13 | self.spatial_scale = float(spatial_scale) 14 | 15 | self.feature_size = None 16 | 17 | def forward(self, features, rois): 18 | self.save_for_backward(rois) 19 | 20 | rois_normalized = rois.clone() 21 | 22 | self.feature_size = features.size() 23 | batch_size, num_channels, data_height, data_width = self.feature_size 24 | 25 | height = (data_height -1) / self.spatial_scale 26 | width = (data_width - 1) / self.spatial_scale 27 | 28 | rois_normalized[:,1] /= width 29 | rois_normalized[:,2] /= height 30 | rois_normalized[:,3] /= width 31 | rois_normalized[:,4] /= height 32 | 33 | 34 | num_rois = rois.size(0) 35 | 36 | output = features.new(num_rois, 
num_channels, self.aligned_height, 37 | self.aligned_width).zero_() 38 | 39 | if features.is_cuda: 40 | res = roi_align.roi_align_forward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, features, 43 | rois_normalized, output) 44 | assert res == 1 45 | else: 46 | raise ValueError 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | rois = self.saved_tensors[0] 54 | 55 | rois_normalized = rois.clone() 56 | 57 | batch_size, num_channels, data_height, data_width = self.feature_size 58 | 59 | height = (data_height -1) / self.spatial_scale 60 | width = (data_width - 1) / self.spatial_scale 61 | 62 | rois_normalized[:,1] /= width 63 | rois_normalized[:,2] /= height 64 | rois_normalized[:,3] /= width 65 | rois_normalized[:,4] /= height 66 | 67 | grad_input = rois_normalized.new(batch_size, num_channels, data_height, 68 | data_width).zero_() 69 | res = roi_align.roi_align_backward_cuda(self.aligned_height, 70 | self.aligned_width, 71 | self.spatial_scale, grad_output, 72 | rois_normalized, grad_input) 73 | assert res == 1 74 | return grad_input, None 75 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/fpn/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/src/cuda/Makefile: 
-------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 -gencode arch=compute_61,code=sm_61 3 | # /usr/local/cuda/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC -gencode arch=compute_52,code=sm_52 4 | clean: 5 | rm roi_align.cu.o 6 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/src/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, int num_boxes, int batch, int image_height, int image_width, int crop_height, 9 | int crop_width, int depth, float extrapolation_value, float* crops_ptr); 10 | 11 | int ROIAlignForwardLaucher( 12 | const float* image_ptr, const float* boxes_ptr, 13 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 14 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream); 15 | 16 | __global__ void ROIAlignBackward(const int nthreads, const float* grads_ptr, 17 | const float* boxes_ptr, int num_boxes, int batch, int image_height, 18 | int image_width, int crop_height, int crop_width, int depth, 19 | float* grads_image_ptr); 20 | 21 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 22 | int batch, int image_height, int image_width, int crop_height, 23 | int crop_width, int depth, float* grads_image_ptr, cudaStream_t stream); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | 31 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * image_ptr = THCudaTensor_data(state, features); 12 | float * boxes_ptr = THCudaTensor_data(state, rois); 13 | 14 | float * crops_ptr = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_boxes = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | int batch = THCudaTensor_size(state, features, 0); 26 | // data height 27 | int image_height = THCudaTensor_size(state, features, 2); 28 | // data width 29 | int image_width = THCudaTensor_size(state, features, 3); 30 | // Number of channels 31 | int depth = THCudaTensor_size(state, features, 1); 32 | 33 | cudaStream_t stream = THCState_getCurrentStream(state); 34 | float extrapolation_value = 0.0; 35 | 36 | ROIAlignForwardLaucher( 37 | image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 38 | crop_height, crop_width, depth, extrapolation_value, crops_ptr, 39 | stream); 40 | 41 | return 1; 42 | } 43 | 44 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 45 |
THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 46 | { 47 | // Grab the input tensor 48 | float * grads_ptr = THCudaTensor_data(state, top_grad); 49 | float * boxes_ptr = THCudaTensor_data(state, rois); 50 | 51 | float * grads_image_ptr = THCudaTensor_data(state, bottom_grad); 52 | 53 | // Number of ROIs 54 | int num_boxes = THCudaTensor_size(state, rois, 0); 55 | int size_rois = THCudaTensor_size(state, rois, 1); 56 | if (size_rois != 5) 57 | { 58 | return 0; 59 | } 60 | 61 | // batch size 62 | int batch = THCudaTensor_size(state, bottom_grad, 0); 63 | // data height 64 | int image_height = THCudaTensor_size(state, bottom_grad, 2); 65 | // data width 66 | int image_width = THCudaTensor_size(state, bottom_grad, 3); 67 | // Number of channels 68 | int depth = THCudaTensor_size(state, bottom_grad, 1); 69 | 70 | cudaStream_t stream = THCState_getCurrentStream(state); 71 | 72 | ROIAlignBackwardLaucher( 73 | grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 74 | crop_height, crop_width, depth, grads_image_ptr, stream); 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /neural-motifs/lib/fpn/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, 6 | THCudaTensor * bottom_grad); 7 | -------------------------------------------------------------------------------- /neural-motifs/lib/get_dataset_counts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Get counts of all of the examples in the dataset. Used for creating the baseline 3 | dictionary model 4 | """ 5 | 6 | import numpy as np 7 | from dataloaders.visual_genome import VG 8 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 9 | from lib.pytorch_misc import nonintersecting_2d_inds 10 | 11 | 12 | def get_counts(train_data=VG(mode='train', filter_duplicate_rels=False, num_val_im=5000), must_overlap=True): 13 | """ 14 | Get counts of all of the relations. Used for modeling directly P(rel | o1, o2) 15 | :param train_data: 16 | :param must_overlap: 17 | :return: 18 | """ 19 | fg_matrix = np.zeros(( 20 | train_data.num_classes, 21 | train_data.num_classes, 22 | train_data.num_predicates, 23 | ), dtype=np.int64) 24 | 25 | bg_matrix = np.zeros(( 26 | train_data.num_classes, 27 | train_data.num_classes, 28 | ), dtype=np.int64) 29 | 30 | for ex_ind in range(len(train_data)): 31 | gt_classes = train_data.gt_classes[ex_ind].copy() 32 | gt_relations = train_data.relationships[ex_ind].copy() 33 | gt_boxes = train_data.gt_boxes[ex_ind].copy() 34 | 35 | # For the foreground, we'll just look at everything 36 | o1o2 = gt_classes[gt_relations[:, :2]] 37 | for (o1, o2), gtr in zip(o1o2, gt_relations[:,2]): 38 | fg_matrix[o1, o2, gtr] += 1 39 | 40 | # For the background, get all of the things that overlap. 41 | o1o2_total = gt_classes[np.array( 42 | box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)] 43 | for (o1, o2) in o1o2_total: 44 | bg_matrix[o1, o2] += 1 45 | 46 | return fg_matrix, bg_matrix 47 | 48 | 49 | def box_filter(boxes, must_overlap=False): 50 | """ Only include boxes that overlap as possible relations. 
51 | If no overlapping boxes, use all of them.""" 52 | n_cands = boxes.shape[0] 53 | 54 | overlaps = bbox_overlaps(boxes.astype(np.float), boxes.astype(np.float)) > 0 55 | np.fill_diagonal(overlaps, 0) 56 | 57 | all_possib = np.ones_like(overlaps, dtype=np.bool) 58 | np.fill_diagonal(all_possib, 0) 59 | 60 | if must_overlap: 61 | possible_boxes = np.column_stack(np.where(overlaps)) 62 | 63 | if possible_boxes.size == 0: 64 | possible_boxes = np.column_stack(np.where(all_possib)) 65 | else: 66 | possible_boxes = np.column_stack(np.where(all_possib)) 67 | return possible_boxes 68 | 69 | if __name__ == '__main__': 70 | fg, bg = get_counts(must_overlap=False) 71 | -------------------------------------------------------------------------------- /neural-motifs/lib/get_union_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | credits to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/network.py#L91 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from lib.fpn.roi_align.functions.roi_align import RoIAlignFunction 9 | from lib.draw_rectangles.draw_rectangles import draw_union_boxes 10 | import numpy as np 11 | from torch.nn.modules.module import Module 12 | from torch import nn 13 | from config import BATCHNORM_MOMENTUM 14 | 15 | class UnionBoxesAndFeats(Module): 16 | def __init__(self, pooling_size=7, stride=16, dim=256, concat=False, use_feats=True): 17 | """ 18 | :param pooling_size: Pool the union boxes to this dimension 19 | :param stride: pixel spacing in the entire image 20 | :param dim: Dimension of the feats 21 | :param concat: Whether to concat (yes) or add (False) the representations 22 | """ 23 | super(UnionBoxesAndFeats, self).__init__() 24 | 25 | self.pooling_size = pooling_size 26 | self.stride = stride 27 | 28 | self.dim = dim 29 | self.use_feats = use_feats 30 | 31 | self.conv = nn.Sequential( 32 | nn.Conv2d(2, dim //2, kernel_size=7, stride=2, padding=3, bias=True), 33 | nn.ReLU(inplace=True), 34 | nn.BatchNorm2d(dim//2, momentum=BATCHNORM_MOMENTUM), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 36 | nn.Conv2d(dim // 2, dim, kernel_size=3, stride=1, padding=1, bias=True), 37 | nn.ReLU(inplace=True), 38 | nn.BatchNorm2d(dim, momentum=BATCHNORM_MOMENTUM), 39 | ) 40 | self.concat = concat 41 | 42 | def forward(self, fmap, rois, union_inds): 43 | union_pools = union_boxes(fmap, rois, union_inds, pooling_size=self.pooling_size, stride=self.stride) 44 | if not self.use_feats: 45 | return union_pools.detach() 46 | 47 | pair_rois = torch.cat((rois[:, 1:][union_inds[:, 0]], rois[:, 1:][union_inds[:, 1]]),1).data.cpu().numpy() 48 | # rects_np = get_rect_features(pair_rois, self.pooling_size*2-1) - 0.5 49 | rects_np = draw_union_boxes(pair_rois, self.pooling_size*4-1) - 0.5 50 | rects = Variable(torch.FloatTensor(rects_np).cuda(fmap.get_device()), volatile=fmap.volatile) 51 | if self.concat: 52 | return torch.cat((union_pools, self.conv(rects)), 1) 53 | return union_pools + self.conv(rects) 54 | 55 | # def get_rect_features(roi_pairs, pooling_size): 56 | # rects_np = draw_union_boxes(roi_pairs, pooling_size) 57 | # # add union + intersection 58 | # stuff_to_cat = [ 59 | # rects_np.max(1), 60 | # rects_np.min(1), 61 | # np.minimum(1-rects_np[:,0], rects_np[:,1]), 62 | # np.maximum(1-rects_np[:,0], rects_np[:,1]), 63 | # np.minimum(rects_np[:,0], 1-rects_np[:,1]), 64 | # np.maximum(rects_np[:,0], 1-rects_np[:,1]), 65 | # np.minimum(1-rects_np[:,0], 
1-rects_np[:,1]), 66 | # np.maximum(1-rects_np[:,0], 1-rects_np[:,1]), 67 | # ] 68 | # rects_np = np.concatenate([rects_np] + [x[:,None] for x in stuff_to_cat], 1) 69 | # return rects_np 70 | 71 | 72 | def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16): 73 | """ 74 | :param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride) 75 | :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2] 76 | :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2] 77 | :param pooling_size: we'll resize to this 78 | :param stride: 79 | :return: 80 | """ 81 | assert union_inds.size(1) == 2 82 | im_inds = rois[:,0][union_inds[:,0]] 83 | assert (im_inds.data == rois.data[:,0][union_inds[:,1]]).sum() == union_inds.size(0) 84 | union_rois = torch.cat(( 85 | im_inds[:,None], 86 | torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]), 87 | torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]), 88 | ),1) 89 | 90 | # (num_rois, d, pooling_size, pooling_size) 91 | union_pools = RoIAlignFunction(pooling_size, pooling_size, 92 | spatial_scale=1/stride)(fmap, union_rois) 93 | return union_pools 94 | 95 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/highway_lstm_cuda/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/lib/lstm/highway_lstm_cuda/_ext/__init__.py -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/_ext/highway_lstm_layer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._highway_lstm_layer import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/build.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | if not torch.cuda.is_available(): 7 | raise Exception('HighwayLSTM can only be compiled with CUDA') 8 | 9 | sources = ['src/highway_lstm_cuda.c'] 10 | headers = ['src/highway_lstm_cuda.h'] 11 | defines = [('WITH_CUDA', None)] 12 | with_cuda = True 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | extra_objects = ['src/highway_lstm_kernel.cu.o'] 16 | 
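# (Editor's note, not in the repo.) highway_lstm_kernel.cu.o is produced by
# make.sh (via nvcc) before this script runs; the path is made absolute below
# so cffi can link the object file regardless of the working directory.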
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 17 | 18 | ffi = create_extension( 19 | '_ext.highway_lstm_layer', 20 | headers=headers, 21 | sources=sources, 22 | define_macros=defines, 23 | relative_to=__file__, 24 | with_cuda=with_cuda, 25 | extra_objects=extra_objects 26 | ) 27 | 28 | if __name__ == '__main__': 29 | ffi.build() 30 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | # Which CUDA capabilities do we want to pre-build for? 6 | # https://developer.nvidia.com/cuda-gpus 7 | # Compute/shader model Cards 8 | # 61 P4, P40, Titan X 9 | # 60 P100 10 | # 52 M40 11 | # 37 K80 12 | # 35 K40, K20 13 | # 30 K10, Grid K520 (AWS G2) 14 | 15 | CUDA_MODELS=(52 61) 16 | 17 | # Nvidia doesn't guarantee binary compatibility across GPU versions. 18 | # However, binary compatibility within one GPU generation can be guaranteed 19 | # under certain conditions because they share the basic instruction set. 20 | # This is the case between two GPU versions that do not show functional 21 | # differences at all (for instance when one version is a scaled down version 22 | # of the other), or when one version is functionally included in the other. 23 | 24 | # To fix this problem, we can create a 'fat binary' which generates multiple 25 | # translations of the CUDA source. The most appropriate version is chosen at 26 | # runtime by the CUDA driver. See: 27 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-compilation 28 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#fatbinaries 29 | CUDA_MODEL_TARGETS="" 30 | for i in "${CUDA_MODELS[@]}" 31 | do 32 | CUDA_MODEL_TARGETS+=" -gencode arch=compute_${i},code=sm_${i}" 33 | done 34 | 35 | echo "Building kernel for following target architectures: " 36 | echo $CUDA_MODEL_TARGETS 37 | 38 | cd src 39 | echo "Compiling kernel" 40 | /usr/local/cuda/bin/nvcc -c -o highway_lstm_kernel.cu.o highway_lstm_kernel.cu --compiler-options -fPIC $CUDA_MODEL_TARGETS 41 | cd ../ 42 | python build.py 43 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "highway_lstm_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch, 7 | int numLayers, int seqLength, 8 | THCudaTensor *x, 9 | THIntTensor *lengths, 10 | THCudaTensor *h_data, 11 | THCudaTensor *c_data, 12 | THCudaTensor *tmp_i, 13 | THCudaTensor *tmp_h, 14 | THCudaTensor *T, 15 | THCudaTensor *bias, 16 | THCudaTensor *dropout, 17 | THCudaTensor *gates, 18 | int isTraining) { 19 | 20 | float * x_ptr = THCudaTensor_data(state, x); 21 | int * lengths_ptr = THIntTensor_data(lengths); 22 | float * h_data_ptr = THCudaTensor_data(state, h_data); 23 | float * c_data_ptr = THCudaTensor_data(state, c_data); 24 | float * tmp_i_ptr = THCudaTensor_data(state, tmp_i); 25 | float * tmp_h_ptr = THCudaTensor_data(state, tmp_h); 26 | float * T_ptr = THCudaTensor_data(state, T); 27 | float * bias_ptr = THCudaTensor_data(state, bias); 28 | float * dropout_ptr = THCudaTensor_data(state, dropout); 29 | float * gates_ptr; 30 | if (isTraining == 1) { 31 | gates_ptr =
THCudaTensor_data(state, gates); 32 | } else { 33 | gates_ptr = NULL; 34 | } 35 | 36 | cudaStream_t stream = THCState_getCurrentStream(state); 37 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state); 38 | 39 | highway_lstm_forward_ongpu(inputSize, hiddenSize, miniBatch, numLayers, 40 | seqLength, x_ptr, lengths_ptr, h_data_ptr, c_data_ptr, tmp_i_ptr, 41 | tmp_h_ptr, T_ptr, bias_ptr, dropout_ptr, gates_ptr, 42 | isTraining, stream, handle); 43 | 44 | return 1; 45 | 46 | } 47 | 48 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, 49 | THCudaTensor *out_grad, 50 | THIntTensor *lengths, 51 | THCudaTensor *h_data_grad, 52 | THCudaTensor *c_data_grad, 53 | THCudaTensor *x, 54 | THCudaTensor *h_data, 55 | THCudaTensor *c_data, 56 | THCudaTensor *T, 57 | THCudaTensor *gates_out, 58 | THCudaTensor *dropout_in, 59 | THCudaTensor *h_gates_grad, 60 | THCudaTensor *i_gates_grad, 61 | THCudaTensor *h_out_grad, 62 | THCudaTensor *x_grad, 63 | THCudaTensor *T_grad, 64 | THCudaTensor *bias_grad, 65 | int isTraining, 66 | int do_weight_grad) { 67 | 68 | float * out_grad_ptr = THCudaTensor_data(state, out_grad); 69 | int * lengths_ptr = THIntTensor_data(lengths); 70 | float * h_data_grad_ptr = THCudaTensor_data(state, h_data_grad); 71 | float * c_data_grad_ptr = THCudaTensor_data(state, c_data_grad); 72 | float * x_ptr = THCudaTensor_data(state, x); 73 | float * h_data_ptr = THCudaTensor_data(state, h_data); 74 | float * c_data_ptr = THCudaTensor_data(state, c_data); 75 | float * T_ptr = THCudaTensor_data(state, T); 76 | float * gates_out_ptr = THCudaTensor_data(state, gates_out); 77 | float * dropout_in_ptr = THCudaTensor_data(state, dropout_in); 78 | float * h_gates_grad_ptr = THCudaTensor_data(state, h_gates_grad); 79 | float * i_gates_grad_ptr = THCudaTensor_data(state, i_gates_grad); 80 | float * h_out_grad_ptr = THCudaTensor_data(state, h_out_grad); 81 | float * x_grad_ptr = THCudaTensor_data(state, x_grad); 82 | float * T_grad_ptr = THCudaTensor_data(state, T_grad); 83 | float * bias_grad_ptr = THCudaTensor_data(state, bias_grad); 84 | 85 | cudaStream_t stream = THCState_getCurrentStream(state); 86 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state); 87 | 88 | highway_lstm_backward_ongpu(inputSize, hiddenSize, miniBatch, numLayers, 89 | seqLength, out_grad_ptr, lengths_ptr, h_data_grad_ptr, c_data_grad_ptr, 90 | x_ptr, h_data_ptr, c_data_ptr, T_ptr, gates_out_ptr, dropout_in_ptr, 91 | h_gates_grad_ptr, i_gates_grad_ptr, h_out_grad_ptr, 92 | x_grad_ptr, T_grad_ptr, bias_grad_ptr, isTraining, do_weight_grad, 93 | stream, handle); 94 | 95 | return 1; 96 | 97 | } 98 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.h: -------------------------------------------------------------------------------- 1 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, 2 | THCudaTensor *x, THIntTensor *lengths, THCudaTensor *h_data, 3 | THCudaTensor *c_data, THCudaTensor *tmp_i, 4 | THCudaTensor *tmp_h, THCudaTensor *T, THCudaTensor *bias, 5 | THCudaTensor *dropout, THCudaTensor *gates, int isTraining); 6 | 7 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch, 8 | int numLayers, int seqLength, THCudaTensor *out_grad, THIntTensor *lengths, 9 | THCudaTensor *h_data_grad, THCudaTensor *c_data_grad, THCudaTensor *x, 10 | THCudaTensor *h_data, THCudaTensor *c_data, 
THCudaTensor *T, 11 | THCudaTensor *gates_out, THCudaTensor *dropout_in, 12 | THCudaTensor *h_gates_grad, THCudaTensor *i_gates_grad, 13 | THCudaTensor *h_out_grad, THCudaTensor *x_grad, THCudaTensor *T_grad, 14 | THCudaTensor *bias_grad, int isTraining, int do_weight_grad); 15 | -------------------------------------------------------------------------------- /neural-motifs/lib/lstm/highway_lstm_cuda/src/highway_lstm_kernel.h: -------------------------------------------------------------------------------- 1 | #include <cublas_v2.h> 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | void highway_lstm_forward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *x, int *lengths, float*h_data, float *c_data, float *tmp_i, float *tmp_h, float *T, float *bias, float *dropout, float *gates, int is_training, cudaStream_t stream, cublasHandle_t handle); 8 | 9 | void highway_lstm_backward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *out_grad, int *lengths, float *h_data_grad, float *c_data_grad, float *x, float *h_data, float *c_data, float *T, float *gates_out, float *dropout_in, float *h_gates_grad, float *i_gates_grad, float *h_out_grad, float *x_grad, float *T_grad, float *bias_grad, int isTraining, int do_weight_grad, cudaStream_t stream, cublasHandle_t handle); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | -------------------------------------------------------------------------------- /neural-motifs/lib/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | from torchvision.models.resnet import model_urls, conv3x3, BasicBlock 5 | from torchvision.models.vgg import vgg16 6 | from config import BATCHNORM_MOMENTUM 7 | 8 | class Bottleneck(nn.Module): 9 | expansion = 4 10 | 11 | def __init__(self, inplanes, planes, stride=1, downsample=None, relu_end=True): 12 | super(Bottleneck, self).__init__() 13 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 19 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=BATCHNORM_MOMENTUM) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | self.relu_end = relu_end 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | 44 | if self.relu_end: 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class ResNet(nn.Module): 50 | 51 | def __init__(self, block, layers, num_classes=1000): 52 | self.inplanes = 64 53 | super(ResNet, self).__init__() 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 55 | bias=False) 56 | self.bn1 = nn.BatchNorm2d(64, momentum=BATCHNORM_MOMENTUM) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 59 | self.layer1 = self._make_layer(block, 64, layers[0]) 60 |
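# (Editor's note, not in the repo.) Stage layout: conv1 + maxpool downsample
# to stride 4, and layer2/layer3 below each halve resolution again (stride 16
# overall); layer4 is built with stride 1 (the HACK below) so that, split off
# via resnet_l4, it can run on pooled RoI features without shrinking them.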
self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) # HACK 63 | self.avgpool = nn.AvgPool2d(7) 64 | self.fc = nn.Linear(512 * block.expansion, num_classes) 65 | 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 69 | m.weight.data.normal_(0, math.sqrt(2. / n)) 70 | elif isinstance(m, nn.BatchNorm2d): 71 | m.weight.data.fill_(1) 72 | m.bias.data.zero_() 73 | 74 | def _make_layer(self, block, planes, blocks, stride=1): 75 | downsample = None 76 | if stride != 1 or self.inplanes != planes * block.expansion: 77 | downsample = nn.Sequential( 78 | nn.Conv2d(self.inplanes, planes * block.expansion, 79 | kernel_size=1, stride=stride, bias=False), 80 | nn.BatchNorm2d(planes * block.expansion, momentum=BATCHNORM_MOMENTUM), 81 | ) 82 | 83 | layers = [] 84 | layers.append(block(self.inplanes, planes, stride, downsample)) 85 | self.inplanes = planes * block.expansion 86 | for i in range(1, blocks): 87 | layers.append(block(self.inplanes, planes)) 88 | 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | 106 | return x 107 | 108 | def resnet101(pretrained=False, **kwargs): 109 | """Constructs a ResNet-101 model. 110 | 111 | Args: 112 | pretrained (bool): If True, returns a model pre-trained on ImageNet 113 | """ 114 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 115 | if pretrained: 116 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 117 | return model 118 | 119 | def resnet_l123(): 120 | model = resnet101(pretrained=True) 121 | del model.layer4 122 | del model.avgpool 123 | del model.fc 124 | return model 125 | 126 | def resnet_l4(relu_end=True): 127 | model = resnet101(pretrained=True) 128 | l4 = model.layer4 129 | if not relu_end: 130 | l4[-1].relu_end = False 131 | l4[0].conv2.stride = (1, 1) 132 | l4[0].downsample[0].stride = (1, 1) 133 | return l4 134 | 135 | def vgg_fc(relu_end=True, linear_end=True): 136 | model = vgg16(pretrained=True) 137 | vfc = model.classifier 138 | del vfc._modules['6'] # Get rid of linear layer 139 | del vfc._modules['5'] # Get rid of linear layer 140 | if not relu_end: 141 | del vfc._modules['4'] # Get rid of linear layer 142 | if not linear_end: 143 | del vfc._modules['3'] 144 | return vfc 145 | 146 | 147 | -------------------------------------------------------------------------------- /neural-motifs/lib/sparse_targets.py: -------------------------------------------------------------------------------- 1 | from lib.word_vectors import obj_edge_vectors 2 | import torch.nn as nn 3 | import torch 4 | from torch.autograd import Variable 5 | import numpy as np 6 | from config import DATA_PATH 7 | import os 8 | from lib.get_dataset_counts import get_counts 9 | 10 | 11 | class FrequencyBias(nn.Module): 12 | """ 13 | The goal of this is to provide a simplified way of computing 14 | P(predicate | obj1, obj2, img). 
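    Concretely, it stores the log of the empirical (obj1, obj2) -> predicate
    count table from the training set as an nn.Embedding with num_objs**2 rows,
    and index_with_labels looks up row obj1 * num_objs + obj2. A minimal usage
    sketch (shapes assume the default Visual Genome setup of 151 object classes
    and 51 predicates, and the dataset counts must be available on disk):

        bias = FrequencyBias()
        pairs = Variable(torch.LongTensor([[5, 7]]))  # one (obj1, obj2) label pair
        pot = bias.index_with_labels(pairs)           # [1, 51] log-frequency potentials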
15 | """ 16 | 17 | def __init__(self, eps=1e-3): 18 | super(FrequencyBias, self).__init__() 19 | 20 | fg_matrix, bg_matrix = get_counts(must_overlap=True) 21 | bg_matrix += 1 22 | fg_matrix[:, :, 0] = bg_matrix 23 | 24 | pred_dist = np.log(fg_matrix / fg_matrix.sum(2)[:, :, None] + eps) 25 | 26 | self.num_objs = pred_dist.shape[0] 27 | pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2]) 28 | 29 | self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1)) 30 | self.obj_baseline.weight.data = pred_dist 31 | 32 | def index_with_labels(self, labels): 33 | """ 34 | :param labels: [batch_size, 2] 35 | :return: 36 | """ 37 | return self.obj_baseline(labels[:, 0] * self.num_objs + labels[:, 1]) 38 | 39 | def forward(self, obj_cands0, obj_cands1): 40 | """ 41 | :param obj_cands0: [batch_size, 151] prob distibution over cands. 42 | :param obj_cands1: [batch_size, 151] prob distibution over cands. 43 | :return: [batch_size, #predicates] array, which contains potentials for 44 | each possibility 45 | """ 46 | # [batch_size, 151, 151] repr of the joint distribution 47 | joint_cands = obj_cands0[:, :, None] * obj_cands1[:, None] 48 | 49 | # [151, 151, 51] of targets per. 50 | baseline = joint_cands.view(joint_cands.size(0), -1) @ self.obj_baseline.weight 51 | 52 | return baseline 53 | -------------------------------------------------------------------------------- /neural-motifs/lib/surgery.py: -------------------------------------------------------------------------------- 1 | # create predictions from the other stuff 2 | """ 3 | Go from proposals + scores to relationships. 4 | 5 | pred-cls: No bbox regression, obj dist is exactly known 6 | sg-cls : No bbox regression 7 | sg-det : Bbox regression 8 | 9 | in all cases we'll return: 10 | boxes, objs, rels, pred_scores 11 | 12 | """ 13 | 14 | import numpy as np 15 | import torch 16 | from lib.pytorch_misc import unravel_index 17 | from lib.fpn.box_utils import bbox_overlaps 18 | # from ad3 import factor_graph as fg 19 | from time import time 20 | 21 | def filter_dets(boxes, obj_scores, obj_classes, rel_inds, pred_scores): 22 | """ 23 | Filters detections.... 24 | :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 25 | :param obj_scores: [num_box] probabilities for the scores 26 | :param obj_classes: [num_box] class labels for the topk 27 | :param rel_inds: [num_rel, 2] TENSOR consisting of (im_ind0, im_ind1) 28 | :param pred_scores: [topk, topk, num_rel, num_predicates] 29 | :param use_nms: True if use NMS to filter dets. 
30 | :return: boxes, objs, obj_scores, rels, pred_scores 31 | 32 | """ 33 | if boxes.dim() != 2: 34 | raise ValueError("Boxes need to be [num_box, 4] but are {}".format(boxes.size())) 35 | 36 | num_box = boxes.size(0) 37 | assert obj_scores.size(0) == num_box 38 | 39 | assert obj_classes.size() == obj_scores.size() 40 | num_rel = rel_inds.size(0) 41 | assert rel_inds.size(1) == 2 42 | assert pred_scores.size(0) == num_rel 43 | 44 | obj_scores0 = obj_scores.data[rel_inds[:,0]] 45 | obj_scores1 = obj_scores.data[rel_inds[:,1]] 46 | 47 | pred_scores_max, pred_classes_argmax = pred_scores.data[:,1:].max(1) 48 | pred_classes_argmax = pred_classes_argmax + 1 49 | 50 | rel_scores_argmaxed = pred_scores_max * obj_scores0 * obj_scores1 51 | rel_scores_vs, rel_scores_idx = torch.sort(rel_scores_argmaxed.view(-1), dim=0, descending=True) 52 | 53 | rels = rel_inds[rel_scores_idx].cpu().numpy() 54 | pred_scores_sorted = pred_scores[rel_scores_idx].data.cpu().numpy() 55 | obj_scores_np = obj_scores.data.cpu().numpy() 56 | objs_np = obj_classes.data.cpu().numpy() 57 | boxes_out = boxes.data.cpu().numpy() 58 | 59 | return boxes_out, objs_np, obj_scores_np, rels, pred_scores_sorted 60 | 61 | # def _get_similar_boxes(boxes, obj_classes_topk, nms_thresh=0.3): 62 | # """ 63 | # Assuming bg is NOT A LABEL. 64 | # :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 65 | # :param obj_classes: [num_box, topk] class labels 66 | # :return: num_box, topk, num_box, topk array containing similarities. 67 | # """ 68 | # topk = obj_classes_topk.size(1) 69 | # num_box = boxes.size(0) 70 | # 71 | # box_flat = boxes.view(-1, 4) if boxes.dim() == 3 else boxes[:, None].expand( 72 | # num_box, topk, 4).contiguous().view(-1, 4) 73 | # jax = bbox_overlaps(box_flat, box_flat).data > nms_thresh 74 | # # Filter out things that are not gonna compete. 75 | # classes_eq = obj_classes_topk.data.view(-1)[:, None] == obj_classes_topk.data.view(-1)[None, :] 76 | # jax &= classes_eq 77 | # boxes_are_similar = jax.view(num_box, topk, num_box, topk) 78 | # return boxes_are_similar.cpu().numpy().astype(np.bool) 79 | -------------------------------------------------------------------------------- /neural-motifs/lib/word_vectors.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from PyTorch's text library. 
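The main entry point is obj_edge_vectors(names), which returns a
[len(names), wv_dim] tensor of GloVe vectors. Names missing from the
vocabulary fall back to their longest whitespace-separated sub-word; any
remaining misses keep their random N(0, 1) initialization. A hedged usage
sketch, assuming the GloVe files are present under DATA_PATH (or can be
downloaded on first use):

    vecs = obj_edge_vectors(['person', 'traffic light'], wv_dim=300)  # [2, 300]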
3 | """ 4 | 5 | import array 6 | import os 7 | import zipfile 8 | 9 | import six 10 | import torch 11 | from six.moves.urllib.request import urlretrieve 12 | from tqdm import tqdm 13 | 14 | from config import DATA_PATH 15 | 16 | 17 | def obj_edge_vectors(names, wv_type='glove.6B', wv_dir=DATA_PATH, wv_dim=300): 18 | wv_dict, wv_arr, wv_size = load_word_vectors(wv_dir, wv_type, wv_dim) 19 | 20 | vectors = torch.Tensor(len(names), wv_dim) 21 | vectors.normal_(0,1) 22 | 23 | for i, token in enumerate(names): 24 | wv_index = wv_dict.get(token, None) 25 | if wv_index is not None: 26 | vectors[i] = wv_arr[wv_index] 27 | else: 28 | # Try the longest word (hopefully won't be a preposition 29 | lw_token = sorted(token.split(' '), key=lambda x: len(x), reverse=True)[0] 30 | print("{} -> {} ".format(token, lw_token)) 31 | wv_index = wv_dict.get(lw_token, None) 32 | if wv_index is not None: 33 | vectors[i] = wv_arr[wv_index] 34 | else: 35 | print("fail on {}".format(token)) 36 | 37 | return vectors 38 | 39 | URL = { 40 | 'glove.42B': 'http://nlp.stanford.edu/data/glove.42B.300d.zip', 41 | 'glove.840B': 'http://nlp.stanford.edu/data/glove.840B.300d.zip', 42 | 'glove.twitter.27B': 'http://nlp.stanford.edu/data/glove.twitter.27B.zip', 43 | 'glove.6B': 'http://nlp.stanford.edu/data/glove.6B.zip', 44 | } 45 | 46 | 47 | def load_word_vectors(root, wv_type, dim): 48 | """Load word vectors from a path, trying .pt, .txt, and .zip extensions.""" 49 | if isinstance(dim, int): 50 | dim = str(dim) + 'd' 51 | fname = os.path.join(root, wv_type + '.' + dim) 52 | if os.path.isfile(fname + '.pt'): 53 | fname_pt = fname + '.pt' 54 | print('loading word vectors from', fname_pt) 55 | return torch.load(fname_pt) 56 | if os.path.isfile(fname + '.txt'): 57 | fname_txt = fname + '.txt' 58 | cm = open(fname_txt, 'rb') 59 | cm = [line for line in cm] 60 | elif os.path.basename(wv_type) in URL: 61 | url = URL[wv_type] 62 | print('downloading word vectors from {}'.format(url)) 63 | filename = os.path.basename(fname) 64 | if not os.path.exists(root): 65 | os.makedirs(root) 66 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t: 67 | fname, _ = urlretrieve(url, fname, reporthook=reporthook(t)) 68 | with zipfile.ZipFile(fname, "r") as zf: 69 | print('extracting word vectors into {}'.format(root)) 70 | zf.extractall(root) 71 | if not os.path.isfile(fname + '.txt'): 72 | raise RuntimeError('no word vectors of requested dimension found') 73 | return load_word_vectors(root, wv_type, dim) 74 | else: 75 | raise RuntimeError('unable to load word vectors') 76 | 77 | wv_tokens, wv_arr, wv_size = [], array.array('d'), None 78 | if cm is not None: 79 | for line in tqdm(range(len(cm)), desc="loading word vectors from {}".format(fname_txt)): 80 | entries = cm[line].strip().split(b' ') 81 | word, entries = entries[0], entries[1:] 82 | if wv_size is None: 83 | wv_size = len(entries) 84 | try: 85 | if isinstance(word, six.binary_type): 86 | word = word.decode('utf-8') 87 | except: 88 | print('non-UTF8 token', repr(word), 'ignored') 89 | continue 90 | wv_arr.extend(float(x) for x in entries) 91 | wv_tokens.append(word) 92 | 93 | wv_dict = {word: i for i, word in enumerate(wv_tokens)} 94 | wv_arr = torch.Tensor(wv_arr).view(-1, wv_size) 95 | ret = (wv_dict, wv_arr, wv_size) 96 | torch.save(ret, fname + '.pt') 97 | return ret 98 | 99 | def reporthook(t): 100 | """https://github.com/tqdm/tqdm""" 101 | last_b = [0] 102 | 103 | def inner(b=1, bsize=1, tsize=None): 104 | """ 105 | b: int, optionala 106 | Number of blocks just 
transferred [default: 1]. 107 | bsize: int, optional 108 | Size of each block (in tqdm units) [default: 1]. 109 | tsize: int, optional 110 | Total size (in tqdm units). If [default: None] remains unchanged. 111 | """ 112 | if tsize is not None: 113 | t.total = tsize 114 | t.update((b - last_b[0]) * bsize) 115 | last_b[0] = b 116 | return inner 117 | -------------------------------------------------------------------------------- /neural-motifs/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/neural-motifs/misc/__init__.py -------------------------------------------------------------------------------- /neural-motifs/models/eval_rels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | import os, sys 5 | script_dir = os.path.dirname(os.path.realpath(__file__)) 6 | sys.path.append('{}/../'.format(script_dir)) 7 | 8 | from dataloaders.visual_genome import VGDataLoader, VG 9 | import numpy as np 10 | import torch 11 | 12 | from config import ModelConfig 13 | from lib.pytorch_misc import optimistic_restore 14 | from lib.evaluation.sg_eval import BasicSceneGraphEvaluator 15 | from tqdm import tqdm 16 | from config import BOX_SCALE, IM_SCALE 17 | import dill as pkl 18 | import os 19 | 20 | import sys 21 | from IPython.core import ultratb 22 | sys.excepthook = ultratb.FormattedTB(mode='Verbose', 23 | color_scheme='Linux', call_pdb=1) 24 | 25 | conf = ModelConfig() 26 | if conf.model == 'motifnet': 27 | from lib.rel_model import RelModel 28 | elif conf.model == 'stanford': 29 | from lib.rel_model_stanford import RelModelStanford as RelModel 30 | else: 31 | raise ValueError() 32 | 33 | train, val, test = VG.splits( 34 | num_val_im=conf.val_size, filter_duplicate_rels=True, 35 | use_proposals=conf.use_proposals, 36 | filter_non_overlap=conf.mode == 'sgdet') 37 | if conf.test: 38 | val = test 39 | train_loader, val_loader = VGDataLoader.splits( 40 | train, val, mode='rel', 41 | batch_size=conf.batch_size, 42 | num_workers=conf.num_workers, 43 | num_gpus=conf.num_gpus 44 | ) 45 | 46 | detector = RelModel( 47 | classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, 48 | num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True, 49 | use_resnet=conf.use_resnet, order=conf.order, 50 | nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim, 51 | use_proposals=conf.use_proposals, 52 | pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder, 53 | pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge, 54 | pooling_dim=conf.pooling_dim, 55 | rec_dropout=conf.rec_dropout, 56 | use_bias=conf.use_bias, 57 | use_tanh=conf.use_tanh, 58 | limit_vision=conf.limit_vision 59 | ) 60 | 61 | 62 | detector.cuda() 63 | ckpt = torch.load(conf.ckpt) 64 | 65 | optimistic_restore(detector, ckpt['state_dict']) 66 | # if conf.mode == 'sgdet': 67 | # det_ckpt = torch.load('checkpoints/new_vgdet/vg-19.tar')['state_dict'] 68 | # detector.detector.bbox_fc.weight.data.copy_(det_ckpt['bbox_fc.weight']) 69 | # detector.detector.bbox_fc.bias.data.copy_(det_ckpt['bbox_fc.bias']) 70 | # detector.detector.score_fc.weight.data.copy_(det_ckpt['score_fc.weight']) 71 | # detector.detector.score_fc.bias.data.copy_(det_ckpt['score_fc.bias']) 72 | 73 | all_pred_entries = [] 74 | def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)): 75 | result, result_preds = detector[b] 76 | assert 
conf.num_gpus == 1 77 | # if conf.num_gpus == 1: 78 | # result_preds = [result_preds] 79 | 80 | 81 | for i, (boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i) in enumerate(result_preds): 82 | gt_entry = { 83 | 'gt_classes': val.gt_classes[batch_num + i].copy(), 84 | 'gt_relations': val.relationships[batch_num + i].copy(), 85 | 'gt_boxes': val.gt_boxes[batch_num + i].copy(), 86 | } 87 | assert np.all(objs_i[rels_i[:,0]] > 0) and np.all(objs_i[rels_i[:,1]] > 0) 88 | # assert np.all(rels_i[:,2] > 0) 89 | 90 | pred_entry = { 91 | 'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE, 92 | 'pred_classes': objs_i, 93 | 'pred_rel_inds': rels_i, 94 | 'obj_scores': obj_scores_i, 95 | 'rel_scores': pred_scores_i, 96 | } 97 | all_pred_entries.append(pred_entry) 98 | 99 | evaluator[conf.mode].evaluate_scene_graph_entry( 100 | gt_entry, 101 | pred_entry, 102 | ) 103 | 104 | evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred) 105 | if conf.cache is not None and os.path.exists(conf.cache): 106 | print("Found {}! Loading from it".format(conf.cache)) 107 | with open(conf.cache,'rb') as f: 108 | all_pred_entries = pkl.load(f) 109 | for i, pred_entry in enumerate(tqdm(all_pred_entries)): 110 | gt_entry = { 111 | 'gt_classes': val.gt_classes[i].copy(), 112 | 'gt_relations': val.relationships[i].copy(), 113 | 'gt_boxes': val.gt_boxes[i].copy(), 114 | } 115 | evaluator[conf.mode].evaluate_scene_graph_entry( 116 | gt_entry, 117 | pred_entry, 118 | ) 119 | evaluator[conf.mode].print_stats() 120 | else: 121 | detector.eval() 122 | for val_b, batch in enumerate(tqdm(val_loader)): 123 | val_batch(conf.num_gpus*val_b, batch, evaluator) 124 | 125 | evaluator[conf.mode].print_stats() 126 | 127 | if conf.cache is not None: 128 | with open(conf.cache,'wb') as f: 129 | pkl.dump(all_pred_entries, f) 130 | 131 | import pickle as pkl 132 | tag = 'test' if conf.test else 'train' 133 | if conf.multi_pred: 134 | tag += '.multi_pred' 135 | for N in [20, 50, 100]: 136 | pkl.dump(evaluator['predcls'].result_dict['predcls_recall'][N], 137 | open('{}.{}.pkl'.format(tag, N), 'wb')) 138 | -------------------------------------------------------------------------------- /neural-motifs/scripts/eval_models_sgcls.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is a script that will evaluate all models for SGCLS 4 | export CUDA_VISIBLE_DEVICES=$1 5 | 6 | if [ $1 == "0" ]; then 7 | echo "EVALING THE BASELINE" 8 | python models/eval_rels.py -m sgcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 9 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar \ 10 | -nepoch 50 -use_bias -test -cache baseline_sgcls 11 | python models/eval_rels.py -m predcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 12 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar \ 13 | -nepoch 50 -use_bias -test -cache baseline_predcls 14 | elif [ $1 == "1" ]; then 15 | echo "EVALING MESSAGE PASSING" 16 | python models/eval_rels.py -m sgcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \ 17 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -test -cache stanford_sgcls 18 | python models/eval_rels.py -m predcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \ 19 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -test -cache stanford_predcls 20 | elif [ $1 == "2" ]; then 21 | echo "EVALING MOTIFNET" 22 | python models/eval_rels.py -m sgcls -model motifnet -order leftright -nl_obj 2 -nl_edge 
4 -b 6 -clip 5 \ 23 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -cache motifnet_sgcls 24 | python models/eval_rels.py -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 25 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -cache motifnet_predcls 26 | fi 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /neural-motifs/scripts/eval_models_sgdet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is a script that will evaluate all the models for SGDET 4 | export CUDA_VISIBLE_DEVICES=$1 5 | 6 | if [ $1 == "0" ]; then 7 | echo "EVALING THE BASELINE" 8 | python models/eval_rels.py -m sgdet -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 9 | -clip 5 -p 100 -pooling_dim 4096 -ngpu 1 -ckpt checkpoints/baseline-sgdet/vgrel-17.tar \ 10 | -nepoch 50 -use_bias -cache baseline_sgdet.pkl -test 11 | elif [ $1 == "1" ]; then 12 | echo "EVALING MESSAGE PASSING" 13 | 14 | python models/eval_rels.py -m sgdet -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \ 15 | -ckpt checkpoints/stanford-sgdet/vgrel-18.tar -cache stanford_sgdet.pkl -test 16 | elif [ $1 == "2" ]; then 17 | echo "EVALING MOTIFNET" 18 | python models/eval_rels.py -m sgdet -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 19 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/motifnet-sgdet/vgrel-14.tar -nepoch 50 -cache motifnet_sgdet.pkl -use_bias 20 | fi 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /neural-motifs/scripts/pretrain_detector.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Train the model without COCO pretraining 3 | python models/train_detector.py -b 6 -lr 1e-3 -save_dir checkpoints/vgdet -nepoch 50 -ngpu 3 -nwork 3 -p 100 -clip 5 4 | 5 | # If you want to evaluate on the frequency baseline now, run this command (replace the checkpoint with the 6 | # best checkpoint you found). 
7 | #export CUDA_VISIBLE_DEVICES=0 8 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-24.tar -nwork 1 -p 100 -test 9 | #export CUDA_VISIBLE_DEVICES=1 10 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test 11 | #export CUDA_VISIBLE_DEVICES=2 12 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test 13 | # 14 | # 15 | -------------------------------------------------------------------------------- /neural-motifs/scripts/refine_for_detection.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Refine Motifnet for detection 4 | 5 | 6 | export CUDA_VISIBLE_DEVICES=$1 7 | 8 | if [ $1 == "0" ]; then 9 | echo "TRAINING THE BASELINE" 10 | python models/train_rels.py -m sgdet -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 11 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar -save_dir checkpoints/baseline-sgdet \ 12 | -nepoch 50 -use_bias 13 | elif [ $1 == "1" ]; then 14 | echo "TRAINING STANFORD" 15 | python models/train_rels.py -m sgdet -model stanford -b 6 -p 100 -lr 1e-4 -ngpu 1 -clip 5 \ 16 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -save_dir checkpoints/stanford-sgdet 17 | elif [ $1 == "2" ]; then 18 | echo "Refining Motifnet for detection!" 19 | python models/train_rels.py -m sgdet -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/motifnet-sgcls/vgrel-7.tar \ 21 | -save_dir checkpoints/motifnet-sgdet -nepoch 10 -use_bias 22 | fi -------------------------------------------------------------------------------- /neural-motifs/scripts/train_models_sgcls.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is a script that will train all of the models for scene graph classification and then evaluate them. 
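# Usage (inferred from the branches below): pass 0, 1, or 2 as $1 to train
# the baseline, message-passing, or motifnet model; note that
# CUDA_VISIBLE_DEVICES is hard-coded to 1 here rather than taken from $1.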
4 | # export CUDA_VISIBLE_DEVICES=$1 5 | export CUDA_VISIBLE_DEVICES=1 6 | 7 | if [ $1 == "0" ]; then 8 | echo "TRAINING THE BASELINE" 9 | python models/train_rels.py -m sgcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 10 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/baseline2 \ 11 | -nepoch 50 -use_bias 12 | elif [ $1 == "1" ]; then 13 | echo "TRAINING MESSAGE PASSING" 14 | 15 | python models/train_rels.py -m sgcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \ 16 | -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford2 17 | elif [ $1 == "2" ]; then 18 | echo "TRAINING MOTIFNET" 19 | 20 | python models/train_rels.py -m sgcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 21 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \ 22 | -save_dir checkpoints/motifnet2 -nepoch 50 -use_bias 23 | fi 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /neural-motifs/scripts/train_motifnet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Train Motifnet using different orderings 4 | 5 | export CUDA_VISIBLE_DEVICES=$1 6 | 7 | if [ $1 == "0" ]; then 8 | echo "TRAINING MOTIFNET V1" 9 | python models/train_rels.py -m sgcls -model motifnet -order size -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 10 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \ 11 | -save_dir checkpoints/motifnet-size-sgcls -nepoch 50 -use_bias 12 | elif [ $1 == "1" ]; then 13 | echo "TRAINING MOTIFNET V2" 14 | python models/train_rels.py -m sgcls -model motifnet -order random -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 15 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \ 16 | -save_dir checkpoints/motifnet-random-sgcls -nepoch 50 -use_bias 17 | elif [ $1 == "2" ]; then 18 | echo "TRAINING MOTIFNET V3" 19 | python models/train_rels.py -m sgcls -model motifnet -order confidence -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \ 21 | -save_dir checkpoints/motifnet-conf-sgcls -nepoch 50 -use_bias 22 | fi -------------------------------------------------------------------------------- /neural-motifs/scripts/train_predcls.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd $(dirname $0)/.. 4 | 5 | run_baseline() { 6 | SEED=$1 7 | # No manual seed is set internally, so just give the 8 | # output directory a different name. 9 | python3 models/train_rels.py -m predcls -model motifnet \ 10 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 11 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \ 12 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \ 13 | -save_dir checkpoints/baseline_predcls.$SEED \ 14 | -nepoch 30 -use_bias &> /dev/null & 15 | } 16 | 17 | run_lml() { 18 | TOPK=$1 19 | SEED=$2 20 | # No manual seed is set internally, so just give the 21 | # output directory a different name. 
22 | python3 models/train_rels.py -m predcls -model motifnet \ 23 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 24 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \ 25 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \ 26 | -save_dir checkpoints/lml_predcls.$TOPK.$SEED \ 27 | -nepoch 30 -use_bias -lml_topk $TOPK &> /dev/null & 28 | } 29 | 30 | run_entr() { 31 | TOPK=$1 32 | SEED=$2 33 | # No manual seed is set internally, so just give the 34 | # output directory a different name. 35 | python3 models/train_rels.py -m predcls -model motifnet \ 36 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 37 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \ 38 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \ 39 | -save_dir checkpoints/entr_predcls.$TOPK.$SEED \ 40 | -nepoch 30 -use_bias -entr_topk $TOPK &> logs/entr.$TOPK.log & 41 | } 42 | 43 | run_ml() { 44 | SEED=$1 45 | # No manual seed is set internally, so just give the 46 | # output directory a different name. 47 | python3 models/train_rels.py -m predcls -model motifnet \ 48 | -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 49 | -p 10 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 \ 50 | -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar \ 51 | -save_dir checkpoints/ml_predcls.$SEED \ 52 | -nepoch 30 -use_bias -ml_loss &> logs/ml.log & 53 | } 54 | 55 | SEED=0 56 | 57 | # export CUDA_VISIBLE_DEVICES=0 58 | # run_baseline $SEED 59 | 60 | # export CUDA_VISIBLE_DEVICES=1 61 | # run_lml 20 $SEED 62 | 63 | # export CUDA_VISIBLE_DEVICES=0 64 | # run_lml 50 $SEED 65 | 66 | # export CUDA_VISIBLE_DEVICES=1 67 | # run_lml 100 $SEED 68 | 69 | # export CUDA_VISIBLE_DEVICES=0 70 | # run_entr 20 $SEED 71 | 72 | # export CUDA_VISIBLE_DEVICES=1 73 | # run_entr 50 $SEED 74 | 75 | # export CUDA_VISIBLE_DEVICES=2 76 | # run_entr 100 $SEED 77 | 78 | export CUDA_VISIBLE_DEVICES=0 79 | run_ml $SEED 80 | 81 | wait 82 | -------------------------------------------------------------------------------- /neural-motifs/scripts/train_stanford.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | python models/train_rels.py -m sgcls -model stanford -b 4 -p 400 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford -adam 4 | 5 | # To test you can run this command 6 | # python models/eval_rels.py -m sgcls -model stanford -ngpu 1 -ckpt checkpoints/stanford/vgrel-28.tar -test 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='lml', 5 | version='0.0.1', 6 | description="The limited multi-label projection layer.", 7 | author='Brandon Amos', 8 | author_email='brandon.amos.cs@gmail.com', 9 | platforms=['any'], 10 | license="MIT", 11 | url='https://github.com/locuslab/lml', 12 | py_modules=['lml'], 13 | install_requires=[ 14 | 'numpy>=1,<2', 15 | 'semantic_version', 16 | ] 17 | ) 18 | -------------------------------------------------------------------------------- /smooth-topk/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | 
*.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # experiments logged 104 | xp/ 105 | -------------------------------------------------------------------------------- /smooth-topk/README.md: -------------------------------------------------------------------------------- 1 | This directory and README are from the 2 | [oval-group/smooth-topk](https://github.com/oval-group/smooth-topk) 3 | repository. 4 | Our modifications remain under the same license. 5 | 6 | --- 7 | 8 | # Smooth Loss Functions for Deep Top-k Classification 9 | 10 | This repository contains the implementation of the paper [Smooth Loss Functions for Deep Top-k Classification](https://openreview.net/forum?id=Hk5elxbRW¬eId=Hk5elxbRW) in pytorch. If you use this work for your research, please cite the paper: 11 | 12 | ``` 13 | @Article{berrada2018smooth, 14 | author = {Berrada, Leonard and Zisserman, Andrew and Kumar, M Pawan}, 15 | title = {Smooth Loss Functions for Deep Top-k Classification}, 16 | journal = {International Conference on Learning Representations}, 17 | year = {2018}, 18 | } 19 | ``` 20 | 21 | The core algorithm to compute the smooth top-k loss function with the elementary symmetric polynomials is contained in the `losses` module. Algorithms for the elementary symmetric polynomials and their gradients are in `losses/polynomial`. 22 | 23 | ## Requirements 24 | 25 | This code has been tested for pytorch 0.3.0. Detailed requirements are available in `requirements.txt`. The code should be compatible with python 2 and 3 (developed in 2.7). 26 | 27 | ## Reproducing the results 28 | 29 | ### CIFAR-100 30 | 31 | To reproduce the experiments with gpu `1`: 32 | * `scripts/cifar100_noise_ce.sh 1` 33 | * `scripts/cifar100_noise_svm.sh 1` 34 | 35 | ### ImageNet 36 | 37 | We use the official validation set of ImageNet as a test set. Therefore we create our own balanced validation set made of 50,000 training images. This can be done with `scripts/imagenet_split.py`. 38 | 39 | To reproduce the experiments: 40 | * `scripts/imagenet_subsets_ce.sh` 41 | * `scripts/imagenet_subsets_svm.sh` 42 | 43 | Warning: these scripts will use all available GPUs. To restrict the devices used, use the environment variable `CUDA_VISIBLE_DEVICES`. 
For example, to train the SVM models on GPUs `0` and `1`, you can run `CUDA_VISIBLE_DEVICES=0,1 scripts/imagenet_subsets_svm.sh`. 44 | 45 | The performance of the resulting models can then be obtained by executing `python scripts/eval.py`. This script evaluates the performance of the best models and writes the results in a text file. 46 | 47 | ### Algorithms Comparison 48 | 49 | The script `scripts/perf.py` can be used to compare the speed and numerical stability of different algorithms, including the standard algorithm to evaluate the Elementary Symmetric Functions (ESF). 50 | 51 | ## Acknowledgments 52 | 53 | The DenseNet implementation is from [densenet-pytorch](https://github.com/andreasveit/densenet-pytorch). 54 | 55 | -------------------------------------------------------------------------------- /smooth-topk/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | torch==0.3.0 3 | future 4 | scipy 5 | numpy 6 | git+git://github.com/oval-group/logger#egg=v0.3 7 | torchvision==0.2.0 -------------------------------------------------------------------------------- /smooth-topk/src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/data/__init__.py -------------------------------------------------------------------------------- /smooth-topk/src/data/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.utils.data as data 3 | import torchvision.datasets as datasets 4 | 5 | from collections import defaultdict 6 | 7 | 8 | class LabelNoise(data.Dataset): 9 | def __init__(self, dataset, k, n_labels, p=1): 10 | 11 | assert n_labels % k == 0 12 | 13 | self.dataset = dataset 14 | self.k = k 15 | # random label between 0 and k-1 16 | self.noise = np.random.choice(k, size=len(self.dataset)) 17 | # noisy labels are introduced for each sample with probability p 18 | self.p = np.random.binomial(1, p, size=len(self.dataset)) 19 | 20 | print('Noisy labels (p={})'.format(p)) 21 | 22 | def __getitem__(self, idx): 23 | img, label = self.dataset[idx] 24 | if self.p[idx]: 25 | label = label - label % self.k + self.noise[idx] 26 | return img, label 27 | 28 | def __len__(self): 29 | return len(self.dataset) 30 | 31 | 32 | class Subset(data.Dataset): 33 | def __init__(self, dataset, indices=None): 34 | """ 35 | Subset of dataset given by indices. 36 | """ 37 | super(Subset, self).__init__() 38 | self.dataset = dataset 39 | self.indices = indices 40 | 41 | if self.indices is None: 42 | self.n_samples = len(self.dataset) 43 | else: 44 | self.n_samples = len(self.indices) 45 | assert self.n_samples >= 0 and \ 46 | self.n_samples <= len(self.dataset), \ 47 | "length of {} incompatible with dataset of size {}"\ 48 | .format(self.n_samples, len(self.dataset)) 49 | 50 | def __getitem__(self, idx): 51 | if self.indices is None: 52 | return self.dataset[idx] 53 | else: 54 | return self.dataset[self.indices[idx]] 55 | 56 | def __len__(self): 57 | return self.n_samples 58 | 59 | 60 | def random_subsets(subset_sizes, n_total, seed=None, replace=False): 61 | """ 62 | Return subsets of indices, with sizes given by the iterable 63 | subset_sizes, drawn from {0, ..., n_total - 1}. 64 | Subsets are disjoint when replace=False, and may overlap otherwise. 65 | Optional seed for deterministic draw. 
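    A small usage sketch:

        train_idx, val_idx = random_subsets((8, 2), 10, seed=0)
        # -> two disjoint numpy index arrays of sizes 8 and 2 drawn from
        #    {0, ..., 9}; the global numpy RNG state is saved and restored.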
66 | """ 67 | # save current random state 68 | state = np.random.get_state() 69 | sum_sizes = sum(subset_sizes) 70 | assert sum_sizes <= n_total 71 | 72 | np.random.seed(seed) 73 | 74 | total_subset = np.random.choice(n_total, size=sum_sizes, 75 | replace=replace) 76 | perm = np.random.permutation(total_subset) 77 | res = [] 78 | start = 0 79 | for size in subset_sizes: 80 | res.append(perm[start: start + size]) 81 | start += size 82 | # restore initial random state 83 | np.random.set_state(state) 84 | return res 85 | 86 | 87 | def split_dataset(dataset_train, dataset_val, train_size, val_size): 88 | if isinstance(dataset_train, datasets.ImageFolder): 89 | n_classes = len(dataset_train.classes) 90 | if train_size < len(dataset_train): 91 | train_size_per_class = train_size // n_classes 92 | else: 93 | train_size_per_class = float('inf') 94 | assert train_size_per_class > 0 95 | my_dict = defaultdict(list) 96 | [my_dict[e[1]].append(e[0]) for e in dataset_train.imgs] 97 | train_imgs = [] 98 | for k in my_dict.keys(): 99 | imgs = my_dict[k] 100 | adapted_train_size = min(train_size_per_class, len(imgs)) 101 | train_indices, = random_subsets((adapted_train_size,), 102 | len(imgs), 103 | seed=1234 + int(k)) 104 | train_imgs += [(imgs[idx], int(k)) for idx in train_indices] 105 | np.random.shuffle(train_imgs) 106 | dataset_train.imgs = train_imgs 107 | dataset_train.samples = train_imgs 108 | else: 109 | train_indices, val_indices = random_subsets((train_size, val_size), 110 | len(dataset_train), 111 | seed=1234) 112 | 113 | dataset_train = Subset(dataset_train, train_indices) 114 | dataset_val = Subset(dataset_val, val_indices) 115 | 116 | return dataset_train, dataset_val 117 | -------------------------------------------------------------------------------- /smooth-topk/src/epoch.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.autograd as ag 3 | 4 | from utils import regularization, accuracy, print_stats, dump_results 5 | from tqdm import tqdm 6 | 7 | 8 | def data_to_var(data, target, cuda, volatile=False): 9 | 10 | if cuda: 11 | data = data.cuda() 12 | target = target.cuda() 13 | data = ag.Variable(data, volatile=volatile) 14 | target = ag.Variable(target) 15 | 16 | return data, target 17 | 18 | 19 | def train(model, loss, optimizer, loader, xp, args): 20 | 21 | if args.use_dali: 22 | loader.reset() 23 | loader_len = loader._size // loader.batch_size 24 | else: 25 | loader_len = len(loader) 26 | 27 | if not loader_len: 28 | return 0 29 | 30 | model.train() 31 | 32 | xp.Parent_Train.reset() 33 | 34 | for batch_idx, batch in tqdm(enumerate(loader), desc='Train Epoch', 35 | leave=False, total=loader_len): 36 | if args.use_dali: 37 | data = batch[0]['data'] 38 | target = batch[0]['label'].squeeze().cuda().long() 39 | else: 40 | data, target = batch 41 | data, target = data_to_var(data, target, args.cuda) 42 | 43 | output = model(data) 44 | obj = loss(output, target) 45 | if obj.item() != obj.item(): 46 | print('NaN Erorr') 47 | import sys 48 | sys.exit(-1) 49 | 50 | optimizer.zero_grad() 51 | obj.backward() 52 | optimizer.step() 53 | 54 | prec1 = accuracy(output.data, target.data, topk=1) 55 | preck = accuracy(output.data, target.data, topk=xp.config['topk']) 56 | xp.Parent_Train.update(loss=obj.data.item(), acck=preck, acc1=prec1, n=data.size(0)) 57 | 58 | # compute objective function (including regularization) 59 | obj = xp.Loss_Train.get() + regularization(model, xp.mu) 60 | xp.Obj_Train.update(obj) 61 | # measure 
elapsed time 62 | xp.Timer_Train.update() 63 | 64 | xp.log_with_tag('train') 65 | 66 | if args.verbosity: 67 | print_stats(xp, 'train') 68 | 69 | 70 | def test(model, loss, loader, xp, args): 71 | if 'dali' in loader.__module__: 72 | loader.reset() 73 | loader_len = loader._size // loader.batch_size 74 | else: 75 | loader_len = len(loader) 76 | if not loader_len: 77 | return 0 78 | 79 | model.eval() 80 | 81 | metrics = xp.get_metric(tag=loader.tag, name='parent') 82 | timer = xp.get_metric(tag=loader.tag, name='timer') 83 | 84 | metrics.reset() 85 | 86 | if args.multiple_crops: 87 | epoch_test_multiple_crops(model, loader, xp, args.cuda) 88 | else: 89 | epoch_test(model, loader, xp, args.cuda) 90 | 91 | # measure elapsed time 92 | timer.update() 93 | xp.log_with_tag(loader.tag) 94 | 95 | if loader.tag == 'val': 96 | xp.Acc1_Val_Best.update(xp.acc1_val).log() 97 | xp.Acck_Val_Best.update(xp.acck_val).log() 98 | 99 | if args.verbosity: 100 | print_stats(xp, loader.tag) 101 | 102 | if args.eval: 103 | dump_results(xp, args) 104 | 105 | 106 | def epoch_test(model, loader, xp, cuda): 107 | if 'dali' in loader.__module__: 108 | loader_len = loader._size // loader.batch_size 109 | else: 110 | loader_len = len(loader) 111 | metrics = xp.get_metric(tag=loader.tag, name='parent') 112 | for batch_idx, batch in tqdm(enumerate(loader), desc='Test Epoch', 113 | leave=False, total=loader_len): 114 | if 'dali' in loader.__module__: 115 | data = batch[0]['data'] 116 | target = batch[0]['label'].squeeze().cuda().long() 117 | else: 118 | data, target = batch 119 | data, target = data_to_var(data, target, cuda, volatile=True) 120 | output = model(data) 121 | 122 | prec1 = accuracy(output.data, target.data, topk=1) 123 | preck = accuracy(output.data, target.data, topk=xp.config['topk']) 124 | metrics.update(acck=preck, acc1=prec1, n=data.size(0)) 125 | 126 | 127 | def epoch_test_multiple_crops(model, loader, xp, cuda): 128 | metrics = xp.get_metric(tag=loader.tag, name='parent') 129 | xp.Temperature.update() 130 | for batch_idx, (data, target) in tqdm(enumerate(loader), desc='Test Epoch', 131 | leave=False, total=len(loader)): 132 | 133 | target = ag.Variable(target.cuda()) 134 | avg = 0 135 | for img in data: 136 | img = ag.Variable(img.cuda(), volatile=True) 137 | output = model(img) 138 | # cross-entropy 139 | if xp.temperature == -1: 140 | avg += nn.functional.softmax(output).data 141 | # smooth-svm 142 | else: 143 | avg += output.data 144 | # avg += torch.exp(output.data / xp.temperature) 145 | 146 | prec1 = accuracy(avg, target.data, topk=1) 147 | preck = accuracy(avg, target.data, topk=xp.config['topk']) 148 | metrics.update(acck=preck, acc1=prec1, n=target.size(0)) 149 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/losses/__init__.py -------------------------------------------------------------------------------- /smooth-topk/src/losses/entr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class EntrLoss(nn.Module): 5 | def __init__(self, n_classes, k=5, tau=1.0): 6 | super(EntrLoss, self).__init__() 7 | self.n_classes = n_classes 8 | self.k = k 9 | self.tau = tau 10 | 11 | def forward(self, x, y): 12 | n_batch = x.shape[0] 13 | 14 | x = x/self.tau 15 | x_sorted, I 
= x.sort(dim=1, descending=True) 16 | x_sorted_last = x_sorted[:,self.k:] 17 | I_last = I[:,self.k:] 18 | 19 | fy = x.gather(1, y.unsqueeze(1)) 20 | J = (I_last != y.unsqueeze(1)).type_as(x) 21 | 22 | # Could potentially be improved numerically by using 23 | # \log\sum_i\exp(x_i) = c + \log\sum_i\exp(x_i - c) 24 | safe_z = torch.clamp(x_sorted_last-fy, max=80) 25 | losses = torch.log(1.+torch.sum(safe_z.exp()*J, dim=1)) 26 | 27 | return losses.mean() 28 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/functional.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.autograd as ag 3 | 4 | from losses.polynomial.sp import log_sum_exp, LogSumExp 5 | from losses.logarithm import LogTensor 6 | from losses.utils import delta, split 7 | 8 | 9 | def Top1_Hard_SVM(labels, alpha=1.): 10 | def fun(x, y): 11 | # max oracle 12 | max_, _ = (x + delta(y, labels, alpha)).max(1) 13 | # subtract ground truth 14 | loss = max_ - x.gather(1, y[:, None]).squeeze() 15 | return loss 16 | return fun 17 | 18 | 19 | def Topk_Hard_SVM(labels, k, alpha=1.): 20 | def fun(x, y): 21 | x_1, x_2 = split(x, y, labels) 22 | 23 | max_1, _ = (x_1 + alpha).topk(k, dim=1) 24 | max_1 = max_1.mean(1) 25 | 26 | max_2, _ = x_1.topk(k - 1, dim=1) 27 | max_2 = (max_2.sum(1) + x_2) / k 28 | 29 | loss = torch.clamp(max_1 - max_2, min=0) 30 | 31 | return loss 32 | return fun 33 | 34 | 35 | def Top1_Smooth_SVM(labels, tau, alpha=1.): 36 | def fun(x, y): 37 | # add loss term and subtract ground truth score 38 | x = x + delta(y, labels, alpha) - x.gather(1, y[:, None]) 39 | # compute loss 40 | loss = tau * log_sum_exp(x / tau) 41 | 42 | return loss 43 | return fun 44 | 45 | 46 | def Topk_Smooth_SVM(labels, k, tau, alpha=1.): 47 | 48 | lsp = LogSumExp(k) 49 | 50 | def fun(x, y): 51 | x_1, x_2 = split(x, y, labels) 52 | # all scores are divided by (k * tau) 53 | x_1.div_(k * tau) 54 | x_2.div_(k * tau) 55 | 56 | # term 1: all terms that will *not* include the ground truth score 57 | # term 2: all terms that will include the ground truth score 58 | res = lsp(x_1) 59 | term_1, term_2 = res[1], res[0] 60 | term_1, term_2 = LogTensor(term_1), LogTensor(term_2) 61 | 62 | X_2 = LogTensor(x_2) 63 | cst = x_2.data.new(1).fill_(float(alpha) / tau) 64 | One_by_tau = LogTensor(ag.Variable(cst, requires_grad=False)) 65 | Loss_ = term_2 * X_2 66 | 67 | loss_pos = (term_1 * One_by_tau + Loss_).torch() 68 | loss_neg = Loss_.torch() 69 | loss = tau * (loss_pos - loss_neg) 70 | 71 | return loss 72 | return fun 73 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/lml_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | import sys 5 | sys.path.append('../../') # TODO 6 | 7 | 8 | from lml import LML 9 | 10 | class LMLLoss(nn.Module): 11 | def __init__(self, n_classes, k=5, tau=1.0): 12 | super(LMLLoss, self).__init__() 13 | self.n_classes = n_classes 14 | self.k = k 15 | self.tau = tau 16 | 17 | def forward(self, x, y): 18 | n_batch = x.shape[0] 19 | 20 | p = LML(N=self.k, eps=1e-4)(x/self.tau) 21 | losses = -torch.log(p.gather(1, y.unsqueeze(1)) + 1e-8) 22 | return losses.mean() 23 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/logarithm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import 
torch.autograd as ag 3 | 4 | from numbers import Number 5 | 6 | 7 | def log(x, like): 8 | """ 9 | Get log-value of x. 10 | If x is a LogTensor, simply access its stored data 11 | If x is a Number, transform it to a tensor / variable, 12 | in the log space, with the same type and size as like. 13 | """ 14 | if isinstance(x, LogTensor): 15 | return x.torch() 16 | 17 | if not isinstance(x, Number): 18 | raise TypeError('Not supported type: received {}, ' 19 | 'was expected LogTensor or Number' 20 | .format(type(x))) 21 | 22 | # transform x to variable / tensor of 23 | # same type and size as like 24 | like_is_var = isinstance(like, ag.Variable) 25 | data = like.data if like_is_var else like 26 | new = data.new(1).fill_(x).log_().expand_as(data) 27 | new = ag.Variable(new) if like_is_var else new 28 | return new 29 | 30 | 31 | def _imul_inplace(x1, x2): 32 | return x1.add_(x2) 33 | 34 | 35 | def _imul_outofplace(x1, x2): 36 | return x1 + x2 37 | 38 | 39 | def _add_inplace(x1, x2): 40 | M = torch.max(x1, x2) 41 | M.add_(((x1 - M).exp_().add_((x2 - M).exp_())).log_()) 42 | return M 43 | 44 | 45 | def _add_outofplace(x1, x2): 46 | M = torch.max(x1, x2) 47 | return M + torch.log(torch.exp(x1 - M) + torch.exp(x2 - M)) 48 | 49 | 50 | class LogTensor(object): 51 | """ 52 | Stable log-representation for torch tensors 53 | _x stores the value in the log space 54 | """ 55 | def __init__(self, x): 56 | super(LogTensor, self).__init__() 57 | 58 | self.var = isinstance(x, ag.Variable) 59 | self._x = x 60 | self.add = _add_outofplace if self.var else _add_inplace 61 | self.imul = _imul_outofplace if self.var else _imul_inplace 62 | 63 | def __add__(self, other): 64 | other_x = log(other, like=self._x) 65 | return LogTensor(self.add(self._x, other_x)) 66 | 67 | def __imul__(self, other): 68 | other_x = log(other, like=self._x) 69 | self._x = self.imul(self._x, other_x) 70 | return self 71 | 72 | def __iadd__(self, other): 73 | other_x = log(other, like=self._x) 74 | self._x = self.add(self._x, other_x) 75 | return self 76 | 77 | def __radd__(self, other): 78 | """ 79 | Addition is commutative. 80 | """ 81 | return self.__add__(other) 82 | 83 | def __sub__(self, other): 84 | """ 85 | NB: assumes self - other > 0. 86 | Will return nan otherwise. 87 | """ 88 | other_x = log(other, like=self._x) 89 | diff = other_x - self._x 90 | x = self._x + log1mexp(diff) 91 | return LogTensor(x) 92 | 93 | def __pow__(self, power): 94 | return LogTensor(self._x * power) 95 | 96 | def __mul__(self, other): 97 | other_x = log(other, like=self._x) 98 | x = self._x + other_x 99 | return LogTensor(x) 100 | 101 | def __rmul__(self, other): 102 | """ 103 | Multiplication is commutative. 104 | """ 105 | return self.__mul__(other) 106 | 107 | def __div__(self, other): 108 | """ 109 | Division (python 2) 110 | """ 111 | other_x = log(other, like=self._x) 112 | x = self._x - other_x 113 | return LogTensor(x) 114 | 115 | def __truediv__(self, other): 116 | """ 117 | Division (python 3) 118 | """ 119 | return self.__div__(other) 120 | 121 | def torch(self): 122 | """ 123 | Returns value of tensor in torch format (either variable or tensor) 124 | """ 125 | return self._x 126 | 127 | def __repr__(self): 128 | tensor = self._x.data if self.var else self._x 129 | s = 'Log Tensor with value:\n{}'.format(tensor) 130 | return s 131 | 132 | 133 | def log1mexp(U, eps=1e-3): 134 | """ 135 | Compute log(1 - exp(u)) for u <= 0. 
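    Direct evaluation loses precision as u -> 0^-, because 1 - exp(u)
    underflows in floating point, so for |u| < eps this implementation
    switches to the first-order expansion 1 - exp(u) ~= -u and returns
    log(-u) there. For instance, at u = -1e-6 the stable branch gives
    log(1e-6) ~= -13.8, whereas the naive formula is dominated by
    rounding error.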
136 | """ 137 | res = torch.log1p(-torch.exp(U)) 138 | 139 | # |U| << 1 requires care for numerical stability: 140 | # 1 - exp(U) = -U + o(U) 141 | small = torch.lt(U.abs(), eps) 142 | res[small] = torch.log(-U[small]) 143 | 144 | return res 145 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/main.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from losses.svm import SmoothSVM 3 | from losses.lml_loss import LMLLoss 4 | from losses.ml import MLLoss 5 | from losses.entr import EntrLoss 6 | 7 | def get_loss(xp, args): 8 | if args.loss == "svm": 9 | print("Using SVM loss") 10 | loss = SmoothSVM(n_classes=args.num_classes, k=args.topk, tau=args.tau, alpha=args.alpha) 11 | elif args.loss == 'ce': 12 | print("Using CE loss") 13 | loss = nn.CrossEntropyLoss() 14 | loss.tau = -1 15 | elif args.loss == 'lml': 16 | print("Using LML loss") 17 | loss = LMLLoss(n_classes=args.num_classes, k=args.topk, tau=args.tau) 18 | elif args.loss == 'ml': 19 | loss = MLLoss(n_classes=args.num_classes) 20 | elif args.loss == 'entr': 21 | print("Using truncated entr (Lapin) loss") 22 | loss = EntrLoss(n_classes=args.num_classes, k=args.topk, tau=args.tau) 23 | else: 24 | raise ValueError('Invalid choice of loss ({})'.format(args.loss)) 25 | 26 | xp.Temperature.set_fun(lambda: loss.tau) 27 | 28 | return loss 29 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/ml.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class MLLoss(nn.Module): 5 | def __init__(self, n_classes): 6 | super(MLLoss, self).__init__() 7 | self.n_classes = n_classes 8 | self.tau = 1.0 9 | 10 | def forward(self, x, y): 11 | n_batch = x.shape[0] 12 | y_onehot = torch.zeros(n_batch, self.n_classes).type_as(x) 13 | y_onehot.scatter_(1, y.unsqueeze(1), 1) 14 | loss = nn.BCEWithLogitsLoss()(x, y_onehot) 15 | return loss 16 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/polynomial/__init__.py: -------------------------------------------------------------------------------- 1 | from losses.polynomial.sp import LogSumExp, log_sum_exp 2 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/polynomial/divide_conquer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def divide_and_conquer(x, k, mul): 5 | """ 6 | Divide and conquer method for polynomial expansion 7 | x is a 2d tensor of size (n_classes, n_roots) 8 | The objective is to obtain the k first coefficients of the expanded 9 | polynomial 10 | """ 11 | 12 | to_merge = [] 13 | 14 | while x[0].dim() > 1 and x[0].size(0) > 1: 15 | size = x[0].size(0) 16 | half = size // 2 17 | if 2 * half < size: 18 | to_merge.append([t[-1] for t in x]) 19 | x = mul([t[:half] for t in x], 20 | [t[half: 2 * half] for t in x]) 21 | 22 | for row in to_merge: 23 | x = mul(x, row) 24 | x = torch.cat(x) 25 | return x 26 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/polynomial/grad.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from future.builtins import range 4 | from losses.logarithm import LogTensor 5 | 6 | 7 | def recursion(S, X, j): 8 | """ 9 | Apply recursive 
formula to compute the gradient 10 | for coefficient of degree j. 11 | d S[j] / d X = S[j-1] - X * (S[j-2] - X * (S[j-3] - ...) ... ) 12 | = S[j-1] + X ** 2 * S[j-3] + ... 13 | - (X * S[j-2] + X ** 3 * S[j-4] + ...) 14 | """ 15 | 16 | # Compute positive and negative parts separately 17 | _P_ = sum(S[i] * X ** (j - 1 - i) for i in range(j - 1, -1, -2)) 18 | _N_ = sum(S[i] * X ** (j - 1 - i) for i in range(j - 2, -1, -2)) 19 | 20 | return _N_, _P_ 21 | 22 | 23 | def approximation(S, X, j, p): 24 | """ 25 | Compute p-th order approximation for d S[j] / d X: 26 | d S[j] / d X ~ S[j] / X - S[j + 1] / X ** 2 + ... 27 | + (-1) ** (p - 1) * S[j + p - 1] / X ** p 28 | """ 29 | 30 | # Compute positive and negative parts separately 31 | _P_ = sum(S[j + i] / X ** (i + 1) for i in range(0, p, 2)) 32 | _N_ = sum(S[j + i] / X ** (i + 1) for i in range(1, p, 2)) 33 | 34 | return _N_, _P_ 35 | 36 | 37 | def d_logS_d_expX(S, X, j, p, grad, thresh, eps=1e-5): 38 | """ 39 | Compute the gradient of log S[j] w.r.t. exp(X). 40 | For unstable cases, use p-th order approximation. 41 | """ 42 | 43 | # ------------------------------------------------------------------------ 44 | # Detect instabilities 45 | # ------------------------------------------------------------------------ 46 | 47 | _X_ = LogTensor(X) 48 | _S_ = [LogTensor(S[i]) for i in range(S.size(0))] 49 | 50 | # recursion of gradient formula (separate terms for stability) 51 | _N_, _P_ = recursion(_S_, _X_, j) 52 | 53 | # detect instability: small relative difference in log-space 54 | P, N = _P_.torch(), _N_.torch() 55 | diff = (P - N) / (N.abs() + eps) 56 | 57 | # split into stable and unstable indices 58 | u_indices = torch.lt(diff, thresh) # unstable 59 | s_indices = u_indices.eq(0) # stable 60 | 61 | # ------------------------------------------------------------------------ 62 | # Compute d S[j] / d X 63 | # ------------------------------------------------------------------------ 64 | 65 | # make grad match size and type of X 66 | grad = grad.type_as(X).resize_as_(X) 67 | 68 | # exact gradient for s_indices (stable) elements 69 | if s_indices.sum(): 70 | # re-use positive and negative parts of recursion (separate for stability) 71 | _N_ = LogTensor(_N_.torch()[s_indices]) 72 | _P_ = LogTensor(_P_.torch()[s_indices]) 73 | _X_ = LogTensor(X[s_indices]) 74 | _S_ = [LogTensor(S[i][s_indices]) for i in range(S.size(0))] 75 | 76 | # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j] 77 | _SG_ = (_P_ - _N_) * _X_ / _S_[j] 78 | grad.masked_scatter_(s_indices, _SG_.torch().exp()) 79 | 80 | # approximate gradients for u_indices (unstable) elements 81 | if u_indices.sum(): 82 | _X_ = LogTensor(X[u_indices]) 83 | _S_ = [LogTensor(S[i][u_indices]) for i in range(S.size(0))] 84 | 85 | # positive and negative parts of approximation (separate for stability) 86 | _N_, _P_ = approximation(_S_, _X_, j, p) 87 | 88 | # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j] 89 | _UG_ = (_P_ - _N_) * _X_ / _S_[j] 90 | grad.masked_scatter_(u_indices, _UG_.torch().exp()) 91 | 92 | return grad 93 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/polynomial/multiplication.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import itertools 3 | 4 | from future.builtins import range 5 | from functools import reduce 6 | from losses.logarithm import LogTensor 7 | 8 | 9 | def Multiplication(k): 10 | """ 11 | Generate a function that performs a polynomial multiplication and 
returns coefficients up to degree k 12 | """ 13 | assert isinstance(k, int) and k > 0 14 | 15 | def isum(factors): 16 | init = next(factors) 17 | return reduce(operator.iadd, factors, init) 18 | 19 | def mul_function(x1, x2): 20 | 21 | # prepare indices for convolution 22 | l1, l2 = len(x1), len(x2) 23 | M = min(k + 1, l1 + l2 - 1) 24 | indices = [[] for _ in range(M)] 25 | for (i, j) in itertools.product(range(l1), range(l2)): 26 | if i + j >= M: 27 | continue 28 | indices[i + j].append((i, j)) 29 | 30 | # wrap with log-tensors for stability 31 | X1 = [LogTensor(x1[i]) for i in range(l1)] 32 | X2 = [LogTensor(x2[i]) for i in range(l2)] 33 | 34 | # perform convolution 35 | coeff = [] 36 | for c in range(M): 37 | coeff.append(isum(X1[i] * X2[j] for (i, j) in indices[c]).torch()) 38 | return coeff 39 | 40 | return mul_function 41 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/polynomial/sp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.autograd as ag 4 | 5 | from losses.polynomial.divide_conquer import divide_and_conquer 6 | from losses.polynomial.multiplication import Multiplication 7 | from losses.polynomial.grad import d_logS_d_expX 8 | 9 | 10 | class LogSumExp(nn.Module): 11 | def __init__(self, k, p=None, thresh=1e-5): 12 | super(LogSumExp, self).__init__() 13 | self.k = k 14 | self.p = int(1 + 0.2 * k) if p is None else p 15 | self.mul = Multiplication(self.k + self.p - 1) 16 | self.thresh = thresh 17 | 18 | self.register_buffer('grad_k', torch.Tensor(0)) 19 | self.register_buffer('grad_km1', torch.Tensor(0)) 20 | 21 | self.buffers = (self.grad_km1, self.grad_k) 22 | 23 | def forward(self, x): 24 | f = LogSumExp_F(self.k, self.p, self.thresh, self.mul, self.buffers) 25 | return f(x) 26 | 27 | 28 | class LogSumExp_F(ag.Function): 29 | def __init__(self, k, p, thresh, mul, buffers): 30 | self.k = k 31 | self.p = p 32 | self.mul = mul 33 | self.thresh = thresh 34 | 35 | # unpack buffers 36 | self.grad_km1, self.grad_k = buffers 37 | 38 | def forward(self, x): 39 | """ 40 | Returns a matrix of size (2, n_samples) with log(sigma_{k-1}) and log(sigma_{k}) 41 | for each sample of the mini-batch. 42 | """ 43 | self.save_for_backward(x) 44 | 45 | # number of samples and number of coefficients to compute 46 | n_s = x.size(0) 47 | kp = self.k + self.p - 1 48 | 49 | assert kp <= x.size(1) 50 | 51 | # clone to allow in-place operations 52 | x = x.clone() 53 | 54 | # pre-compute normalization 55 | x_summed = x.sum(1) 56 | 57 | # invert in log-space 58 | x.t_().mul_(-1) 59 | 60 | # initialize polynomials (in log-space) 61 | x = [x, x.clone().fill_(0)] 62 | 63 | # polynomial multiplications 64 | log_res = divide_and_conquer(x, kp, mul=self.mul) 65 | 66 | # re-normalize 67 | coeff = log_res + x_summed[None, :] 68 | 69 | # avoid broadcasting issues (in particular if n_s = 1) 70 | coeff = coeff.view(kp + 1, n_s) 71 | 72 | # save all coeff for backward 73 | self.saved_coeff = coeff 74 | 75 | return coeff[self.k - 1: self.k + 1] 76 | 77 | def backward(self, grad_sk): 78 | """ 79 | Compute backward pass of LogSumExp. 80 | Python variables with an upper case first letter are in 81 | log-space, others are in standard space. 
82 | """ 83 | 84 | # tensors from forward pass 85 | X, = self.saved_tensors 86 | S = self.saved_coeff 87 | 88 | # extend to shape (self.k + 1, n_samples, n_classes) for backward 89 | S = S.unsqueeze(2).expand(S.size(0), X.size(0), X.size(1)) 90 | 91 | # compute gradients for coeff of degree k and k - 1 92 | self.grad_km1 = d_logS_d_expX(S, X, self.k - 1, self.p, self.grad_km1, self.thresh) 93 | self.grad_k = d_logS_d_expX(S, X, self.k, self.p, self.grad_k, self.thresh) 94 | 95 | # chain rule: combine with incoming gradients (broadcast to all classes on third dim) 96 | grad_x = grad_sk[0, :, None] * self.grad_km1 + grad_sk[1, :, None] * self.grad_k 97 | 98 | return grad_x 99 | 100 | 101 | def log_sum_exp(x): 102 | """ 103 | Compute log(sum(exp(x), 1)) in a numerically stable way. 104 | Assumes x is 2d. 105 | """ 106 | max_score, _ = x.max(1) 107 | return max_score + torch.log(torch.sum(torch.exp(x - max_score[:, None]), 1)) 108 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/svm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import losses.functional as F 5 | 6 | from losses.utils import detect_large 7 | 8 | 9 | def SmoothSVM(n_classes, alpha=None, tau=1., k=5): 10 | if k == 1: 11 | return SmoothTop1SVM(n_classes, alpha, tau) 12 | else: 13 | return SmoothTopkSVM(n_classes, alpha, tau, k) 14 | 15 | 16 | class _SVMLoss(nn.Module): 17 | 18 | def __init__(self, n_classes, alpha): 19 | 20 | assert isinstance(n_classes, int) 21 | 22 | assert n_classes > 0 23 | assert alpha is None or alpha >= 0 24 | 25 | super(_SVMLoss, self).__init__() 26 | self.alpha = alpha if alpha is not None else 1 27 | self.register_buffer('labels', torch.from_numpy(np.arange(n_classes))) 28 | self.n_classes = n_classes 29 | self._tau = None 30 | 31 | def forward(self, x, y): 32 | 33 | raise NotImplementedError("Forward needs to be re-implemented for each loss") 34 | 35 | @property 36 | def tau(self): 37 | return self._tau 38 | 39 | @tau.setter 40 | def tau(self, tau): 41 | if self._tau != tau: 42 | print("Setting tau to {}".format(tau)) 43 | self._tau = float(tau) 44 | self.get_losses() 45 | 46 | def cuda(self, device=None): 47 | nn.Module.cuda(self, device) 48 | self.get_losses() 49 | 50 | def cpu(self): 51 | nn.Module.cpu() 52 | self.get_losses() 53 | 54 | 55 | class MaxTop1SVM(_SVMLoss): 56 | 57 | def __init__(self, n_classes, alpha=None): 58 | 59 | super(MaxTop1SVM, self).__init__(n_classes=n_classes, 60 | alpha=alpha) 61 | self.get_losses() 62 | 63 | def forward(self, x, y): 64 | return self.F(x, y).mean() 65 | 66 | def get_losses(self): 67 | self.F = F.Top1_Hard_SVM(self.labels, self.alpha) 68 | 69 | 70 | class MaxTopkSVM(_SVMLoss): 71 | 72 | def __init__(self, n_classes, alpha=None, k=5): 73 | 74 | super(MaxTopkSVM, self).__init__(n_classes=n_classes, 75 | alpha=alpha) 76 | self.k = k 77 | self.get_losses() 78 | 79 | def forward(self, x, y): 80 | return self.F(x, y).mean() 81 | 82 | def get_losses(self): 83 | self.F = F.Topk_Hard_SVM(self.labels, self.k, self.alpha) 84 | 85 | 86 | class SmoothTop1SVM(_SVMLoss): 87 | def __init__(self, n_classes, alpha=None, tau=1.): 88 | super(SmoothTop1SVM, self).__init__(n_classes=n_classes, 89 | alpha=alpha) 90 | self.tau = tau 91 | self.thresh = 1e3 92 | self.get_losses() 93 | 94 | def forward(self, x, y): 95 | smooth, hard = detect_large(x, 1, self.tau, self.thresh) 96 | 97 | loss = 0 98 | if smooth.data.sum(): 99 
| x_s, y_s = x[smooth[:, None]], y[smooth] 100 | x_s = x_s.view(-1, x.size(1)) 101 | loss += self.F_s(x_s, y_s).sum() / x.size(0) 102 | if hard.data.sum(): 103 | x_h, y_h = x[hard[:, None]], y[hard] 104 | x_h = x_h.view(-1, x.size(1)) 105 | loss += self.F_h(x_h, y_h).sum() / x.size(0) 106 | 107 | return loss 108 | 109 | def get_losses(self): 110 | self.F_h = F.Top1_Hard_SVM(self.labels, self.alpha) 111 | self.F_s = F.Top1_Smooth_SVM(self.labels, self.tau, self.alpha) 112 | 113 | 114 | class SmoothTopkSVM(_SVMLoss): 115 | 116 | def __init__(self, n_classes, alpha=None, tau=1., k=5): 117 | super(SmoothTopkSVM, self).__init__(n_classes=n_classes, 118 | alpha=alpha) 119 | self.k = k 120 | self.tau = tau 121 | self.thresh = 1e3 122 | self.get_losses() 123 | 124 | def forward(self, x, y): 125 | smooth, hard = detect_large(x, self.k, self.tau, self.thresh) 126 | 127 | loss = 0 128 | if smooth.data.sum(): 129 | x_s, y_s = x[smooth], y[smooth] 130 | x_s = x_s.view(-1, x.size(1)) 131 | loss += self.F_s(x_s, y_s).sum() / x.size(0) 132 | if hard.data.sum(): 133 | x_h, y_h = x[hard], y[hard] 134 | x_h = x_h.view(-1, x.size(1)) 135 | loss += self.F_h(x_h, y_h).sum() / x.size(0) 136 | 137 | return loss 138 | 139 | def get_losses(self): 140 | self.F_h = F.Topk_Hard_SVM(self.labels, self.k, self.alpha) 141 | self.F_s = F.Topk_Smooth_SVM(self.labels, self.k, self.tau, self.alpha) 142 | -------------------------------------------------------------------------------- /smooth-topk/src/losses/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | import torch.autograd as ag 5 | 6 | 7 | def delta(y, labels, alpha=None): 8 | """ 9 | Compute zero-one loss matrix for a vector of ground truth y 10 | """ 11 | 12 | if isinstance(y, ag.Variable): 13 | labels = ag.Variable(labels, requires_grad=False) 14 | 15 | delta = torch.ne(y[:, None], labels[None, :]).float() 16 | 17 | if alpha is not None: 18 | delta = alpha * delta 19 | return delta 20 | 21 | 22 | def split(x, y, labels): 23 | labels = ag.Variable(labels, requires_grad=False) 24 | mask = torch.ne(labels[None, :], y[:, None]) 25 | 26 | # gather result: 27 | # x_1: all scores that do not correspond to the ground truth 28 | x_1 = x[mask].view(x.size(0), -1) 29 | # x_2: scores of the ground truth 30 | x_2 = x.gather(1, y[:, None]).view(-1) 31 | return x_1, x_2 32 | 33 | 34 | def detect_large(x, k, tau, thresh): 35 | top, _ = x.topk(k + 1, 1) 36 | # switch to hard top-k if (k+1)-largest element is much smaller 37 | # than k-largest element 38 | hard = torch.ge(top[:, k - 1] - top[:, k], k * tau * math.log(thresh)).detach() 39 | smooth = hard.eq(0) 40 | return smooth, hard 41 | -------------------------------------------------------------------------------- /smooth-topk/src/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # import waitGPU 4 | # waitGPU.wait(interval=10, nproc=0, ngpu=1) 5 | # import setGPU 6 | 7 | import argparse 8 | 9 | from cli import add_all_parsers, set_defaults 10 | import time 11 | import torch 12 | import logger 13 | 14 | from utils import create_experiment, get_optimizer, set_seed,\ 15 | update_optimizer, load_optimizer 16 | from data.main import get_loaders 17 | from epoch import train, test 18 | 19 | from losses.main import get_loss 20 | from models.main import get_model, load_model 21 | 22 | from setproctitle import setproctitle 23 | 24 | import sys 25 | from IPython.core import ultratb 26 | 
sys.excepthook = ultratb.FormattedTB(mode='Verbose', 27 | color_scheme='Linux', call_pdb=1) 28 | 29 | def run(args): 30 | tag = 'bamos.smooth-topk.seed={}.{}'.format(args.seed, args.dataset) 31 | if args.dataset == 'cifar100': 32 | tag += '.noise={}'.format(args.noise_labels) 33 | elif args.dataset == 'imagenet': 34 | tag += '-{}'.format(args.train_size) 35 | setproctitle(tag) 36 | 37 | set_seed(args.seed) 38 | xp = create_experiment(args) 39 | train_loader, val_loader, test_loader = get_loaders(args) 40 | loss = get_loss(xp, args) 41 | 42 | model = get_model(args) 43 | if args.load_model: 44 | load_model(model, args.load_model) 45 | 46 | if args.cuda: 47 | if args.parallel_gpu: 48 | model = torch.nn.DataParallel(model).cuda() 49 | else: 50 | torch.cuda.set_device(args.device) 51 | model.cuda() 52 | loss.cuda() 53 | 54 | optimizer = get_optimizer(model, args.mu, args.lr_0, xp) 55 | if args.load_optimizer: 56 | load_optimizer(optimizer, args.load_optimizer, args.lr_0) 57 | 58 | with logger.stdout_to("{}_log.txt".format(args.out_name)): 59 | clock = -time.time() 60 | for _ in range(args.epochs): 61 | 62 | xp.Epoch.update(1).log() 63 | optimizer = update_optimizer(args.lr_schedule, optimizer, 64 | model, loss, xp) 65 | 66 | xp.Learning_Rate.update().log() 67 | xp.Mu.update().log() 68 | xp.Temperature.update().log() 69 | 70 | train(model, loss, optimizer, train_loader, xp, args) 71 | test(model, loss, val_loader, xp, args) 72 | 73 | test(model, loss, test_loader, xp, args) 74 | clock += time.time() 75 | 76 | print("\nEvaluation time: \t {0:.2g} min".format(clock * 1. / 60)) 77 | 78 | 79 | if __name__ == '__main__': 80 | parser = argparse.ArgumentParser() 81 | add_all_parsers(parser) 82 | args = parser.parse_args() 83 | set_defaults(args) 84 | run(args) 85 | -------------------------------------------------------------------------------- /smooth-topk/src/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/models/__init__.py -------------------------------------------------------------------------------- /smooth-topk/src/models/cifar.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | 4 | 5 | from models.densenet import DenseNet3 6 | 7 | 8 | class BasicConvNet(nn.Module): 9 | 10 | def __init__(self, dataset, planes=16): 11 | 12 | super(BasicConvNet, self).__init__() 13 | 14 | n_classes = 10 if dataset == 'cifar10' else 100 15 | 16 | self.p = planes 17 | 18 | conv1_1 = nn.Conv2d(3, self.p, 3, padding=1, bias=False) 19 | bn1_1 = nn.BatchNorm2d(self.p) 20 | conv1_2 = nn.Conv2d(self.p, self.p, 3, padding=1, bias=False) 21 | bn1_2 = nn.BatchNorm2d(self.p) 22 | 23 | conv2_1 = nn.Conv2d(self.p, self.p * 2, 3, padding=1, bias=False) 24 | bn2_1 = nn.BatchNorm2d(self.p * 2) 25 | conv2_2 = nn.Conv2d(self.p * 2, self.p * 2, 3, padding=1, bias=False) 26 | bn2_2 = nn.BatchNorm2d(self.p * 2) 27 | 28 | conv3_1 = nn.Conv2d(self.p * 2, self.p * 4, 3, padding=1, bias=False) 29 | bn3_1 = nn.BatchNorm2d(self.p * 4) 30 | conv3_2 = nn.Conv2d(self.p * 4, self.p * 4, 3, padding=1, bias=False) 31 | bn3_2 = nn.BatchNorm2d(self.p * 4) 32 | 33 | relu = nn.ReLU(inplace=True) 34 | maxpool = nn.MaxPool2d(2) 35 | avgpool = nn.AvgPool2d(4) 36 | 37 | self.base = nn.Sequential(conv1_1, bn1_1, relu, 38 | conv1_2, bn1_2, relu, maxpool, 39 | conv2_1, bn2_1, relu, 40 | conv2_2, bn2_2, relu, maxpool, 41 | 
conv3_1, bn3_1, relu, 42 | conv3_2, bn3_2, relu, maxpool, 43 | avgpool) 44 | 45 | self.fc = nn.Linear(self.p * 4, n_classes) 46 | 47 | for m in self.base.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 50 | m.weight.data.normal_(0, math.sqrt(2. / n)) 51 | elif isinstance(m, nn.BatchNorm2d): 52 | m.weight.data.fill_(1) 53 | m.bias.data.zero_() 54 | 55 | self.fc.bias.data.zero_() 56 | 57 | def forward(self, x): 58 | 59 | x = self.base(x) 60 | x = x.view(x.size(0), -1) 61 | x = self.fc(x) 62 | 63 | return x 64 | -------------------------------------------------------------------------------- /smooth-topk/src/models/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import models.cifar as cifar_models 3 | 4 | from models.parser import parse_model 5 | 6 | import torchvision.models as torch_models 7 | 8 | from collections import OrderedDict 9 | 10 | 11 | def get_model(args): 12 | parse_model(args) 13 | 14 | if args.dataset == 'imagenet': 15 | model = torch_models.__dict__[args.model]() 16 | args.model_name = args.model 17 | elif args.basic_model: 18 | model = cifar_models.BasicConvNet(args.dataset, args.planes) 19 | args.model_name = 'convnet_{}'.format(args.planes) 20 | else: 21 | model = cifar_models.DenseNet3(args.depth, args.num_classes, args.growth) 22 | args.model_name = 'densenet_{}_{}'.format(args.depth, args.growth) 23 | 24 | # Print the number of model parameters 25 | nparams = sum([p.data.nelement() for p in model.parameters()]) 26 | print('Number of model parameters: \t {}'.format(nparams)) 27 | 28 | return model 29 | 30 | 31 | def load_model(model, filename): 32 | # map_location allows loading weights saved on GPU onto the CPU 33 | state_dict = torch.load(filename, map_location=lambda storage, loc: storage) 34 | # map from DataParallel to simple module if needed 35 | if 'DataParallel' in state_dict['model_repr']: 36 | new_state_dict = OrderedDict() 37 | for k, v in state_dict['model'].items(): 38 | name = k.replace("module.", "") 39 | new_state_dict[name] = v 40 | state_dict['model'] = new_state_dict 41 | model.load_state_dict(state_dict['model']) 42 | -------------------------------------------------------------------------------- /smooth-topk/src/models/parser.py: -------------------------------------------------------------------------------- 1 | def parse_model(args): 2 | if args.dataset == 'imagenet': 3 | pass 4 | elif 'basic' in args.model: 5 | parse_basic_convnet(args) 6 | elif 'densenet' in args.model: 7 | parse_densenet(args) 8 | 9 | 10 | def parse_basic_convnet(args): 11 | args.basic_model = 1 12 | args.densenet_model = 0 13 | 14 | param_str = args.model.replace("basic", "") 15 | param_str = param_str.replace("_", "-") 16 | args.planes = [int(p) for p in param_str.split("-") if p != ''].pop(0) 17 | 18 | 19 | def parse_densenet(args): 20 | args.densenet_model = 1 21 | args.basic_model = 0 22 | 23 | param_str = args.model.replace("densenet", "") 24 | param_str = param_str.replace("_", "-") 25 | args.depth, args.growth = \ 26 | [int(p) for p in param_str.split("-") if p != ''] 27 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/cifar100_noise_ce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | device=$1 4 | 5 | echo "Using device" $device 6 | 7 | for p in 0 0.2 0.4 0.6 0.8 1 8 | do 9 | python main.py --dataset cifar100 --model densenet40-40 
--device $device\ 10 | --out-name ../xp/cifar100/cifar100_${p}_ce --loss ce --noise $p --no-visdom; 11 | done -------------------------------------------------------------------------------- /smooth-topk/src/scripts/cifar100_noise_entr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # echo "Using device" $device 4 | 5 | mkdir -p logs 6 | 7 | export CUDA_VISIBLE_DEVICES=0 8 | seed=2 9 | for p in 0.4 0.6; do 10 | # for p in 0.0 0.2; do 11 | python3 main.py --dataset cifar100 --model densenet40-40 \ 12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_entr \ 13 | --loss entr --noise $p --seed $seed \ 14 | --no-visdom --test-batch-size 64 &> /dev/null & 15 | export CUDA_VISIBLE_DEVICES=$(((CUDA_VISIBLE_DEVICES + 1) % 4)) 16 | done 17 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/cifar100_noise_lml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # echo "Using device" $device 4 | 5 | mkdir -p logs 6 | 7 | for seed in 0 1 2 3; do 8 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do 9 | export CUDA_VISIBLE_DEVICES=$seed 10 | for p in 0.4; do 11 | python3 main.py --dataset cifar100 --model densenet40-40 \ 12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_lml_v2 \ 13 | --loss lml --noise $p --seed $seed \ 14 | --no-visdom --test-batch-size 64 &> /dev/null & 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/cifar100_noise_ml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # echo "Using device" $device 4 | 5 | mkdir -p logs 6 | 7 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do 8 | # for p in 0.2 0.4 0.6 0.8 1.0; do 9 | for p in 0.2 0.4; do 10 | for seed in 0 3; do 11 | export CUDA_VISIBLE_DEVICES=$seed 12 | python3 main.py --dataset cifar100 --model densenet40-40 \ 13 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_ml \ 14 | --loss ml --noise $p --seed $seed \ 15 | --no-visdom --test-batch-size 64 &> /dev/null & 16 | done 17 | wait 18 | done 19 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/cifar100_noise_svm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # echo "Using device" $device 4 | 5 | mkdir -p logs 6 | 7 | for seed in 0 1 2 3; do 8 | # for p in 0.0 0.2 0.4 0.6 0.8 1.0; do 9 | export CUDA_VISIBLE_DEVICES=$seed 10 | for p in 0.4 0.6; do 11 | python3 main.py --dataset cifar100 --model densenet40-40 \ 12 | --out-name ../xp/cifar100/cifar100_${p}_${seed}_svm \ 13 | --loss svm --noise $p --seed $seed \ 14 | --no-visdom --test-batch-size 64 &> /dev/null & 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import subprocess 5 | import pprint 6 | 7 | dataset = 'cifar100' 8 | xp_dir = '../xp/{}'.format(dataset) 9 | 10 | # multiple crops option 11 | if dataset == 'imagenet': 12 | crops_opt = '--multiple-crops' 13 | elif dataset == 'cifar100': 14 | crops_opt = '' 15 | else: 16 | raise ValueError 17 | 18 | for _, _, files in os.walk(xp_dir): 19 | to_analyze = sorted(filter( 20 | lambda x: 'best' in x and x.endswith('.pkl'), 21 | files 22 | )) 23 | n_analyze = 
len(to_analyze) 24 | print("Found {} files to evaluate:".format(n_analyze)) 25 | pp = pprint.PrettyPrinter(indent=4) 26 | msg = pp.pformat(to_analyze) 27 | print(msg) 28 | 29 | for idx, xp_file in enumerate(to_analyze): 30 | 31 | print('-' * 80) 32 | print('Evaluating {} ({} out of {})'.format(xp_file, idx + 1, n_analyze)) 33 | 34 | # find loss used for training 35 | if 'svm' in xp_file: 36 | loss = 'svm' 37 | elif 'ce' in xp_file: 38 | loss = 'ce' 39 | elif 'lml' in xp_file: 40 | loss = 'lml' 41 | elif '_ml' in xp_file: 42 | loss = 'ml' 43 | elif 'entr' in xp_file: 44 | loss = 'entr' 45 | else: 46 | raise ValueError('Could not parse loss name from filename') 47 | 48 | filename = os.path.join(xp_dir, xp_file) 49 | cmd = "python main.py --loss {} --load-model {} --dataset {} --eval --parallel-gpu {}"\ 50 | .format(loss, filename, dataset, crops_opt) 51 | cmd = cmd.split() 52 | subprocess.call(cmd) 53 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import shutil 5 | import torchvision.datasets as datasets 6 | 7 | import sys 8 | sys.path.append('/nethome/bamos/2018-intel/smooth-topk/src') 9 | 10 | from collections import defaultdict 11 | from data.utils import random_subsets 12 | 13 | 14 | data_root = os.environ['VISION_DATA_SSD'] 15 | 16 | 17 | train_root = '{}/ILSVRC2012-prepr-split/images/train'.format(data_root) 18 | val_root = '{}/ILSVRC2012-prepr-split/images/val'.format(data_root) 19 | dataset_train = datasets.ImageFolder(train_root) 20 | 21 | if not os.path.exists(val_root): 22 | os.makedirs(val_root) 23 | else: 24 | assert len(os.listdir(val_root)) == 0, \ 25 | "{} is not empty: split already performed?".format(val_root) 26 | print("{} initially empty".format(val_root)) 27 | 28 | n_classes = len(dataset_train.classes) 29 | val_size_per_class = 50 30 | assert val_size_per_class > 0 31 | my_dict = defaultdict(list) 32 | [my_dict[e[1]].append(e[0]) for e in dataset_train.imgs] 33 | val_imgs = [] 34 | for k in my_dict.keys(): 35 | imgs = sorted(my_dict[k]) 36 | val_indices, = random_subsets((val_size_per_class,), 37 | len(imgs), 38 | seed=1234 + int(k)) 39 | val_imgs += [imgs[idx] for idx in val_indices] 40 | 41 | counter = dict() 42 | for img in val_imgs: 43 | id_ = img.split('/')[-2] 44 | if id_ in counter.keys(): 45 | counter[id_] += 1 46 | else: 47 | counter[id_] = 1 48 | 49 | balanced = len(set(counter.values())) == 1 50 | if balanced: 51 | print("data set is properly balanced") 52 | else: 53 | raise ValueError("data set should be balanced") 54 | 55 | print("Number of labels: {}".format(len(counter))) 56 | print("Number of images per label: {}".format(list(counter.values())[0])) 57 | 58 | print("Creating directories...") 59 | for new_dir in os.listdir(train_root): 60 | os.makedirs(os.path.join(val_root, new_dir)) 61 | 62 | for img in val_imgs: 63 | new_img = img.replace("train", "val") 64 | print("Moving {} to {}".format(img, new_img)) 65 | shutil.move(img, new_img) 66 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_subsets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SRC_DIR=$(dirname $0)/.. 
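# Added usage sketch for the function defined below (the GPU ids and
# hyper-parameter values here are illustrative only):
#   run_imagenet <GPUS> <LOSS> <SIZE_TAG> <lr_0> <tau> <mu> <seed>
#   e.g. run_imagenet 0,1 lml 64k 1.0 1.0 2.5e-4 0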
 4 | 5 | function run_imagenet() { 6 | cd $SRC_DIR 7 | 8 | local GPUS=$1 9 | local LOSS=$2 10 | local SIZE_TAG=$3 11 | local lr_0=$4 12 | local tau=$5 13 | local mu=$6 14 | local seed=$7 15 | 16 | ARGS="" 17 | case "$SIZE_TAG" in 18 | "64k") ARGS="--train-size 64000";; 19 | "128k") ARGS="--train-size 128000";; 20 | "320k") ARGS="--train-size 320000";; 21 | "640k") ARGS="--train-size 640000";; 22 | "all") ;; 23 | *) echo "Unrecognized size.";; 24 | esac 25 | 26 | export CUDA_VISIBLE_DEVICES=$GPUS 27 | # setopt shwordsplit (zsh-only; bash already word-splits the unquoted $ARGS) 28 | ./main.py --dataset imagenet --loss $LOSS \ 29 | --out-name ../xp/imagenet/im${SIZE_TAG}_${LOSS}_lr=${lr_0}_mu=${mu}_tau=${tau} \ 30 | --parallel-gpu $ARGS --no-visdom \ 31 | --lr_0 $lr_0 --tau $tau --mu $mu --seed $seed \ 32 | --use_dali 33 | } 34 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_subsets_ce.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # lr_0=0.1 4 | lr_0=1.0 5 | mu=2.5e-4 6 | 7 | cd $(dirname $0)/.. 8 | 9 | export CUDA_VISIBLE_DEVICES=3 10 | 11 | # python main.py --dataset imagenet --loss ce \ 12 | # --out-name ../xp/imagenet/im64k_ce_lr=${lr_0}_mu=$mu \ 13 | # --parallel-gpu --train-size 64000 --lr_0 $lr_0 --mu=$mu --no-visdom; 14 | 15 | # python main.py --dataset imagenet --loss ce \ 16 | # --out-name ../xp/imagenet/im128k_ce_lr=${lr_0}_mu=$mu \ 17 | # --parallel-gpu --train-size 128000 --lr_0 $lr_0 --mu=$mu --no-visdom; 18 | 19 | # python main.py --dataset imagenet --loss ce \ 20 | # --out-name ../xp/imagenet/im320k_ce_lr=${lr_0}_mu=$mu \ 21 | # --parallel-gpu --train-size 320000 --lr_0 $lr_0 --mu=$mu --no-visdom; 22 | 23 | # python main.py --dataset imagenet --loss ce \ 24 | # --out-name ../xp/imagenet/im640k_ce_lr=${lr_0}_mu=$mu \ 25 | # --parallel-gpu --train-size 640000 --lr_0 $lr_0 --mu=$mu \ 26 | # --no-visdom --use_dali 27 | 28 | # python main.py --dataset imagenet --loss ce \ 29 | # --out-name ../xp/imagenet/imall_ce_lr=${lr_0}_mu=$mu \ 30 | # --parallel-gpu --lr_0 $lr_0 --mu=$mu --no-visdom; 31 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_subsets_entr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | lr_0=1 4 | tau=1.0 5 | mu=0.00025 6 | 7 | cd $(dirname $0)/.. 
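# Note: the training runs below are kept commented out; uncomment the block
# for the desired training-set size to launch it.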
8 | 9 | source ~/imagenet-fast.sh 10 | export CUDA_VISIBLE_DEVICES=3 11 | 12 | # ./main.py --dataset imagenet --loss entr \ 13 | # --out-name ../xp/imagenet/im64k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \ 14 | # --parallel-gpu --train-size 64000 --no-visdom \ 15 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 16 | 17 | # ./main.py --dataset imagenet --loss entr \ 18 | # --out-name ../xp/imagenet/im128k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \ 19 | # --parallel-gpu --train-size 128000 --no-visdom \ 20 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 21 | 22 | # ./main.py --dataset imagenet --loss entr \ 23 | # --out-name ../xp/imagenet/im320k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \ 24 | # --parallel-gpu --train-size 320000 --no-visdom \ 25 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 26 | 27 | # ./main.py --dataset imagenet --loss entr \ 28 | # --out-name ../xp/imagenet/im640k_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \ 29 | # --parallel-gpu --train-size 640000 --no-visdom \ 30 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 31 | 32 | # ./main.py --dataset imagenet --loss entr \ 33 | # --out-name ../xp/imagenet/imall_entr_lr=${lr_0}_mu=${mu}_tau=${tau} \ 34 | # --parallel-gpu --no-visdom \ 35 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 36 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_subsets_lml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | lr_0=1 4 | tau=1. 5 | mu=0.00025 6 | 7 | cd $(dirname $0)/.. 8 | 9 | # source ~/imagenet-fast.sh 10 | # source ~/.private 11 | export CUDA_VISIBLE_DEVICES=2 12 | 13 | # ./main.py --dataset imagenet --loss lml \ 14 | # --out-name ../xp/imagenet/im64k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \ 15 | # --parallel-gpu --train-size 64000 --no-visdom \ 16 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 17 | 18 | # ./main.py --dataset imagenet --loss lml \ 19 | # --out-name ../xp/imagenet/im128k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \ 20 | # --parallel-gpu --train-size 128000 --no-visdom \ 21 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 22 | 23 | # ./main.py --dataset imagenet --loss lml \ 24 | # --out-name ../xp/imagenet/im320k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \ 25 | # --parallel-gpu --train-size 320000 --no-visdom \ 26 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 27 | 28 | # ./main.py --dataset imagenet --loss lml \ 29 | # --out-name ../xp/imagenet/im640k_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \ 30 | # --parallel-gpu --train-size 640000 --no-visdom \ 31 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 32 | 33 | # ./main.py --dataset imagenet --loss lml \ 34 | # --out-name ../xp/imagenet/imall_lml_lr=${lr_0}_mu=${mu}_tau=${tau} \ 35 | # --parallel-gpu --no-visdom \ 36 | # --lr_0 $lr_0 --tau $tau --mu $mu --use_dali 37 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/imagenet_subsets_svm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | lr_0=1 4 | tau=0.1 5 | mu=2.5e-4 6 | 7 | cd $(dirname $0)/.. 
8 | export CUDA_VISIBLE_DEVICES=0 9 | 10 | source ~/imagenet-fast.sh 11 | 12 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im64k_svm \ 13 | # --parallel-gpu --train-size 64000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom; 14 | 15 | python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im128k_svm \ 16 | --parallel-gpu --train-size 128000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom; 17 | 18 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im320k_svm \ 19 | # --parallel-gpu --train-size 320000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom; 20 | 21 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/im640k_svm \ 22 | # --parallel-gpu --train-size 640000 --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom; 23 | 24 | # python main.py --dataset imagenet --loss svm --out-name ../xp/imagenet/imall_svm \ 25 | # --parallel-gpu --lr_0 $lr_0 --tau $tau --mu $mu --no-visdom; 26 | -------------------------------------------------------------------------------- /smooth-topk/src/scripts/perf-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd $(dirname $0)/.. 4 | 5 | export CUDA_VISIBLE_DEVICES=0 6 | export OMP_NUM_THREADS=1 7 | 8 | for NCLS in 1000 10000; do 9 | for K in 5 50 100; do 10 | ./scripts/perf.py --n_classes $NCLS --k $K --n_trials 50 11 | ./scripts/perf.py --n_classes $NCLS --k $K --n_trials 50 --no-cuda 12 | done 13 | done 14 | -------------------------------------------------------------------------------- /smooth-topk/src/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/locuslab/lml/cf8251e56332bfe89d2078b06c92d8d489352266/smooth-topk/src/tests/__init__.py -------------------------------------------------------------------------------- /smooth-topk/src/tests/py_ref.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.misc as sp 3 | import itertools 4 | 5 | from tests.utils import to_numpy 6 | 7 | 8 | def log1mexp_py(x): 9 | x = to_numpy(x).astype(np.float128) 10 | res = np.log(-np.expm1(x)) 11 | return res 12 | 13 | 14 | def max_svm_py(scores, y_truth, alpha=1.): 15 | 16 | scores = scores.data.numpy() 17 | y_truth = y_truth.data.numpy() 18 | 19 | objective = 0 20 | n_samples = scores.shape[0] 21 | n_classes = scores.shape[1] 22 | for i in range(n_samples): 23 | # find maximally violated constraint 24 | loss_augmented = np.array([scores[i, y] + alpha * int(y != y_truth[i]) 25 | for y in range(n_classes)]) 26 | y_star = np.argmax(loss_augmented) 27 | 28 | # update metrics 29 | delta = int(y_truth[i] != y_star) * alpha 30 | objective += max(delta + scores[i, y_star] - scores[i, y_truth[i]], 0) 31 | 32 | objective *= 1. / n_samples 33 | 34 | return objective 35 | 36 | 37 | def svm_topk_max_py(scores, y_truth, k): 38 | 39 | assert k > 1 40 | 41 | scores = scores.data.numpy() 42 | y_truth = y_truth.data.numpy() 43 | 44 | objective = 0 45 | n_samples = scores.shape[0] 46 | n_classes = scores.shape[1] 47 | for i in range(n_samples): 48 | # all scores for sample i except ground truth score 49 | scores_ = np.array([scores[i, y] for y in range(n_classes) 50 | if y != y_truth[i]]) 51 | 52 | # k maximal scores excluding y_truth + loss of 1 53 | obj_1 = np.mean(np.sort(scores_)[-k:]) + 1. 
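        # (added note) the update below computes max(obj_1, obj_2) - obj_2,
        # which equals max(obj_1 - obj_2, 0): the hinge on the top-k margin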
54 | 55 | # k - 1 maximal scores excluding y_truth + score of y_truth 56 | obj_2 = (np.sum(np.sort(scores_)[-k + 1:]) + scores[i, y_truth[i]]) / k 57 | 58 | # update metrics 59 | objective += max(obj_1, obj_2) - obj_2 60 | 61 | objective *= 1. / n_samples 62 | 63 | return objective 64 | 65 | 66 | def smooth_svm_py(x, y, tau): 67 | x, y = to_numpy(x), to_numpy(y) 68 | n_samples, n_classes = x.shape 69 | scores = x + np.not_equal(np.arange(n_classes)[None, :], y[:, None]) - \ 70 | x[np.arange(n_samples), y][:, None] 71 | loss = tau * np.mean(sp.logsumexp(scores / tau, axis=1)) 72 | return loss 73 | 74 | 75 | def sum_product_py(x, k): 76 | x = to_numpy(x) 77 | n_samples, n_classes = x.shape 78 | res = np.zeros(n_samples) 79 | for indices in itertools.combinations(range(n_classes), k): 80 | res += np.product(x[:, indices], axis=1) 81 | return res 82 | 83 | 84 | def svm_topk_smooth_py_1(x, y, tau, k): 85 | x, y = to_numpy(x), to_numpy(y) 86 | x = x.astype(np.float128) 87 | tau = float(tau) 88 | n_samples, n_classes = x.shape 89 | exp = np.exp(x * 1. / (k * tau)) 90 | 91 | term_1 = np.zeros(n_samples) 92 | for indices in itertools.combinations(range(n_classes), k): 93 | delta = 1. - np.sum(indices == y[:, None], axis=1) 94 | term_1 += np.product(exp[:, indices], axis=1) * np.exp(delta / tau) 95 | 96 | term_2 = np.zeros(n_samples) 97 | for i in range(n_samples): 98 | all_but_y = [j for j in range(n_classes) if j != y[i]] 99 | for indices in itertools.combinations(all_but_y, k - 1): 100 | term_2[i] += np.product(exp[i, indices]) * exp[i, y[i]] 101 | 102 | loss = tau * (np.log(term_1) - np.log(term_2)) 103 | 104 | return loss 105 | 106 | 107 | def svm_topk_smooth_py_2(x, y, tau, k): 108 | x, y = to_numpy(x), to_numpy(y) 109 | n_samples, n_classes = x.shape 110 | exp = np.exp(x * 1. / (k * tau)) 111 | 112 | term_1 = np.zeros(n_samples) 113 | for i in range(n_samples): 114 | all_but_y = [j for j in range(n_classes) if j != y[i]] 115 | for indices in itertools.combinations(all_but_y, k - 1): 116 | term_1[i] += np.product(exp[i, indices]) 117 | 118 | term_2 = np.zeros(n_samples) 119 | for i in range(n_samples): 120 | all_but_y = [j for j in range(n_classes) if j != y[i]] 121 | for indices in itertools.combinations(all_but_y, k): 122 | term_2[i] += np.product(exp[i, indices]) 123 | 124 | all_ = np.arange(n_samples) 125 | loss = tau * (np.log(term_1 * exp[all_, y] + np.exp(1. 
/ tau) * term_2) - 126 | np.log(term_1 * exp[all_, y])) 127 | return loss 128 | -------------------------------------------------------------------------------- /smooth-topk/src/tests/test_log.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import unittest 4 | import numpy as np 5 | 6 | from losses.logarithm import LogTensor, log1mexp 7 | from tests.utils import assert_all_close, V 8 | from tests.py_ref import log1mexp_py 9 | 10 | 11 | class TestLogTensor(unittest.TestCase): 12 | 13 | def setUp(self): 14 | 15 | torch.manual_seed(1234) 16 | 17 | self.n_element = 50 18 | self.x = torch.randn(self.n_element).abs() 19 | self.y = torch.randn(self.n_element).abs() 20 | self.nonzero_const = np.random.rand() 21 | 22 | def testSumTensors(self): 23 | 24 | sum_ = LogTensor(V(self.x)) + LogTensor(V(self.y)) 25 | res_sb = sum_.torch() 26 | res_th = torch.log(torch.exp(self.x.double()) + 27 | torch.exp(self.y.double())) 28 | 29 | assert_all_close(res_th, res_sb) 30 | 31 | def testSumNonZero(self): 32 | 33 | sum_ = LogTensor(V(self.x)) + self.nonzero_const 34 | res_sb = sum_.torch() 35 | res_th = torch.log(torch.exp(self.x.double()) + 36 | self.nonzero_const) 37 | 38 | assert_all_close(res_th, res_sb) 39 | 40 | def testSumZero(self): 41 | 42 | sum_ = LogTensor(V(self.x)) + 0 43 | res_sb = sum_.torch() 44 | res_th = self.x 45 | 46 | assert_all_close(res_th, res_sb) 47 | 48 | def testMulTensors(self): 49 | 50 | sum_ = LogTensor(V(self.x)) * LogTensor(V(self.y)) 51 | res_sb = sum_.torch() 52 | res_th = self.x.double() + self.y.double() 53 | 54 | assert_all_close(res_th, res_sb) 55 | 56 | def testMulNonZero(self): 57 | 58 | sum_ = LogTensor(V(self.x)) * self.nonzero_const 59 | res_sb = sum_.torch() 60 | res_th = self.x.double() + math.log(self.nonzero_const) 61 | 62 | assert_all_close(res_th, res_sb) 63 | 64 | def testMulZero(self): 65 | 66 | sum_ = LogTensor(V(self.x)) * 0 67 | res_sb = sum_.torch() 68 | res_th = -np.inf * np.ones(res_sb.size()) 69 | 70 | assert_all_close(res_th, res_sb) 71 | 72 | 73 | class Test1MExp(unittest.TestCase): 74 | 75 | def setUp(self): 76 | torch.manual_seed(1234) 77 | shape = (100, 100) 78 | self.x = -torch.randn(shape).abs() 79 | 80 | 81 | def gen_test_exp1m(scale): 82 | def test(cls): 83 | x = cls.x * 10 ** scale 84 | res_th = log1mexp(x) 85 | res_py = log1mexp_py(x) 86 | assert_all_close(res_th, res_py, rtol=1e-4, atol=1e-5) 87 | return test 88 | 89 | 90 | def add_scale_tests_1mexp(): 91 | for scale in (-3, -2, -1, 0, 1, 2, 3, 4): 92 | test = gen_test_exp1m(scale) 93 | test_name = 'test_scale_{}'.format(str(scale)) 94 | setattr(Test1MExp, test_name, test) 95 | 96 | 97 | add_scale_tests_1mexp() 98 | -------------------------------------------------------------------------------- /smooth-topk/src/tests/test_losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import unittest 3 | import numpy as np 4 | 5 | from torch.autograd import Variable 6 | from losses.svm import SmoothTop1SVM, SmoothTopkSVM, MaxTop1SVM, MaxTopkSVM 7 | from losses.functional import Topk_Smooth_SVM 8 | from tests.utils import assert_all_close, V 9 | from tests.py_ref import svm_topk_smooth_py_1, svm_topk_smooth_py_2,\ 10 | smooth_svm_py, max_svm_py, svm_topk_max_py 11 | 12 | from torch.autograd.gradcheck import gradcheck 13 | 14 | 15 | class TestMaxSVM(unittest.TestCase): 16 | 17 | def setUp(self): 18 | 19 | torch.manual_seed(1234) 20 | np.random.seed(1234) 21 | 22 | 
self.n_samples = 20 23 | self.n_classes = 7 24 | self.alpha = 1. 25 | self.x = torch.randn(self.n_samples, self.n_classes) 26 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes, 27 | size=self.n_samples)) 28 | self.k = 3 29 | 30 | def testMaxSVM(self): 31 | 32 | max_svm_th = MaxTop1SVM(self.n_classes, alpha=self.alpha) 33 | res_th = max_svm_th(V(self.x), V(self.y)) 34 | res_py = max_svm_py(V(self.x), V(self.y), alpha=self.alpha) 35 | 36 | assert_all_close(res_th, res_py) 37 | 38 | def testMaxSVMtopk(self): 39 | 40 | max_svm_th = MaxTopkSVM(self.n_classes, k=self.k) 41 | res_th = max_svm_th(V(self.x), V(self.y)) 42 | res_py = svm_topk_max_py(V(self.x), V(self.y), k=self.k) 43 | 44 | assert_all_close(res_th, res_py) 45 | 46 | 47 | class TestSmoothSVM(unittest.TestCase): 48 | 49 | def setUp(self): 50 | 51 | torch.manual_seed(1234) 52 | np.random.seed(1234) 53 | 54 | self.n_samples = 20 55 | self.n_classes = 7 56 | self.tau = float(2.) 57 | self.x = torch.randn(self.n_samples, self.n_classes) 58 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes, 59 | size=self.n_samples)) 60 | 61 | def testSmoothSVM(self): 62 | 63 | smooth_svm_th = SmoothTop1SVM(self.n_classes, tau=self.tau) 64 | res_th = smooth_svm_th(V(self.x), V(self.y)) 65 | res_py = smooth_svm_py(V(self.x), V(self.y), self.tau) 66 | 67 | assert_all_close(res_th, res_py) 68 | 69 | 70 | class TestSmoothSVMTopk(unittest.TestCase): 71 | 72 | def setUp(self): 73 | 74 | torch.manual_seed(1234) 75 | np.random.seed(1234) 76 | 77 | self.n_samples = 2 78 | self.n_classes = 7 79 | self.k = 5 80 | self.tau = float(2.) 81 | self.x = torch.randn(self.n_samples, self.n_classes) 82 | self.y = torch.from_numpy(np.random.randint(0, self.n_classes, 83 | size=self.n_samples)) 84 | self.labels = torch.from_numpy(np.arange(self.n_classes)) 85 | 86 | def testSmoothSVMpy(self): 87 | 88 | res_py_1 = svm_topk_smooth_py_1(V(self.x), V(self.y), self.tau, self.k) 89 | res_py_2 = svm_topk_smooth_py_2(V(self.x), V(self.y), self.tau, self.k) 90 | 91 | assert_all_close(res_py_1, res_py_2) 92 | 93 | def testSmoothSVMth_functional(self): 94 | 95 | F = Topk_Smooth_SVM(self.labels, self.k, self.tau) 96 | res_th = F(V(self.x), V(self.y)) 97 | res_py = svm_topk_smooth_py_1(V(self.x), V(self.y), self.tau, self.k) 98 | 99 | assert_all_close(res_th, res_py) 100 | 101 | def testSmoothSVMth_loss(self): 102 | 103 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, 104 | k=self.k) 105 | res_th = svm_topk_smooth_th(V(self.x), V(self.y)) 106 | res_py = svm_topk_smooth_py_1(V(self.x), 107 | V(self.y), 108 | self.tau, self.k).mean() 109 | 110 | assert_all_close(res_th, res_py) 111 | 112 | def testSmoothSVMth_loss_scales(self): 113 | 114 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, k=self.k) 115 | for scale in (1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3): 116 | x = self.x * scale 117 | res_th = svm_topk_smooth_th(V(x), V(self.y)) 118 | res_py = svm_topk_smooth_py_1(V(x), V(self.y), self.tau, self.k).mean() 119 | assert_all_close(res_th, res_py) 120 | 121 | def testGradSmoothSVMth_loss(self): 122 | 123 | svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, k=self.k) 124 | for scale in (1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4): 125 | x = self.x * scale 126 | x = Variable(x, requires_grad=True) 127 | assert gradcheck(lambda x: svm_topk_smooth_th(x, V(self.y)), 128 | (x,), atol=1e-2, rtol=1e-3, eps=max(1e-4 * scale, 1e-2)), \ 129 | "failed with scale {}".format(scale) 130 | 
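The reference implementations above can also be exercised outside of the unittest harness. Below is a minimal sketch (not part of the repository; the sample count, class count, and hyper-parameters are arbitrary, and it assumes the pinned requirements since `tests/py_ref.py` imports `scipy.misc`) that checks the vectorized smooth top-k SVM loss against the brute-force reference:

```python
import torch
import numpy as np

from losses.svm import SmoothTopkSVM
from tests.py_ref import svm_topk_smooth_py_1
from tests.utils import V, assert_all_close

torch.manual_seed(0)
np.random.seed(0)

x = torch.randn(8, 10)  # scores: 8 samples, 10 classes
y = torch.from_numpy(np.random.randint(0, 10, size=8))

loss = SmoothTopkSVM(n_classes=10, tau=1.0, k=3)

# the vectorized loss should agree with exhaustive enumeration
assert_all_close(loss(V(x), V(y)),
                 svm_topk_smooth_py_1(V(x), V(y), 1.0, 3).mean())
```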
-------------------------------------------------------------------------------- /smooth-topk/src/tests/test_sum_product.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import unittest 4 | import numpy as np 5 | 6 | from losses.polynomial import LogSumExp 7 | from tests.utils import assert_all_close, V 8 | from tests.py_ref import sum_product_py 9 | from tests.th_ref import log_sum_exp_k 10 | from torch.autograd import Variable, gradcheck 11 | 12 | 13 | class TestSumProduct(unittest.TestCase): 14 | 15 | def setUp(self): 16 | 17 | torch.set_printoptions(linewidth=160, threshold=1e3) 18 | 19 | seed = 7 20 | np.random.seed(1234) 21 | seed = np.random.randint(1e5) 22 | torch.manual_seed(seed) 23 | 24 | self.eps = 1e-4 25 | 26 | def testLogSumProductExp(self): 27 | 28 | self.n_samples = 25 29 | self.n_classes = 20 30 | self.k = 7 31 | self.x = torch.randn(self.n_samples, self.n_classes) 32 | 33 | res_th = LogSumExp(self.k, p=1)(V(self.x)).squeeze() 34 | res1_th, res2_th = res_th[0], res_th[1] 35 | res1_py = np.log(sum_product_py(V(torch.exp(self.x)), self.k - 1)) 36 | res2_py = np.log(sum_product_py(V(torch.exp(self.x)), self.k)) 37 | 38 | assert_all_close(res1_th, res1_py) 39 | assert_all_close(res2_th, res2_py) 40 | 41 | def test_backward(self): 42 | 43 | self.n_samples = 25 44 | self.n_classes = 1000 45 | self.k = 100 46 | self.k = 20 47 | self.x = torch.randn(self.n_samples, self.n_classes) 48 | self.x, _ = torch.sort(self.x, dim=1, descending=True) 49 | 50 | for tau in (5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 1e1, 5e2, 1e3): 51 | x = self.x / (tau * self.k) 52 | top, _ = x.topk(self.k + 1, 1) 53 | thresh = 1e2 54 | hard = torch.ge(top[:, self.k - 1] - top[:, self.k], 55 | math.log(thresh)) 56 | smooth = hard.eq(0) 57 | 58 | x = x[smooth.unsqueeze(1).expand_as(x)].view(-1, x.size(1)) 59 | if not x.size(): 60 | print('empty tensor') 61 | return 62 | 63 | X_auto = Variable(x.double(), requires_grad=True) 64 | X_man = Variable(x, requires_grad=True) 65 | 66 | res1_auto, res2_auto = log_sum_exp_k(X_auto, self.k) 67 | res1_auto, res2_auto = res1_auto.squeeze(), res2_auto.squeeze() 68 | 69 | res_man = LogSumExp(self.k)(X_man).squeeze() 70 | res1_man = res_man[0] 71 | res2_man = res_man[1] 72 | 73 | proj1 = torch.randn(res1_auto.size()).fill_(1) 74 | proj2 = torch.randn(res2_auto.size()).fill_(1) 75 | 76 | proj_auto = torch.dot(V(proj1.double()), res1_auto) +\ 77 | torch.dot(V(proj2.double()), res2_auto) 78 | proj_man = torch.dot(V(proj1), res1_man) +\ 79 | torch.dot(V(proj2), res2_man) 80 | proj_auto.backward() 81 | proj_man.backward() 82 | 83 | # check forward 84 | assert_all_close(res1_auto, res1_man, atol=1e0, rtol=1e-3) 85 | assert_all_close(res2_auto, res2_man, atol=1e0, rtol=1e-3) 86 | 87 | # check backward 88 | assert_all_close(X_auto.grad, X_man.grad, atol=0.05, rtol=1e-2) -------------------------------------------------------------------------------- /smooth-topk/src/tests/th_ref.py: -------------------------------------------------------------------------------- 1 | from losses.polynomial.multiplication import Multiplication 2 | from losses.polynomial.divide_conquer import divide_and_conquer 3 | 4 | 5 | def log_sum_exp_k(x, k): 6 | # number of samples and number of coefficients to compute 7 | n_s = x.size(0) 8 | 9 | assert k <= x.size(1) 10 | 11 | # clone to allow in-place operations 12 | x = x.clone() 13 | 14 | # pre-compute normalization 15 | x_summed = x.sum(1) 16 | 17 | # invert in log-space 18 | x.t_().mul_(-1) 
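    # (added note) negating in log-space corresponds to taking reciprocals:
    # the degree-j coefficient of prod_i (exp(-x_i) + z) equals
    # sigma_j(exp(x)) * exp(-sum_i x_i), so adding x_summed back below
    # recovers log sigma_j(exp(x))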
19 | 20 | # initialize polynomials (in log-space) 21 | x = [x, x.clone().fill_(0)] 22 | 23 | # polynomial multiplications 24 | log_res = divide_and_conquer(x, k, mul=Multiplication(k)) 25 | 26 | # re-normalize 27 | coeff = log_res + x_summed[None, :] 28 | 29 | # avoid broadcasting issues (in particular if n_s = 1) 30 | coeff = coeff.view(k + 1, n_s) 31 | 32 | return coeff[k - 1: k + 1] 33 | -------------------------------------------------------------------------------- /smooth-topk/src/tests/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from torch.autograd import Variable 5 | 6 | 7 | def V(x, requires_grad=False): 8 | """ 9 | Returns a clone of tensor x wrapped in a Variable. 10 | Avoids issues with in-place operations if x is used in several functions 11 | """ 12 | assert torch.is_tensor(x) 13 | return Variable(x.clone(), requires_grad=requires_grad) 14 | 15 | 16 | def to_numpy(tensor): 17 | if isinstance(tensor, Variable): 18 | tensor = tensor.data 19 | if torch.is_tensor(tensor): 20 | tensor = tensor.clone().cpu().numpy() 21 | if not hasattr(tensor, '__len__'): 22 | tensor = np.array([tensor]) 23 | assert isinstance(tensor, np.ndarray) 24 | tensor = tensor.squeeze() 25 | return tensor 26 | 27 | 28 | def assert_all_close(tensor_1, tensor_2, rtol=1e-4, atol=1e-4): 29 | tensor_1 = to_numpy(tensor_1).astype(np.float64) 30 | tensor_2 = to_numpy(tensor_2).astype(np.float64) 31 | np.testing.assert_equal(np.isposinf(tensor_1), 32 | np.isposinf(tensor_2)) 33 | np.testing.assert_equal(np.isneginf(tensor_1), 34 | np.isneginf(tensor_2)) 35 | indices = np.isfinite(tensor_1) 36 | if indices.sum(): 37 | tensor_1 = tensor_1[indices] 38 | tensor_2 = tensor_2[indices] 39 | err = np.max(np.abs(tensor_1 - tensor_2)) 40 | err_msg = "Max abs error: {0:.3g}".format(err) 41 | np.testing.assert_allclose(tensor_1, tensor_2, rtol=rtol, atol=atol, 42 | err_msg=err_msg) 43 | --------------------------------------------------------------------------------
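As a closing sketch (not part of the repository; the shapes, seed, and k below are arbitrary), the helpers above can be combined to check the divide-and-conquer computation of the elementary symmetric polynomials against direct enumeration:

```python
import itertools

import torch

from tests.th_ref import log_sum_exp_k
from tests.utils import V, assert_all_close

torch.manual_seed(0)
x = torch.randn(4, 10).double()  # 4 samples, 10 classes
k = 3

# rows of res: log sigma_{k-1}(exp(x)) and log sigma_k(exp(x))
res = log_sum_exp_k(V(x), k)

# brute force: sigma_k(exp(x)) as a sum of products over all k-subsets
e = torch.exp(x)
ref_k = sum(e[:, list(idx)].prod(1)
            for idx in itertools.combinations(range(10), k))

assert_all_close(res[1], torch.log(ref_k))
```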