├── .gitignore ├── README.md ├── assets ├── overview.png └── results.png ├── cutpaste ├── __init__.py ├── anno.py ├── background.py ├── clip_postprocess.py ├── config │ ├── bg │ │ └── VOC │ │ │ └── 1shot │ │ │ ├── bg_template.yaml │ │ │ ├── bg_template_plus_dalle.yaml │ │ │ └── real.yaml │ ├── config.yaml │ ├── ds │ │ └── VOC.yaml │ ├── exp │ │ ├── demo_cutpaste.yaml │ │ ├── demo_syn+real.yaml │ │ ├── demo_syn.yaml │ │ └── demo_synFg.yaml │ ├── fg │ │ └── VOC │ │ │ ├── 1shot │ │ │ └── real.yaml │ │ │ └── fg_template.yaml │ └── paster │ │ └── defaults.yaml ├── foreground.py ├── paste.py ├── paster.py ├── pb.py ├── pyblur3 │ ├── BoxBlur.py │ ├── DefocusBlur.py │ ├── GaussianBlur.py │ ├── LineDictionary.py │ ├── LinearMotionBlur.py │ ├── PsfBlur.py │ ├── RandomizedBlur.py │ ├── __init__.py │ └── psf.pkl └── utils.py ├── data ├── test_data │ ├── VOC2012 │ │ ├── Annotations │ │ │ ├── 2007_000039.xml │ │ │ ├── 2007_000063.xml │ │ │ ├── 2007_000648.xml │ │ │ ├── 2007_001420.xml │ │ │ ├── 2007_001709.xml │ │ │ ├── 2007_001901.xml │ │ │ ├── 2007_002216.xml │ │ │ ├── 2007_002668.xml │ │ │ ├── 2007_002669.xml │ │ │ ├── 2007_002845.xml │ │ │ ├── 2007_003207.xml │ │ │ ├── 2007_003565.xml │ │ │ ├── 2007_003778.xml │ │ │ ├── 2007_003876.xml │ │ │ ├── 2007_004166.xml │ │ │ ├── 2007_005273.xml │ │ │ ├── 2007_005702.xml │ │ │ ├── 2007_006303.xml │ │ │ ├── 2007_006400.xml │ │ │ └── 2007_006673.xml │ │ ├── JPEGImages │ │ │ ├── 2007_000039.jpg │ │ │ ├── 2007_000063.jpg │ │ │ ├── 2007_000648.jpg │ │ │ ├── 2007_001420.jpg │ │ │ ├── 2007_001709.jpg │ │ │ ├── 2007_001901.jpg │ │ │ ├── 2007_002216.jpg │ │ │ ├── 2007_002668.jpg │ │ │ ├── 2007_002669.jpg │ │ │ ├── 2007_002845.jpg │ │ │ ├── 2007_003207.jpg │ │ │ ├── 2007_003565.jpg │ │ │ ├── 2007_003778.jpg │ │ │ ├── 2007_003876.jpg │ │ │ ├── 2007_004166.jpg │ │ │ ├── 2007_005273.jpg │ │ │ ├── 2007_005702.jpg │ │ │ ├── 2007_006303.jpg │ │ │ ├── 2007_006400.jpg │ │ │ └── 2007_006673.jpg │ │ └── SegmentationObject │ │ │ ├── 2007_000039.png │ │ │ 
├── 2007_000063.png │ │ │ ├── 2007_000648.png │ │ │ ├── 2007_001420.png │ │ │ ├── 2007_001709.png │ │ │ ├── 2007_001901.png │ │ │ ├── 2007_002216.png │ │ │ ├── 2007_002668.png │ │ │ ├── 2007_002669.png │ │ │ ├── 2007_002845.png │ │ │ ├── 2007_003207.png │ │ │ ├── 2007_003565.png │ │ │ ├── 2007_003778.png │ │ │ ├── 2007_003876.png │ │ │ ├── 2007_004166.png │ │ │ ├── 2007_005273.png │ │ │ ├── 2007_005702.png │ │ │ ├── 2007_006303.png │ │ │ ├── 2007_006400.png │ │ │ └── 2007_006673.png │ ├── background │ │ ├── bg_template │ │ │ ├── forest │ │ │ │ └── A real photo of forest │ │ │ │ │ ├── 589.png │ │ │ │ │ ├── 590.png │ │ │ │ │ ├── 591.png │ │ │ │ │ ├── 593.png │ │ │ │ │ └── 598.png │ │ │ └── railway without train │ │ │ │ └── A real photo of railway without train │ │ │ │ ├── 367.png │ │ │ │ ├── 373.png │ │ │ │ ├── 429.png │ │ │ │ └── 475.png │ │ └── context_augment │ │ │ ├── 2007_000504.jpg │ │ │ └── a colored photo of an empty pile of trash on the ground │ │ │ │ └── 49.png │ │ │ ├── 2007_001609.jpg │ │ │ └── a colored photo of an empty living room │ │ │ │ └── 69.png │ │ │ ├── 2007_001764.jpg │ │ │ └── a real image of an empty grass covered field │ │ │ │ └── 71.png │ │ │ ├── 2007_002227.jpg │ │ │ └── a real image of an empty wall in a living room │ │ │ │ └── 45.png │ │ │ ├── 2007_002281.jpg │ │ │ └── a colored photo of an empty street next to a forest │ │ │ │ └── 79.png │ │ │ ├── 2007_002967.jpg │ │ │ └── a real image of an empty doorstep │ │ │ │ └── 58.png │ │ │ ├── 2007_003451.jpg │ │ │ └── a real image of an empty living room filled with furniture and a large window │ │ │ │ └── 40.png │ │ │ ├── 2007_005124.jpg │ │ │ └── a real image of an empty grass field │ │ │ │ └── 58.png │ │ │ ├── 2007_006136.jpg │ │ │ └── a real image of an empty grass near a forest │ │ │ │ └── 33.png │ │ │ ├── 2007_007585.jpg │ │ │ └── a real image of an empty grass field │ │ │ │ └── 67.png │ │ │ └── clip_postprocessed.json │ ├── foreground │ │ ├── foreground_mask │ │ │ ├── aeroplane_mask │ │ │ 
│ ├── The picture of an airplane on a pure background422.png │ │ │ │ ├── an airplane in a pure background449.png │ │ │ │ └── an airplane106.png │ │ │ ├── bicycle_mask │ │ │ │ ├── a bicycle in a pure background122.png │ │ │ │ ├── a bicycle in a pure background128.png │ │ │ │ └── a bicycle in a pure background225.png │ │ │ ├── bird_mask │ │ │ │ ├── a bird in a pure background162.png │ │ │ │ ├── a bird in a pure background293.png │ │ │ │ └── a bird in a pure background374.png │ │ │ ├── boat_mask │ │ │ │ ├── The picture of a boat on a pure background39.png │ │ │ │ ├── a boat in a pure background371.png │ │ │ │ └── a boat79.png │ │ │ ├── bottle_mask │ │ │ │ ├── The picture of a bottle on a pure background407.png │ │ │ │ ├── a bottle in a pure background108.png │ │ │ │ └── a bottle89.png │ │ │ ├── bus_mask │ │ │ │ ├── The picture of a bus on a pure background460.png │ │ │ │ ├── a bus105.png │ │ │ │ └── a bus410.png │ │ │ ├── car_mask │ │ │ │ ├── The picture of a car on a pure background100.png │ │ │ │ ├── The picture of a car on a pure background286.png │ │ │ │ └── a car97.png │ │ │ ├── cat_mask │ │ │ │ ├── a cat245.png │ │ │ │ ├── a cat58.png │ │ │ │ └── a cat62.png │ │ │ ├── chair_mask │ │ │ │ ├── The picture of a chair on a pure background423.png │ │ │ │ ├── a chair in a pure background301.png │ │ │ │ └── a chair402.png │ │ │ ├── cow_mask │ │ │ │ ├── a cow in a pure background9.png │ │ │ │ ├── a cow223.png │ │ │ │ └── a cow56.png │ │ │ ├── diningtable_mask │ │ │ │ ├── a dining table in a pure background63.png │ │ │ │ ├── a table123.png │ │ │ │ └── a table300.png │ │ │ ├── dog_mask │ │ │ │ ├── The picture of a dog on a pure background236.png │ │ │ │ ├── a dog in a pure background487.png │ │ │ │ └── a dog121.png │ │ │ ├── horse_mask │ │ │ │ ├── The picture of a horse on a pure background469.png │ │ │ │ ├── a horse in a pure background293.png │ │ │ │ └── a horse298.png │ │ │ ├── motorbike_mask │ │ │ │ ├── a motorbike in a pure background367.png │ │ │ │ ├── a motorbike in 
a pure background421.png │ │ │ │ └── a motorbike315.png │ │ │ ├── person_mask │ │ │ │ ├── a man146.png │ │ │ │ ├── a man253.png │ │ │ │ └── a man345.png │ │ │ ├── pottedplant_mask │ │ │ │ ├── a potted plant11.png │ │ │ │ ├── a potted plant374.png │ │ │ │ └── a potted plant434.png │ │ │ ├── sheep_mask │ │ │ │ ├── The picture of a sheep on a pure background212.png │ │ │ │ ├── a sheep in a pure background219.png │ │ │ │ └── a sheep351.png │ │ │ ├── sofa_mask │ │ │ │ ├── a sofa in a pure background122.png │ │ │ │ ├── a sofa in a pure background353.png │ │ │ │ └── a sofa in a pure background395.png │ │ │ ├── train_mask │ │ │ │ ├── The picture of a train on a pure background303.png │ │ │ │ ├── The picture of a train on a pure background32.png │ │ │ │ └── a train in a pure background133.png │ │ │ └── tvmonitor_mask │ │ │ │ ├── a tv monitor251.png │ │ │ │ ├── an old monitor107.png │ │ │ │ └── an old monitor426.png │ │ └── foreground_rgb │ │ │ ├── a bicycle │ │ │ └── a bicycle in a pure background │ │ │ │ ├── 122.png │ │ │ │ ├── 128.png │ │ │ │ └── 225.png │ │ │ ├── a bird │ │ │ └── a bird in a pure background │ │ │ │ ├── 162.png │ │ │ │ ├── 293.png │ │ │ │ └── 374.png │ │ │ ├── a boat │ │ │ ├── The picture of a boat on a pure background │ │ │ │ └── 39.png │ │ │ ├── a boat in a pure background │ │ │ │ └── 371.png │ │ │ └── a boat │ │ │ │ └── 79.png │ │ │ ├── a bottle │ │ │ ├── The picture of a bottle on a pure background │ │ │ │ └── 407.png │ │ │ ├── a bottle in a pure background │ │ │ │ └── 108.png │ │ │ └── a bottle │ │ │ │ └── 89.png │ │ │ ├── a bus │ │ │ ├── The picture of a bus on a pure background │ │ │ │ └── 460.png │ │ │ └── a bus │ │ │ │ ├── 105.png │ │ │ │ └── 410.png │ │ │ ├── a car │ │ │ ├── The picture of a car on a pure background │ │ │ │ ├── 100.png │ │ │ │ └── 286.png │ │ │ └── a car │ │ │ │ └── 97.png │ │ │ ├── a cat │ │ │ └── a cat │ │ │ │ ├── 245.png │ │ │ │ ├── 58.png │ │ │ │ └── 62.png │ │ │ ├── a chair │ │ │ ├── The picture of a chair on a pure 
background │ │ │ │ └── 423.png │ │ │ ├── a chair in a pure background │ │ │ │ └── 301.png │ │ │ └── a chair │ │ │ │ └── 402.png │ │ │ ├── a cow │ │ │ ├── a cow in a pure background │ │ │ │ └── 9.png │ │ │ └── a cow │ │ │ │ ├── 223.png │ │ │ │ └── 56.png │ │ │ ├── a dining table │ │ │ └── a dining table in a pure background │ │ │ │ └── 63.png │ │ │ ├── a dog │ │ │ ├── The picture of a dog on a pure background │ │ │ │ └── 236.png │ │ │ ├── a dog in a pure background │ │ │ │ └── 487.png │ │ │ └── a dog │ │ │ │ └── 121.png │ │ │ ├── a horse │ │ │ ├── The picture of a horse on a pure background │ │ │ │ └── 469.png │ │ │ ├── a horse in a pure background │ │ │ │ └── 293.png │ │ │ └── a horse │ │ │ │ └── 298.png │ │ │ ├── a man │ │ │ └── a man │ │ │ │ ├── 146.png │ │ │ │ ├── 253.png │ │ │ │ └── 345.png │ │ │ ├── a motorbike │ │ │ ├── a motorbike in a pure background │ │ │ │ ├── 367.png │ │ │ │ └── 421.png │ │ │ └── a motorbike │ │ │ │ └── 315.png │ │ │ ├── a potted plant │ │ │ └── a potted plant │ │ │ │ ├── 11.png │ │ │ │ ├── 374.png │ │ │ │ └── 434.png │ │ │ ├── a sheep │ │ │ ├── The picture of a sheep on a pure background │ │ │ │ └── 212.png │ │ │ ├── a sheep in a pure background │ │ │ │ └── 219.png │ │ │ └── a sheep │ │ │ │ └── 351.png │ │ │ ├── a sofa │ │ │ └── a sofa in a pure background │ │ │ │ ├── 122.png │ │ │ │ ├── 353.png │ │ │ │ └── 395.png │ │ │ ├── a table │ │ │ └── a table │ │ │ │ ├── 123.png │ │ │ │ └── 300.png │ │ │ ├── a train │ │ │ ├── The picture of a train on a pure background │ │ │ │ ├── 303.png │ │ │ │ └── 32.png │ │ │ └── a train in a pure background │ │ │ │ └── 133.png │ │ │ ├── a tv monitor │ │ │ └── a tv monitor │ │ │ │ └── 251.png │ │ │ ├── an airplane │ │ │ ├── The picture of an airplane on a pure background │ │ │ │ └── 422.png │ │ │ ├── an airplane in a pure background │ │ │ │ └── 449.png │ │ │ └── an airplane │ │ │ │ └── 106.png │ │ │ └── an old monitor │ │ │ └── an old monitor │ │ │ ├── 107.png │ │ │ └── 426.png │ ├── labels.txt │ └── 
train_cls-1shot.txt └── voc2012 │ ├── 1k_10_shot.json │ ├── 1k_1_shot.json │ ├── label2id.json │ ├── labels.txt │ ├── prompt_replace_original_200_selected.json │ ├── test.txt │ ├── train_1k_cls.txt │ ├── train_aug.txt │ ├── train_cls-10shot.txt │ ├── train_cls-1shot.txt │ ├── train_cls.txt │ └── val_cls.txt ├── detection ├── train.py ├── utils.py └── wandb_writer.py ├── instance_seg ├── run.sh ├── seg.py └── seg_lazy.py ├── requirements.txt ├── t2i_generate ├── background_captions.py ├── foreground_captions.py └── stable_diffusion2.py └── viz └── viz.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | cutpaste/outputs 3 | artifact/ 4 | viz/out 5 | **/__pycache__/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Text2Image-for-Detection 2 | 3 | Official Implementation for ["DALL-E for Detection: Language-driven Compositional Image Synthesis for Object Detection"](https://arxiv.org/pdf/2309.05956.pdf) 4 | 5 | Extention version: ["Beyond Generation: Harnessing Text to Image Models for Object Detection and Segmentation"](https://arxiv.org/pdf/2309.05956.pdf) 6 | 7 |
8 | Yunhao Ge*,   9 | Jiashu Xu*,   10 | Brian Nlong Zhao,   11 | Neel Joshi,   12 | Laurent Itti,   13 | Vibhav Vineet 14 |
15 | 16 | Contact: [yunhaoge@usc.edu](mailto:yunhaoge@usc.edu); [jxu1@g.harvard.edu](mailto:jxu1@g.harvard.edu) 17 | 18 | ## Install 19 | 20 | This project is developed using Python 3.10 and PyTorch 1.10.1 under CUDA 11.3. We recommend you to use the same version of Python and PyTorch. 21 | 22 | ```bash 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ## Our method 27 | 28 |

29 | Arch 30 |

31 | 32 | We propose a noval approach for generating diverse and large-scale pseudo-labeled training datasets, tailored specifically to enhance downstream 33 | object detection and segmentation models. 34 | We leverage text-to-image models (e.g. your favourite diffusion model) to independently generate foregrounds and backgrounds. 35 | Then we composite foregrounds onto the backgrounds, a process where we obtain the bounding boxes or segmentation masks of the foregrounds, to be used in the downstream models. 36 | 37 | Specifically, 38 | - Foregrounds are generated using text-to-image models with fixed templates (such as "A photo of ", Table 2 of the paper). Note this is purely zero-shot, i.e. no training data whatsoever is required. 39 | - Backgrounds are slightly more complicated. 40 | - For zero-shot, we have another set of fixed templates (such as "A real photo of forest", Table 3 of the paper). 41 | - For few-shot where we have a limited set of training images, we first image caption each images, 42 | then extract the background contextual words from the caption. Those contextual words are words such as "grass field," indicating the location and context of the training images. 43 | Lastly we augment the original caption by inputting the contextual words into the templates. 44 | This step is necessary as otherwise the caption would possibly contain the foreground objects (eg a dog) as well, 45 | and using this original caption into the text-to-image models will likely generate a dog that we do not have a segmentation or bounding box label, and thus confusing the model. 46 | - Both foregrounds and backgrounds are feed into CLIP to control the quality of the generated images. 47 | 48 | ## Usage 49 | 50 | ### Data 51 | In this project we use Pascal VOC in a low-resource regime. 52 | 53 | You should download original dataset, e.g. [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/). 
54 | Note that for Pascal we use train & Val set from the [nsrom repo](https://github.com/NUST-Machine-Intelligence-Laboratory/nsrom). 55 | The data structure will be 56 | ``` 57 | data 58 | ├── COCO2017 59 | └── voc2012 60 | ├── labels.txt 61 | ├── train_aug.txt 62 | ├── ... 63 | └── VOC2012 64 | ├── Annotations 65 | ├── ImageSets 66 | ... 67 | ``` 68 | We have k-shot selections on `data/voc2012`: 69 | [1 shot](data/voc2012/train_cls-1shot.txt) and [10 shot](data/voc2012/train_cls-10shot.txt). 70 | 71 | ### Diffusion Generation 72 | The code to generate foregrounds and backgrounds are in `t2i_generate/` folder. 73 | First you need to generate captions for [foreground](t2i_generate/foreground_captions.py) and [background](t2i_generate/background_captions.py). 74 | Then you can use stable diffusion 2 to generate images via `python stable_diffusion2.py`. 75 | 76 | ### Cut Paste 77 | The code to paste foregrounds onto backgrounds are in `cutpaste/` folder. 78 | We use [hydra+torch lightning](https://github.com/ashleve/lightning-hydra-template) to run different variants. 79 | Example config files are in `configs/` folder, and we include a test dataset in `data/test_data/` folder. 80 | For example, you can use `python paste.py exp=` to launch the script, where `` is 81 | 82 | - [`demo_cutpaste`](cutpaste/config/exp/demo_cutpaste.yaml): paste real foregrounds onto real backgrounds. This is **Pure Real + cut paste** in [Results](#results). 83 | - [`demo_synFg`](cutpaste/config/exp/demo_synFg.yaml): paste synthetic foregrounds onto real backgrounds. This is **Syn Fg** in [Results](#results). 84 | - [`demo_syn`](cutpaste/config/exp/demo_syn.yaml): paste synthetic foregrounds onto synthetic backgrounds. This is **Pure Syn** in [Results](#results). 85 | - [`demo_syn+real`](cutpaste/config/exp/demo_syn+real.yaml): paste synthetic and real foregrounds onto synthetic and real backgrounds. This is **Syn + real** in [Results](#results). 
86 | We recommend you to always use this setting as this offers the benefit of both synthetic and real data. 87 | 88 | You can also use `viz/` to visualize the generated datasets. Simple do 89 | ```shell 90 | python viz/viz.py 91 | ``` 92 | This will generate 30 randomly sampled annotated images in `viz/` folder. 93 | 94 | Readers are welcome to check the config files for more parameters to control the process. Some notable mentions: 95 | - by default the foreground is pasted with Space Maximize Paste algorithm described in [EM-paste](https://arxiv.org/pdf/2212.07629.pdf): i.e. each 96 | foreground is re-scaled and pasted within the max inscribing circle of the background contour. 97 | - `repeat_background` is the number of time the algorithm repeats the pasting process. By default it is 2, i.e. each background is used twice, but each time with different foregrounds pasted onto it. 98 | - `num_workers` is the number of workers to use for multiprocessing. We recommend you to use at least 4. 99 | - `use_random_paste` is for random paste (i.e. paste foreground on a random location of background image) while ranom scale the pasted foreground 0.3-0.7. This is adapted from [object cut and paste](https://arxiv.org/abs/1708.01642). 100 | - by default `num_cut_images=4` foregrounds are pasted in each background. You can also set `num_cut_lowerbound` to paste Unif(num_cut_lowerbound, num_cut_images) foregrounds. 101 | - The codebase supports 5 total blendings, namely Guassian Blur, Poisson Blur, Box Blur, Motion Blur and None (do not apply any blendings). We empirically find Gaussian alone achieves the best results. 102 | - Space Maximize Paste will scale the foreground object to match the diameter of the max inscribing circle of the background contour, but sometimes producing small foregrounds. `use_random_scaling=True` 103 | will apply the random scaling factor (0.3-0.7) as [object cut and paste](https://arxiv.org/abs/1708.01642) paper. 
104 | - `scale_factor=factor` additionally apply another scaling to the pasted foreground after Space Maximize Paste. 105 | - `center_translation_factor=factor` shift the location to paste the foreground by ± height x factor or ± width x factor, instead of pasting on the center of the max inscribing circle. 106 | 107 | 108 | ### Model Training 109 | Once the dataset is created, you can train object detection model using `detection/` and instance segmentation model using 110 | `instance_seg/`. Both are based on the battle-tested [detectron2](https://github.com/facebookresearch/detectron2). 111 | 112 | For example, on VOC 2012 with 2 GPUs, you can run 113 | ```shell 114 | # object detection 115 | python detection/train.py -s syn \ # use synthetic data 116 | --syn_dir \ 117 | -t voc_val \ # test on VOC val 118 | --test_dir \ # data/voc2012/VOC2012, we need to find val set in this folder 119 | -g 4 \ # use 4 GPUs on 1 machine 120 | --freeze --data_aug --bsz 32 --epoch 200 --resnet 50 --lr 0.01 # hyperparameters 121 | ``` 122 | For instance segmentation, use `instance_seg/seg.py` instead of `detection/train.py`. The flags are the same. 123 | 124 | For inference, simply apply the additional flag `--eval_checkpoint `. 125 | 126 | 127 | ## Results 128 | Our method results in significant improvement over the baseline on Pascal VOC and MS COCO, especially in the low-resource regime. 129 | We refer details [in the paper](https://arxiv.org/pdf/2309.05956.pdf). 130 |

131 | Results 132 |

133 | -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/overview.png -------------------------------------------------------------------------------- /assets/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/results.png -------------------------------------------------------------------------------- /cutpaste/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/__init__.py -------------------------------------------------------------------------------- /cutpaste/anno.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import Optional, Tuple 3 | 4 | import numpy as np 5 | import ujson as json 6 | from PIL import Image 7 | from pycocotools import mask as cocomask 8 | 9 | 10 | class Anno: 11 | label2id: dict 12 | id2label: dict 13 | 14 | @abstractmethod 15 | def objects(self): 16 | raise NotImplementedError 17 | 18 | @abstractmethod 19 | def create_mask(self, for_object: Optional[int] = None) -> Image.Image: 20 | raise NotImplementedError 21 | 22 | @abstractmethod 23 | def create_instance_mask(self) -> Tuple[Image.Image, dict]: 24 | raise NotImplementedError 25 | 26 | @staticmethod 27 | def factory(anno_path, seg_img_path): 28 | if anno_path is None: 29 | return EntityAnno(seg_img_path) 30 | elif seg_img_path is None: 31 | return COCOAnno(anno_path) 32 | return VOCAnno(anno_path, seg_img_path) 33 | 34 | 35 | class VOCAnno(Anno): 
36 | def __init__(self, anno_path, seg_img_path): 37 | import xml.etree.ElementTree as ET 38 | self.anno_path = anno_path 39 | self.anno = ET.parse(anno_path).getroot() 40 | self.seg_img_path = seg_img_path 41 | 42 | def size(self): 43 | size = self.anno.find("size") 44 | height, width = size.find("./height").text, size.find("./width").text 45 | return int(height), int(width) 46 | 47 | def filename(self) -> str: 48 | return self.anno.find("filename").text 49 | 50 | def objects(self): 51 | objects = self.anno.findall("object") 52 | # hardcode, remove wrong seg annotation 53 | if "2009_005069" in self.anno_path: 54 | objects = objects[:-1] 55 | return objects 56 | 57 | def create_mask(self, for_object: Optional[int] = None): 58 | """ 59 | create boolean mask with same shape as .size() 60 | gt (is object) is positive, dummy is 0 61 | if for_object = None, OR all mask 62 | else, mask for this specific object (0 if dummy, positive for this category) 63 | """ 64 | # consists of: objects (object number in anno), 0 (dummy bg), 255 (white mask outline) 65 | seg_mask = np.array(Image.open(self.seg_img_path)) 66 | objects = self.objects() 67 | if for_object is None: 68 | ids = list(range(1, len(objects) + 1)) 69 | categories = [ 70 | object.find("./name").text 71 | for object in objects 72 | ] 73 | id2categoryid = { 74 | i: self.label2id[c] 75 | for i, c in zip(ids, categories) 76 | } 77 | # plus mapping to get dummy 255 78 | id2categoryid[0] = 0 79 | id2categoryid[255] = 0 80 | if len(np.unique(seg_mask)) != len(id2categoryid): 81 | # when seg is wrong, there are mismatch 82 | seg_mask = np.where(np.isin(seg_mask, list(id2categoryid)), seg_mask, 0) 83 | 84 | # rn if seg_mask == i, it's ith object, make it ith object's category 85 | mask = np.vectorize(id2categoryid.get)(seg_mask).astype('uint8') 86 | return Image.fromarray(mask) 87 | 88 | assert type(for_object) is int 89 | assert 1 <= for_object <= len(objects) 90 | id = for_object 91 | category = objects[id - 
1].find("./name").text 92 | 93 | mask = np.where(seg_mask == id, self.label2id[category], 0).astype("uint8") 94 | return Image.fromarray(mask) 95 | 96 | def create_instance_mask(self): 97 | """ 98 | instance mask where each non-dummy object is positive with id (starts from 1, NOT label id) 99 | 0 if background dummy 100 | """ 101 | seg_mask = np.array(Image.open(self.seg_img_path)) 102 | instance_mask = np.where(np.isin(seg_mask, [0, 255]), 0, seg_mask).astype("uint8") 103 | objects = self.objects() 104 | ids = list(range(1, len(objects) + 1)) 105 | categories = [ 106 | object.find("./name").text 107 | for object in objects 108 | ] 109 | instance_mask_id2category = { 110 | i: self.label2id[c] 111 | for i, c in zip(ids, categories) 112 | } 113 | return Image.fromarray(instance_mask), instance_mask_id2category 114 | 115 | 116 | class EntityAnno(Anno): 117 | def __init__(self, seg_img_path): 118 | # eg data/voc2012/entity_mask/bottle_mask/2009_000562.png 119 | self.seg_img_path = seg_img_path 120 | _, label, filename = seg_img_path.rsplit("/", 2) 121 | self.label = self.label2id[label.replace("_mask", "")] 122 | 123 | def objects(self): 124 | return [self.label] 125 | 126 | def create_mask(self, for_object: Optional[int] = None): 127 | # if for_object is not None: 128 | # assert for_object in self.objects() 129 | # 0 or 255 130 | mask = np.array(Image.open(self.seg_img_path)) 131 | mask = np.where(mask == 255, self.label, 0).astype("uint8") 132 | return Image.fromarray(mask) 133 | 134 | def create_instance_mask(self): 135 | instance_mask = np.array(Image.open(self.seg_img_path)) 136 | # 0 or 255 137 | instance_mask = np.where(instance_mask == 255, 1, 0).astype("uint8") 138 | return Image.fromarray(instance_mask), {1: self.label} 139 | 140 | class COCOAnno(Anno): 141 | def __init__(self, anno_path): 142 | with open(anno_path) as f: 143 | self.anno = json.load(f) 144 | 145 | self.id2annos = { 146 | id: [] 147 | for id in self.objects() 148 | } 149 | for anno in 
self.anno["annotations"]: 150 | self.id2annos[anno["category_id"]].append(anno) 151 | 152 | def size(self): 153 | return int(self.anno['images']['height']), int(self.anno['images']['width']) 154 | 155 | def objects(self): 156 | return sorted(set([ 157 | anno['category_id'] 158 | for anno in self.anno["annotations"] 159 | ])) 160 | 161 | def create_mask(self, for_object: Optional[int] = None): 162 | if for_object: # i-th (1 based) 163 | category = self.objects()[for_object-1] 164 | annos = self.id2annos[category] 165 | mask = np.zeros(self.size(), dtype=int) 166 | for anno in annos: 167 | objs = cocomask.frPyObjects(anno["segmentation"], *self.size()) 168 | binary_mask = cocomask.decode(objs) # (h, w, n) binary {0 (dummy), 1 (obj)} where n is \# disjoint anno 169 | if binary_mask.ndim == 2: 170 | binary_mask = binary_mask[:, :, np.newaxis] 171 | for n in range(binary_mask.shape[-1]): # 172 | mask[binary_mask[:, :, n] == 1] = category 173 | # binary_mask = np.where(binary_mask == 1, category, 0) 174 | # mask = np.ma.mask_or(mask, binary_mask) 175 | return Image.fromarray(mask.astype(np.uint8)) 176 | 177 | mask = np.zeros(self.size(), dtype=int) 178 | for i, category in enumerate(self.objects(), 1): 179 | mask2 = self.create_mask(for_object = i) 180 | mask[np.array(mask2) == category] = category 181 | return Image.fromarray(mask.astype(np.uint8)) 182 | 183 | def create_instance_mask(self): 184 | instance_mask = np.zeros(self.size(), dtype=int) 185 | instance_mask_id2category = {} 186 | for anno in self.anno["annotations"]: 187 | objs = cocomask.frPyObjects(anno["segmentation"], *self.size()) 188 | binary_mask = cocomask.decode(objs) # (h, w) binary {0 (dummy), 1 (obj)} 189 | if binary_mask.ndim == 2: 190 | binary_mask = binary_mask[:, :, np.newaxis] 191 | next_id = len(instance_mask_id2category) + 1 192 | for n in range(binary_mask.shape[-1]): # 193 | instance_mask[binary_mask[:, :, n] == 1] = next_id 194 | instance_mask_id2category[next_id] = anno['category_id'] 195 
| 196 | return Image.fromarray(instance_mask.astype(np.uint8)), instance_mask_id2category -------------------------------------------------------------------------------- /cutpaste/background.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | from pathlib import Path 4 | from typing import Optional, Iterator 5 | 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | from pycocotools import mask as cocomask 10 | 11 | from anno import Anno 12 | from foreground import CutObject, CutObjects 13 | from pb import create_mask, poisson_blend 14 | from pyblur3 import LinearMotionBlur 15 | 16 | 17 | def binarize_mask(mask, set_boundary=False) -> np.ndarray: 18 | """ 19 | make it 255 if occupied, regardless of pixel category 20 | """ 21 | mask = np.array(mask) 22 | mask = np.where(mask > 0, 255, 0).astype('uint8') 23 | if set_boundary: 24 | mask[:, 0] = 255 25 | mask[:, -1] = 255 26 | mask[0, :] = 255 27 | mask[-1, :] = 255 28 | return mask 29 | 30 | 31 | class PastedBackground: 32 | """ 33 | background image to be pasted on 34 | """ 35 | 36 | def __init__(self, imagepath: str, anno: Optional[Anno] = None): 37 | """ 38 | if anno is None, no need to find annotation in given image, i.e. 
we ignore potential foregrounds 39 | """ 40 | self.image: Image.Image = Image.open(imagepath) 41 | self.imagepath = imagepath 42 | if anno: 43 | self.ignore_foreground = False 44 | # semantic mask 45 | # 0 if dummy, positive int (label id for each of the potential object/foreground) is object mask 46 | self.mask = anno.create_mask(for_object=None) 47 | # Image Mask (each instance a unique id) & instance id to actual category (starts from 1) 48 | self.instance_mask, self.instance_mask_id2category = anno.create_instance_mask() 49 | else: 50 | self.ignore_foreground = True 51 | w, h = self.size 52 | self.mask = Image.fromarray(np.zeros((h, w), dtype="uint8")) 53 | self.instance_mask, self.instance_mask_id2category = self.mask.copy(), {} 54 | 55 | @property 56 | def size(self): 57 | """ w and h """ 58 | return self.image.size 59 | 60 | def resize(self, out_size: int): 61 | self.image = self.image.resize((out_size, out_size), Image.LANCZOS) 62 | self.mask = self.mask.resize((out_size, out_size), Image.NEAREST) 63 | self.instance_mask = self.instance_mask.resize((out_size, out_size), Image.NEAREST) 64 | 65 | def find_paste_location(self, 66 | foregrounds: CutObjects, max_degree, random_paste=False, 67 | scale_factor=0, center_translation_factor=0, use_random_scaling=False): 68 | """ 69 | modify self.mask in place (paste with foreground) 70 | return new foregrounds (after scaling and rotation) 71 | if random_paste: select random paste location (for abalation) and random scale 0.3-0.7 72 | """ 73 | # 1. loop through all objects 74 | foregrounds.shuffle() 75 | foregrounds_to_paste = [] 76 | locations = [] 77 | for foreground_object in foregrounds: 78 | w, h = self.size 79 | if random_paste: 80 | (x, y) = random.randint(0, h), random.randint(0, w) 81 | radius2, center2 = foreground_object.min_enclosing_circle() 82 | scaling = random.uniform(0.3, 0.7) 83 | radius = scaling * radius2 84 | else: 85 | # 2. 
find max inscribing circle in the background non-occupied area 86 | radius, (x, y) = self.max_inscribing_circle() 87 | if center_translation_factor != 0: # translate center 88 | sgn = 1 if random.random() < 0.5 else -1 89 | x += sgn * h * (center_translation_factor / 100) 90 | y += sgn * w * (center_translation_factor / 100) 91 | 92 | # 3. compute per-object min enclosing circle 93 | radius2, center2 = foreground_object.min_enclosing_circle() 94 | 95 | # 4. after scale, make foreground_object align with max inscribing circle 96 | if use_random_scaling: 97 | scaling = random.uniform(0.3, 0.7) 98 | else: 99 | scaling = radius / radius2 100 | if scale_factor != 0: # scale by @scale_factor % 101 | scaling *= (1 + scale_factor / 100) 102 | try: 103 | foreground_object.scale(scaling) 104 | o_w, o_h = foreground_object.img.size 105 | assert w - o_w >= 0 and h - o_h >= 0 and o_w > 0 and o_h > 0 106 | except: 107 | continue 108 | 109 | foreground_object.rotate(size=self.size, max_degree=max_degree) 110 | 111 | foregrounds_to_paste.append(foreground_object) 112 | locations.append(( 113 | int(x - radius), int(y - radius) 114 | )) 115 | self.mask.paste(foreground_object.mask, locations[-1], 116 | Image.fromarray(binarize_mask(foreground_object.mask))) 117 | new_instance_id = len(self.instance_mask_id2category) + 1 118 | self.instance_mask_id2category[new_instance_id] = foreground_object.category 119 | self.instance_mask.paste( 120 | Image.fromarray( 121 | np.where(np.array(foreground_object.mask) == foreground_object.category, new_instance_id, 0).astype( 122 | "uint8")), 123 | locations[-1], Image.fromarray(binarize_mask(foreground_object.mask)) 124 | ) 125 | if self.ignore_foreground: 126 | # even if ignore foreground first, now it will not since one foreground was pasted 127 | self.ignore_foreground = False 128 | 129 | return locations, CutObjects(foregrounds_to_paste) 130 | 131 | def save(self, name, path=None): 132 | """ 133 | before save, mask is label 1 - 20, and 0 if 
dummy 134 | change it to 0-19 label (corresponding to labels.txt but minus 1), 255 if dummy 135 | """ 136 | if path is None: 137 | img_path = mask_path = Path(".") 138 | else: 139 | img_path = path / "Images" 140 | mask_path = path / "Masks" 141 | mask = self.mask 142 | self.image.save(img_path / f"{name}.png") 143 | mask.save(mask_path / f"{name}.png") 144 | 145 | def max_inscribing_circle(self): 146 | """ 147 | max inscribing circle that contains all background objects 148 | """ 149 | if self.ignore_foreground: 150 | assert list(np.unique(self.mask)) == [0], "should be only 0 i.e. dummy" 151 | w, h = self.size 152 | x, y = random.randint(0, h), random.randint(0, w) 153 | dist_x = min(abs(x), abs(x - h // 2)) 154 | dist_y = min(abs(y), abs(y - w // 2)) 155 | return min(dist_x, dist_y), (x, y) 156 | 157 | background_mask = binarize_mask(self.mask, set_boundary=True) 158 | dist_map = cv2.distanceTransform(255 - background_mask, cv2.DIST_L2, cv2.DIST_MASK_PRECISE) 159 | _, radius, _, center = cv2.minMaxLoc(dist_map) 160 | return radius, center 161 | 162 | def paste(self, blending: str, paste_location: tuple, foreground_object: CutObject): 163 | def LinearMotionBlur3C(img): 164 | """Performs motion blur on an image with 3 channels. Used to simulate 165 | blurring caused due to motion of camera. 
166 | Args: 167 | img(NumPy Array): Input image with 3 channels 168 | Returns: 169 | Image: Blurred image by applying a motion blur with random parameters 170 | """ 171 | 172 | def randomAngle(kerneldim): 173 | """Returns a random angle used to produce motion blurring 174 | Args: 175 | kerneldim (int): size of the kernel used in motion blurring 176 | Returns: 177 | int: Random angle 178 | """ 179 | kernelCenter = int(math.floor(kerneldim / 2)) 180 | numDistinctLines = kernelCenter * 4 181 | validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False) 182 | angleIdx = np.random.randint(0, len(validLineAngles)) 183 | return int(validLineAngles[angleIdx]) 184 | 185 | lineLengths = [3, 5, 7, 9] 186 | lineTypes = ["right", "left", "full"] 187 | lineLengthIdx = np.random.randint(0, len(lineLengths)) 188 | lineTypeIdx = np.random.randint(0, len(lineTypes)) 189 | lineLength = lineLengths[lineLengthIdx] 190 | lineType = lineTypes[lineTypeIdx] 191 | lineAngle = randomAngle(lineLength) 192 | blurred_img = img 193 | for i in range(3): 194 | blurred_img[:, :, i] = np.asarray(LinearMotionBlur(img[:, :, i], lineLength, lineAngle, lineType)) 195 | blurred_img = Image.fromarray(blurred_img, 'RGB') 196 | return blurred_img 197 | 198 | x, y = paste_location 199 | foreground = foreground_object.img 200 | foreground_mask = Image.fromarray(np.where(np.array(foreground_object.mask) != 0, 255, 0).astype('uint8')) 201 | background = self.image.copy() 202 | if blending == 'none': 203 | background.paste(foreground, (x, y), foreground_mask) 204 | elif blending == 'motion': 205 | background.paste(foreground, (x, y), foreground_mask) 206 | background = LinearMotionBlur3C(np.asarray(background)) 207 | 208 | elif blending == 'poisson': 209 | offset = (y, x) 210 | img_mask = np.asarray(foreground_mask) 211 | img_src = np.asarray(foreground).astype(np.float64) 212 | img_target = np.asarray(background) 213 | img_mask, img_src, offset_adj \ 214 | = create_mask(img_mask.astype(np.float64), 
215 | img_target, img_src, offset=offset) 216 | background_array = poisson_blend(img_mask, img_src, img_target, 217 | method='normal', offset_adj=offset_adj) 218 | background = Image.fromarray(background_array, 'RGB') 219 | elif blending == 'gaussian': 220 | background.paste(foreground, (x, y), Image.fromarray( 221 | cv2.GaussianBlur(np.asarray(foreground_mask), (5, 5), 2))) 222 | elif blending == 'box': 223 | background.paste(foreground, (x, y), Image.fromarray( 224 | cv2.blur(np.asarray(foreground_mask), (3, 3)))) 225 | else: 226 | raise NotImplementedError 227 | self.image = background 228 | 229 | def to_COCO_ann(self) -> Iterator[tuple]: 230 | """ polygons, bbox, area """ 231 | for id, category in self.instance_mask_id2category.items(): 232 | mask = np.array(self.instance_mask) 233 | mask = np.where(mask == id, mask, 0) 234 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 235 | polygons = [] 236 | for contour in contours: 237 | if contour.size >= 6: 238 | polygons.append(contour.flatten().tolist()) 239 | if len(polygons) == 0: 240 | continue 241 | RLEs = cocomask.frPyObjects(polygons, mask.shape[0], mask.shape[0]) 242 | RLE = cocomask.merge(RLEs) 243 | area = cocomask.area(RLE) 244 | [x, y, w, h] = cv2.boundingRect(mask) 245 | yield polygons, [x, y, w, h], float(area), category -------------------------------------------------------------------------------- /cutpaste/clip_postprocess.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from concurrent import futures 3 | from pathlib import Path 4 | 5 | import json 6 | import torch 7 | from PIL import Image 8 | from tqdm.auto import tqdm 9 | from transformers import CLIPProcessor, CLIPModel 10 | 11 | voc_texts = [ 12 | f"a photo of {obj}" 13 | for obj in [ 14 | "person", 15 | "bird", 'cat', 'cow', 'dog', 'horse', 'sheep', 16 | 'aeroplane', 'airplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 
'train', 17 | 'bottle', 'chair', 'dining table', 'potted plant', 'sofa', "tv/ monitor" 18 | ] 19 | ] 20 | 21 | def batchify(lst, n): 22 | """Yield successive n-sized chunks from lst.""" 23 | for i in range(0, len(lst), n): 24 | yield lst[i:i + n] 25 | 26 | @torch.no_grad() 27 | def get_CLIP_score(caption: str, images: list): 28 | logits_per_images = [] 29 | for img in batchify(images, 400): 30 | inputs = processor(text=[caption] + voc_texts, images=img, return_tensors="pt", padding=True).to("cuda") 31 | outputs = model(**inputs) 32 | logits_per_image = outputs.logits_per_image # this is the image-text similarity score 33 | logits_per_images.append(logits_per_image) 34 | # probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities 35 | return torch.cat(logits_per_images, dim=0) 36 | 37 | def scores_for_one_caption(caption: Path): 38 | keep_files = 30 39 | images = [] 40 | for image in caption.iterdir(): # eg 1.png 41 | try: 42 | images.append(Image.open(image)) 43 | except: 44 | pass # weird generation error 45 | scores = get_CLIP_score(caption.stem, images) # (#images, 22) 46 | 47 | # 1. select top keep_files*2 lowest consistent_with_voc_labels 48 | consistent_with_voc_labels = scores[:, 1:].max(1).values 49 | double_keep_files = min(keep_files * 2, scores.size(0)) 50 | _, indices = torch.topk(-consistent_with_voc_labels.squeeze(), min(double_keep_files, scores.size(0))) 51 | # 2. 
select top keep_files highest consistent_with_caption 52 | consistent_with_caption = scores[indices, 0] 53 | _, indices = torch.topk(consistent_with_caption, keep_files) 54 | selected_images = [ 55 | images[i].filename.split("/")[-1] 56 | for i in indices.detach().cpu().numpy().tolist() 57 | ] 58 | return caption.stem, selected_images 59 | 60 | def sort_images(images): 61 | return sorted(images, key=lambda x: int(x.split(".png")[0])) 62 | 63 | if __name__ == "__main__": 64 | pwd = Path(__file__).parent.resolve() 65 | # root = pwd / "artifact" / "syn" / "voc_1k_bg" / "diffusion_wordnet_v1-10shot" 66 | # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot" 67 | # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot" 68 | # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot" 69 | # root = pwd.parent / "data" / "voc2012" / "background" / "diffusion_v1_600each" 70 | # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-1shot" 71 | # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot_refined" 72 | root = pwd.parent / "data" / "voc2012" / "background" / "critical_context_only-10shot" 73 | # root = pwd.parent / "data" / "voc2012" / "background" / "context_augment" 74 | 75 | model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to("cuda").eval() 76 | processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14") 77 | 78 | to_save = defaultdict(lambda: {}) 79 | 80 | # for object in ['empty living room', 'railway without train', 'stable']: 81 | for object in tqdm(root.iterdir(), total=len(list(root.iterdir()))): # eg a bicycle 82 | object = root / object 83 | if not object.name.endswith(".jpg"): 84 | continue 85 | captions = list(object.iterdir()) # eg a bicyle in a black background 86 | with futures.ThreadPoolExecutor(80) as executor: 87 | res = 
executor.map(scores_for_one_caption, captions) 88 | for caption, images in res: 89 | to_save[object.stem][caption] = sort_images(images) 90 | # with open(root / "clip_postprocessed.json", "w") as f: 91 | # json.dump(to_save, f, indent=4) 92 | with open("clip_postprocessed.json", "w") as f: 93 | json.dump(to_save, f, indent=4) -------------------------------------------------------------------------------- /cutpaste/config/bg/VOC/1shot/bg_template.yaml: -------------------------------------------------------------------------------- 1 | - _target_: utils.read_background_template 2 | data_dir: ${dataset_dir}/background/bg_template 3 | -------------------------------------------------------------------------------- /cutpaste/config/bg/VOC/1shot/bg_template_plus_dalle.yaml: -------------------------------------------------------------------------------- 1 | - _target_: utils.read_dalle_backgrounds 2 | data_dir: ${dataset_dir}/background/context_augment 3 | - _target_: utils.read_background_template 4 | data_dir: ${dataset_dir}/background/bg_template -------------------------------------------------------------------------------- /cutpaste/config/bg/VOC/1shot/real.yaml: -------------------------------------------------------------------------------- 1 | _target_: utils.read_real_VOC_backgrounds 2 | 3 | # assume dataset_dir: data/voc2012/ 4 | data_list: ${dataset_dir}/train_cls-1shot.txt 5 | data_dir: ${dataset_dir}/VOC2012/JPEGImages 6 | anno_dir: ${dataset_dir}/VOC2012/Annotations 7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject -------------------------------------------------------------------------------- /cutpaste/config/config.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # specify here default training configuration 4 | defaults: 5 | - _self_ 6 | - ds: null 7 | - fg@fg_real: null 8 | - fg@fg_syn: null 9 | - bg@bg_syn: null 10 | - bg@bg_real: null 11 | - paster: defaults 12 | - exp: 
null 13 | 14 | # enable color logging 15 | - override hydra/hydra_logging: colorlog 16 | - override hydra/job_logging: colorlog 17 | 18 | # path to original working directory 19 | # hydra hijacks working directory by changing it to the new log directory 20 | # so it's useful to have this path as a special variable 21 | # https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory 22 | work_dir: ${hydra:runtime.cwd} 23 | 24 | dataset: ${ds.name} 25 | 26 | name: ??? # name of exp 27 | 28 | output_dir: ${work_dir}/../artifact/${dataset}/${name} 29 | 30 | # seed for random number generators in pytorch, numpy and python.random 31 | seed: 42 32 | 33 | hydra: 34 | job: 35 | chdir: True -------------------------------------------------------------------------------- /cutpaste/config/ds/VOC.yaml: -------------------------------------------------------------------------------- 1 | name: VOC 2 | label2id: 3 | aeroplane: 1 4 | bicycle: 2 5 | bird: 3 6 | boat: 4 7 | bottle: 5 8 | bus: 6 9 | car: 7 10 | cat: 8 11 | chair: 9 12 | cow: 10 13 | diningtable: 11 14 | dog: 12 15 | horse: 13 16 | motorbike: 14 17 | person: 15 18 | pottedplant: 16 19 | sheep: 17 20 | sofa: 18 21 | train: 19 22 | tvmonitor: 20 -------------------------------------------------------------------------------- /cutpaste/config/exp/demo_cutpaste.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - override /ds: VOC 5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds 6 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds 7 | 8 | name: VOC-demo-1shot-cutpaste 9 | dataset_dir: ${work_dir}/../data/test_data 10 | paster: 11 | debug: True 12 | -------------------------------------------------------------------------------- /cutpaste/config/exp/demo_syn+real.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 
| - override /ds: VOC 5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds 6 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template 7 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds 8 | - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions 9 | 10 | name: VOC-demo-1shot-syn+real 11 | dataset_dir: ${work_dir}/../data/test_data 12 | paster: 13 | debug: True -------------------------------------------------------------------------------- /cutpaste/config/exp/demo_syn.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - override /ds: VOC 5 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template 6 | - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions 7 | 8 | name: VOC-demo-1shot-syn 9 | dataset_dir: ${work_dir}/../data/test_data 10 | paster: 11 | debug: True 12 | -------------------------------------------------------------------------------- /cutpaste/config/exp/demo_synFg.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | defaults: 4 | - override /ds: VOC 5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds 6 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template 7 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds 8 | 9 | name: VOC-demo-1shot-synFg 10 | dataset_dir: ${work_dir}/../data/test_data 11 | paster: 12 | debug: True 13 | -------------------------------------------------------------------------------- /cutpaste/config/fg/VOC/1shot/real.yaml: -------------------------------------------------------------------------------- 1 | _target_: utils.read_real_VOC_foregrounds 2 | 3 | # assume dataset_dir: data/voc2012/ 4 | data_list: ${dataset_dir}/train_cls-1shot.txt 5 | data_dir: 
${dataset_dir}/VOC2012/JPEGImages 6 | anno_dir: ${dataset_dir}/VOC2012/Annotations 7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject -------------------------------------------------------------------------------- /cutpaste/config/fg/VOC/fg_template.yaml: -------------------------------------------------------------------------------- 1 | _target_: utils.read_entity_foregrounds 2 | 3 | dataset: ${dataset} 4 | rgb_dir: ${dataset_dir}/foreground/foreground_rgb 5 | mask_dir: ${dataset_dir}/foreground/foreground_mask 6 | -------------------------------------------------------------------------------- /cutpaste/config/paster/defaults.yaml: -------------------------------------------------------------------------------- 1 | debug: False # debug mode, if True, only paste 40 images for debugging purpose 2 | 3 | r: 2 # number of repeat 4 | repeat_background: ${paster.r} # alias 5 | c: 4 # number of foregrounds pasted onto each background 6 | num_cut_images: ${paster.c} # alias 7 | j: 1 # number of workers, 1 means no multiprocessing, to accelerate, set to something like 50 8 | max_workers: ${paster.j} # alias 9 | 10 | # random scale in percentage 11 | scale_factor: 0 12 | # random center translation in percentage 13 | center_translation_factor: 0 14 | # if true, after find circle still random scale instead of matching radius 15 | use_random_scaling: False 16 | # if set, each time sample num_cut from [lowerbound, num_cut_images] 17 | num_cut_lowerbound: null 18 | # if true, ablation study of random paste images in anywhere 19 | use_random_paste: False 20 | 21 | select_prob: uniform # how to sample backgrounds 22 | out_size: 512 23 | max_degree: 30 24 | 25 | # if set, only paste [start:to] instead of entire 26 | start: null 27 | to: null 28 | -------------------------------------------------------------------------------- /cutpaste/foreground.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List 
3 | 4 | import cv2 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from anno import Anno 9 | 10 | 11 | def get_box(mask): 12 | rows = np.any(mask, axis=1) 13 | cols = np.any(mask, axis=0) 14 | assert len(np.where(rows)[0]) > 0 15 | ymin, ymax = np.where(rows)[0][[0, -1]] 16 | xmin, xmax = np.where(cols)[0][[0, -1]] 17 | assert ymax >= ymin and xmax >= xmin 18 | return int(xmin), int(xmax), int(ymin), int(ymax) 19 | 20 | 21 | def get_area(xmin, xmax, ymin, ymax): 22 | return (xmax - xmin) * (ymax - ymin) 23 | 24 | 25 | class CutObject: 26 | """ 27 | mask object 28 | input mask dummy is 0, positive if occupied (dep on category, 1-20 for VOC) 29 | """ 30 | def __init__(self, img_path: str, img: Image.Image, mask: Image.Image): 31 | self.img_path = img_path 32 | self.img = img 33 | self.mask = mask 34 | uniques = set(np.unique(self.mask)) 35 | assert len(uniques) in [2, 1] # it's possible to get perfect mask, so only positive val (1) 36 | # if not 0, then it's category for mask 37 | uniques: set = uniques.difference({0}) 38 | self.category, = uniques 39 | self.category_name = Anno.id2label[self.category] 40 | 41 | 42 | def min_enclosing_circle(self): 43 | contours, _ = cv2.findContours(np.array(self.mask), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:] 44 | center, radius = cv2.minEnclosingCircle(np.concatenate(contours, 0)) 45 | return radius, center 46 | 47 | def scale(self, scaling_factor: float): 48 | orig_w, orig_h = self.img.size 49 | o_w, o_h = int(scaling_factor * orig_w), int(scaling_factor * orig_h) 50 | self.img = self.img.resize((o_w, o_h), Image.ANTIALIAS) 51 | self.mask = self.mask.resize((o_w, o_h), Image.NEAREST) 52 | 53 | def rotate(self, size, max_degree=60): 54 | w, h = size 55 | while True: 56 | rot_degrees = random.randint(-max_degree, max_degree) 57 | foreground_tmp = self.img.rotate(rot_degrees, expand=True) 58 | foreground_mask_tmp = self.mask.rotate(rot_degrees, expand=True) 59 | o_w, o_h = foreground_tmp.size 60 | if w - o_w > 0 and h - 
o_h > 0: 61 | break 62 | self.img = foreground_tmp 63 | self.mask = foreground_mask_tmp 64 | 65 | def save(self, name): 66 | self.img.save(f"{name}-fg-image.png") 67 | self.mask.save(f"{name}-fg-mask.png") 68 | 69 | class CutObjects(list): 70 | """ 71 | list of objects (i.e. foregrounds) to cut, and later will be pasted on PastedImage 72 | can contain multiple foregrounds from the same image 73 | """ 74 | def __init__(self, *args): 75 | super().__init__(*args) 76 | self: List[CutObject] 77 | 78 | def add_image(self, img_path, foreground_anno: Anno, area_threshold=700): 79 | """ 80 | add per-object mask of the given image 81 | only add if area exceeds area_threshold 82 | """ 83 | foreground_img = Image.open(img_path) 84 | for i, foreground_object in enumerate(foreground_anno.objects(), 1): 85 | """ 86 | binary mask, 0 is dummy, positive int (label id for ith object) is object mask 87 | """ 88 | foreground_mask = foreground_anno.create_mask(for_object=i) 89 | xmin, xmax, ymin, ymax = get_box(foreground_mask) 90 | if get_area(xmin, xmax, ymin, ymax) < area_threshold: 91 | continue 92 | foreground = foreground_img.crop((xmin, ymin, xmax, ymax)) 93 | foreground_mask = foreground_mask.crop((xmin, ymin, xmax, ymax)) 94 | self.append(CutObject(img_path, foreground, foreground_mask)) 95 | return self 96 | 97 | def shuffle(self): 98 | random.shuffle(self) -------------------------------------------------------------------------------- /cutpaste/paste.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | from pathlib import Path 5 | 6 | import hydra 7 | from omegaconf import DictConfig, open_dict, ListConfig 8 | import numpy as np 9 | 10 | from paster import Paster 11 | 12 | from logging import Logger, getLogger 13 | 14 | logger = getLogger(__file__) 15 | @hydra.main(config_path="config", config_name="config", version_base="1.2") 16 | def paste(cfg: DictConfig): 17 | assert cfg.get("dataset") 
and cfg.get("name") 18 | assert cfg['paster'].get("select_prob") in ["uniform", "balanced"] 19 | with open_dict(cfg): 20 | if not cfg.get("dataset_dir"): 21 | if cfg.dataset == "VOC": 22 | cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "voc2012" 23 | else: 24 | cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "COCO2017" 25 | else: 26 | cfg.dataset_dir = Path(cfg.dataset_dir) 27 | cfg.output_dir = Path(cfg.output_dir) 28 | if cfg.get("debug") and cfg.output_dir.exists(): 29 | shutil.rmtree(cfg.output_dir) 30 | os.makedirs(cfg.output_dir, exist_ok=True) 31 | 32 | np.random.seed(cfg.seed) 33 | random.seed(cfg.seed) 34 | os.environ["PYTHONHASHSEED"] = str(cfg.seed) 35 | paster = Paster( 36 | label2id=cfg.ds.label2id, 37 | out_size=cfg.paster.out_size, 38 | repeat_background=cfg.paster.repeat_background, 39 | select_prob=cfg.paster.select_prob, 40 | random_paste=cfg.paster.use_random_paste, 41 | ) 42 | if (cfg.output_dir / "paster.pt").exists(): # if paster is saved, load it instead of creating a new one 43 | paster = paster.from_save(cfg.output_dir) 44 | else: 45 | #### foreground 46 | if cfg.get("fg_real"): 47 | # from utils import read_real_VOC 48 | paster.foregrounds.extend(hydra.utils.call(cfg.fg_real)) 49 | if cfg.get("fg_syn"): 50 | # from utils import read_entity_foregrounds 51 | paster.foregrounds.extend(hydra.utils.call(cfg.fg_syn)) 52 | #### background 53 | if cfg.get("bg_real"): 54 | # from utils import read_real_VOC 55 | paster.backgrounds.extend(hydra.utils.call(cfg.bg_real)) 56 | if cfg.get("bg_syn"): 57 | # from utils import read_dalle_backgrounds 58 | assert isinstance(cfg.bg_syn, ListConfig) 59 | for syn_cfg in cfg.bg_syn: 60 | paster.backgrounds.extend(hydra.utils.call(syn_cfg)) 61 | 62 | paster.save(cfg.output_dir) 63 | 64 | paster.validate() 65 | if cfg.paster.debug: 66 | random.shuffle(paster.backgrounds) 67 | random.shuffle(paster.foregrounds) 68 | paster.truncate(slice(0, 40)) 69 | cfg.paster.max_workers = 1 70 | cfg.output_dir /= 
"debug" 71 | if cfg.output_dir.exists(): 72 | shutil.rmtree(cfg.output_dir) 73 | else: 74 | random_str = "-random" if cfg.use_random_paste else "" 75 | prob_str = '' if cfg.select_prob == 'uniform' else "-balanced" 76 | num_cut = cfg.num_cut_images if cfg.num_cut_lowerbound is None else f"{cfg.num_cut_lowerbound}~{cfg.num_cut_images}" 77 | cfg.output_dir = cfg.output_dir \ 78 | / f"[{cfg.foreground},{cfg.background}]{random_str}{prob_str}[{cfg.repeat_each_image},{num_cut},{cfg.max_degree},{cfg.out_size}]" 79 | 80 | os.makedirs(cfg.output_dir, exist_ok=True) 81 | 82 | if cfg.paster.get("start") and cfg.paster.get("to"): 83 | slice_idx = slice(cfg.paster.start, cfg.paster.to) 84 | paster.truncate(slice_idx) 85 | 86 | logger.info(f"size of background {len(paster)}; size of foreground {len(paster.foregrounds)}") 87 | logger.info(f"saving to {cfg.output_dir}") 88 | 89 | # TODO, move json 90 | shutil.copy(Path(os.getcwd()) / ".hydra" / "config.yaml", cfg.output_dir / "config.yaml") 91 | shutil.copy(Path(os.getcwd()) / ".hydra" / "overrides.yaml", cfg.output_dir / "overrides.yaml") 92 | paster.cut_and_paste( 93 | out_dir=cfg.output_dir, 94 | max_workers=cfg.paster.max_workers, 95 | num_cut_images=cfg.paster.num_cut_images, max_degree=cfg.paster.max_degree, 96 | num_cut_lowerbound=cfg.paster.num_cut_lowerbound, 97 | scale_factor=cfg.paster.scale_factor, center_translation_factor=cfg.paster.center_translation_factor, 98 | use_random_scaling=cfg.paster.use_random_scaling 99 | ) 100 | 101 | 102 | if __name__ == "__main__": 103 | paste() -------------------------------------------------------------------------------- /cutpaste/paster.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import random 4 | from concurrent import futures 5 | from copy import deepcopy 6 | from functools import partial 7 | from pathlib import Path 8 | from typing import Union, Optional, List 9 | 10 | import numpy as np 11 | import 
ujson as json 12 | # from detectron2.utils.env import seed_all_rng 13 | from tqdm import tqdm 14 | 15 | from anno import Anno 16 | from background import PastedBackground 17 | from foreground import CutObjects 18 | from utils import convert_to_COCO 19 | 20 | 21 | class Paster: 22 | """ 23 | paste @self.foregrounds into @self.backgrounds 24 | """ 25 | 26 | def __init__(self, label2id: dict, 27 | out_size: int = 512, 28 | repeat_background: int = 1, select_prob: str = "uniform", 29 | random_paste=False): 30 | """ 31 | out_dir/ 32 | foregrounds.csv 33 | backgrounds.csv # without repeat 34 | TODO 35 | xxx 36 | Args: 37 | label2id: dict with label text & id 38 | repeat_background: # times background image is repeated 39 | select_prob: how to select foreground 40 | random_paste: whether to use random paste, if False use space maximization paste 41 | """ 42 | # blending_list = ['gaussian', 'poisson', 'none', 'box', 'motion'] 43 | assert select_prob in ["uniform", "balanced", "supercategory_CDI", "supercategory"] 44 | # seed_all_rng(seed) 45 | # TODO 46 | self.select_prob = select_prob 47 | self.random_paste = random_paste 48 | self.out_size = out_size 49 | self.blending_list: List[str] = ['gaussian', ] 50 | assert all(b in ['gaussian', 'poisson', 'none', 'box', 'motion'] for b in self.blending_list) 51 | self.repeat_background = repeat_background # repeat background only 52 | self.backgrounds = [] 53 | self.foregrounds = [] 54 | 55 | self.id2label = {v: k for k, v in label2id.items()} 56 | self.label2id = dict(label2id) 57 | Anno.label2id = label2id 58 | Anno.id2label = self.id2label 59 | 60 | def aggregate_json(self, 61 | input_dir: Path, max_workers=1, json_name="COCO"): 62 | """ 63 | convert instance mask to COCO format 64 | input_dir must contain @image_folder folder for pasted images, and json are saved here 65 | """ 66 | output_json_dict = { 67 | "images": [], 68 | "type": "instances", 69 | "annotations": [], 70 | "categories": [ 71 | {'supercategory': 'none', 'id': 
label_id, 'name': label} 72 | for label, label_id in self.label2id.items() 73 | ] 74 | } 75 | 76 | def read_json(path): 77 | with open(input_dir / "tmp" / path) as f: 78 | return json.load(f) 79 | 80 | files = list(os.listdir(input_dir / "tmp")) 81 | with tqdm(total=len(files), desc="COCO agg") as pbar, \ 82 | futures.ThreadPoolExecutor(max_workers) as executor: 83 | todos = [] 84 | bnd_id = 0 # coco need integer bnd ids 85 | for file in files: 86 | todos.append(executor.submit(read_json, file)) 87 | for future in futures.as_completed(todos): 88 | data = future.result() 89 | output_json_dict["images"].extend(data["images"]) 90 | anno = data["annotations"] 91 | for bbox in anno: 92 | bbox["id"] = bnd_id 93 | bnd_id += 1 94 | output_json_dict["annotations"].extend(anno) 95 | pbar.update(1) 96 | print(f"saving to {input_dir}") 97 | with open(input_dir / f"{json_name}.json", "w") as f: 98 | json.dump(output_json_dict, f) 99 | with open(input_dir / "label2id.json", "w") as f: 100 | json.dump(dict(self.label2id), f, indent=4) 101 | # can rm tmp folder if you want 102 | # shutil.rmtree(input_dir / "tmp") 103 | 104 | def cut_and_paste_one_image(self, i: int, out_dir: Path, out_size: int, 105 | num_cut_images: int, blending_list: List[str], probs: List[float], 106 | num_cut_lowerbound: Optional[int], 107 | max_degree: int, random_paste: bool, scale_factor: int, center_translation_factor: int, 108 | use_random_scaling: bool): 109 | """ return path of background image + all objects (original bg + pasted fg) for this image """ 110 | background: PastedBackground = deepcopy(self.backgrounds[i]) 111 | path_to_save = f"{Path(background.imagepath).stem}_{i}" 112 | if (out_dir / "tmp" / f"{path_to_save}.json").exists(): 113 | return 114 | 115 | background.resize(out_size) 116 | if num_cut_lowerbound: 117 | k = random.randint(num_cut_lowerbound, num_cut_images) 118 | else: 119 | k = num_cut_images 120 | cut_images = random.choices(self.foregrounds, k=k, weights=probs) 121 | 
        # --- tail of cut_and_paste_one_image (the method header lies above
        # this chunk): turn the chosen cut images into a paste plan, render it
        # once per blending mode, then emit per-image COCO annotations.
        foregrounds = CutObjects(cut_images)
        locations, foregrounds = background.find_paste_location(foregrounds, max_degree=max_degree,
                                                                random_paste=random_paste, scale_factor=scale_factor,
                                                                center_translation_factor=center_translation_factor,
                                                                use_random_scaling=use_random_scaling)

        for blending in blending_list:
            # deep-copy so each blending mode starts from the pristine background
            pasted_background = deepcopy(background)
            for paste_location, foreground_object in zip(locations, foregrounds):
                pasted_background.paste(
                    foreground_object=foreground_object, paste_location=paste_location, blending=blending)
            pasted_background.save(path=out_dir, name=f"{path_to_save}_{blending}")

        convert_to_COCO(out_dir, path_to_save, background, blending_list)

    def foreground_sample_prob_by_supercategory(self, bg_filename, probs):
        """
        either sample based on supercategory of @bg_filename
        or fallback to @probs

        NOTE(review): both branches currently return @probs unchanged, so the
        supercategory_CDI condition has no observable effect -- confirm whether
        a supercategory-specific distribution was intended for one branch.
        """
        if "background" in bg_filename and self.select_prob == "supercategory_CDI":  # bgtemplate
            return probs  # random select fg on bg template images
        return probs

    def save(self, output_dir: Path):
        """
        Persist this paster into @output_dir:
          - paster.json: human-readable summary (counts + source image paths)
          - paster.pt:   full pickle via torch.save, restorable by from_save()
        """
        import torch
        with open(output_dir / "paster.json", "w") as f:
            json.dump({
                # len(self) counts backgrounds (see __len__)
                "counts": [len(self.foregrounds), len(self)],
                "foreground": [str(fg.img_path) for fg in self.foregrounds],
                "background": [str(bg.imagepath) for bg in self.backgrounds],
            }, f)
        torch.save(self, output_dir / "paster.pt")

    @staticmethod
    def from_save(input_dir: Path):
        """Restore a paster previously written by save()."""
        import torch
        return torch.load(input_dir / "paster.pt")

    def get_select_prob(self, select_prob) -> np.ndarray:
        """
        return selection prob for each ele of @self.foregrounds

        select_prob: "uniform" for equal weight per foreground sample;
        anything else balances per label so every class is equally likely.
        Returns a (N,) probability vector summing to 1.
        """
        if select_prob == "uniform":  # uniform over provided data, thus can be balanced and reflect distribution of bg
            probs = np.ones(len(self.foregrounds)) / len(self.foregrounds)
        else:  # balanced based on label st each label shows up equal likely
            labels = np.array([label for _, label, _, _ in self.foregrounds])  # (N, )
            # NOTE(review): zeros_like(labels).astype(float) assumes numeric
            # labels; a string-dtype label array would raise here -- confirm.
            probs = np.zeros_like(labels).astype(float)
            for class_i in np.unique(labels):
                class_indices = labels == class_i  # boolean (N, )
                num_samples_class_i = class_indices.sum()
                assert num_samples_class_i > 0
                probs[class_indices] = 1.0 / num_samples_class_i
            probs /= probs.sum()  # st sum(probs) == 1
        """
        # check if probs work
        labels = []
        for _ in range(10000):
            cut_images = random.choices(self.backgrounds, k=4, weights=probs)
            for _, l, _, _ in cut_images:
                labels.append(l)
        from collections import Counter
        ct = Counter(labels)
        print(ct)  # should be almost same number for each class
        """
        assert len(probs) == len(self.foregrounds)
        return probs

    def __len__(self):  # len def as background
        return len(self.backgrounds)

    def truncate(self, slice):
        # NOTE(review): the parameter shadows the builtin `slice`; callers are
        # expected to pass a slice object that trims the background list.
        self.backgrounds = self.backgrounds[slice]

    def validate(self):
        # both pools must be non-empty before any pasting can happen
        assert len(self.foregrounds) > 0 and len(self.backgrounds) > 0

    def cut_and_paste(
            self, out_dir: Path, num_cut_images: int = 2, max_workers=1,
            # rotate
            max_degree: int = 30,
            # variant
            scale_factor=0, center_translation_factor=0, use_random_scaling=False,
            num_cut_lowerbound: Optional[int] = None
    ):
        """
        will create the following in @out_dir:
            Images folder: pasted RGB images
            Masks folder: semantic level segmentation mask
            COCO.json: instance level COCO segmentation annotation

        Side effects: multiplies self.backgrounds by self.repeat_background
        before saving, so calling twice compounds the repetition.
        """
        self.validate()
        self.backgrounds = self.backgrounds * self.repeat_background
        self.save(output_dir=out_dir)  # with updated backgrounds
        os.makedirs(out_dir / "Images", exist_ok=True)
        os.makedirs(out_dir / "Masks", exist_ok=True)

        probs = self.get_select_prob(self.select_prob)

        cut_and_paste_one_image = partial(
            self.cut_and_paste_one_image, out_dir=out_dir, out_size=self.out_size, probs=probs,
            num_cut_images=num_cut_images, max_degree=max_degree, blending_list=self.blending_list,
            num_cut_lowerbound=num_cut_lowerbound,
            random_paste=self.random_paste, scale_factor=scale_factor, center_translation_factor=center_translation_factor,
            use_random_scaling=use_random_scaling)
        # serial loop; the thread-pool variant below was disabled by the author
        for i in list(range(len(self))):
            cut_and_paste_one_image(i)

        # todos = []
        # with tqdm(total=len(self), desc="cutpaste") as pbar, \
        #         futures.ThreadPoolExecutor(max_workers) as executor:
        #     for i in list(range(len(self))):
        #         todos.append(executor.submit(
        #             cut_and_paste_one_image, i))
        #     for future in futures.as_completed(todos):
        #         future.result()
        #         pbar.update(1)
        print("converting to COCO format")
        self.aggregate_json(out_dir, max_workers, json_name="COCO")


# --- cutpaste/pb.py ---
"""
pb: Poisson Image Blending implemented by Python
"""

import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse
from skimage import io


def create_mask(img_mask, img_target, img_src, offset=(0, 0)):
    '''
    Takes the np.array from the grayscale image
    '''

    # crop img_mask and img_src to fit to the img_target
    hm, wm = img_mask.shape
    ht, wt, nl = img_target.shape

    hd0 = max(0, -offset[0])
    wd0 = max(0, -offset[1])

    hd1 = hm - max(hm + offset[0] - ht, 0)
    wd1 = wm - max(wm + offset[1] - wt, 0)

    mask = np.zeros((hm, wm))
    mask[img_mask > 0] = 1
    mask[img_mask == 0] = 0

    mask = mask[hd0:hd1, wd0:wd1]
    src = img_src[hd0:hd1, wd0:wd1]

    # fix offset
def create_mask(img_mask, img_target, img_src, offset=(0, 0)):
    '''
    Takes the np.array from the grayscale image.

    Crops img_mask / img_src so the pasted region fits inside img_target at
    @offset, binarizes the mask, and zeroes the mask border so the Poisson
    solver never has to handle edge neighbors.

    Returns (mask, src, offset_adj) where offset_adj is @offset clamped to
    non-negative coordinates.
    '''
    # crop img_mask and img_src to fit to the img_target
    hm, wm = img_mask.shape
    ht, wt, nl = img_target.shape

    hd0 = max(0, -offset[0])
    wd0 = max(0, -offset[1])

    hd1 = hm - max(hm + offset[0] - ht, 0)
    wd1 = wm - max(wm + offset[1] - wt, 0)

    mask = np.zeros((hm, wm))
    mask[img_mask > 0] = 1
    mask[img_mask == 0] = 0

    mask = mask[hd0:hd1, wd0:wd1]
    src = img_src[hd0:hd1, wd0:wd1]

    # fix offset: negative components were absorbed by the crop above
    offset_adj = (max(offset[0], 0), max(offset[1], 0))

    # remove edge from the mask so that we don't have to check the
    # edge condition
    mask[:, -1] = 0
    mask[:, 0] = 0
    mask[-1, :] = 0
    mask[0, :] = 0

    return mask, src, offset_adj


def get_gradient_sum(img, i, j, h, w):
    """
    Return the sum of the gradient of the source image at (i, j):
    the discrete Laplacian 4*f(i,j) - sum of the 4 neighbors.
    * 3D array for RGB
    """
    v_sum = img[i, j] * 4 \
        - img[i + 1, j] - img[i - 1, j] - img[i, j + 1] - img[i, j - 1]

    return v_sum


def get_mixed_gradient_sum(img_src, img_target, i, j, h, w, ofs,
                           c=1.0):
    """
    Return the sum of the mixed gradient at (i, j): per neighbor and per
    channel, keep whichever gradient (source fp or target gp) has the
    larger magnitude.
    * 3D array for RGB
    c(>=0): larger, the more important the target image gradient is
    """
    v_sum = np.array([0.0, 0.0, 0.0])
    nb = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]])

    for kk in range(4):
        fp = img_src[i, j] - img_src[i + nb[kk, 0], j + nb[kk, 1]]
        gp = img_target[i + ofs[0], j + ofs[1]] \
            - img_target[i + nb[kk, 0] + ofs[0], j + nb[kk, 1] + ofs[1]]

        # channel-wise max-magnitude selection (c scales the source side)
        v_sum += np.array([fp[0] if abs(fp[0] * c) > abs(gp[0]) else gp[0],
                           fp[1] if abs(fp[1] * c) > abs(gp[1]) else gp[1],
                           fp[2] if abs(fp[2] * c) > abs(gp[2]) else gp[2]])

    return v_sum


def poisson_blend(img_mask, img_src, img_target, method='mix', c=1.0,
                  offset_adj=(0, 0)):
    """
    Blend img_src into img_target inside img_mask (placed at offset_adj).

    method: 'target' / 'src' copy pixels directly; 'mix' uses the mixed
    gradient; anything else uses the plain source gradient.
    Returns a uint8 image the same shape as img_target.

    Builds one sparse linear system A x = F per color channel and solves it.
    """
    # BUGFIX: `import scipy.sparse` does not import the `linalg` submodule,
    # so `scipy.sparse.linalg.spsolve(...)` could raise AttributeError.
    from scipy.sparse.linalg import spsolve

    hm, wm = img_mask.shape
    region_size = hm * wm

    F = np.zeros((region_size, 3))
    A = scipy.sparse.identity(region_size, format='lil')

    # column-major flattening; matches the order='F' reshape below
    get_k = lambda i, j: i + j * hm

    # plane insertion: A stays the identity, F holds the copied pixels
    if method in ['target', 'src']:
        for i in range(hm):
            for j in range(wm):
                k = get_k(i, j)

                if img_mask[i, j] == 1:
                    if method == 'target':
                        F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]
                    elif method == 'src':
                        F[k] = img_src[i, j]
                else:
                    F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]

    # poisson blending
    else:
        if method == 'mix':
            grad_func = lambda ii, jj: get_mixed_gradient_sum(
                img_src, img_target, ii, jj, hm, wm, offset_adj, c=c)
        else:
            grad_func = lambda ii, jj: get_gradient_sum(
                img_src, ii, jj, hm, wm)

        for i in range(hm):
            for j in range(wm):
                k = get_k(i, j)

                # create_mask() zeroed the mask border, so every masked pixel
                # has 4 in-range neighbors (no edge case here)
                if img_mask[i, j] == 1:
                    f_star = np.array([0.0, 0.0, 0.0])

                    if img_mask[i - 1, j] == 1:
                        A[k, k - 1] = -1
                    else:
                        f_star += img_target[i - 1 +
                                             offset_adj[0], j + offset_adj[1]]

                    if img_mask[i + 1, j] == 1:
                        A[k, k + 1] = -1
                    else:
                        f_star += img_target[i + 1 +
                                             offset_adj[0], j + offset_adj[1]]

                    if img_mask[i, j - 1] == 1:
                        A[k, k - hm] = -1
                    else:
                        f_star += img_target[i +
                                             offset_adj[0], j - 1 + offset_adj[1]]

                    if img_mask[i, j + 1] == 1:
                        A[k, k + hm] = -1
                    else:
                        f_star += img_target[i +
                                             offset_adj[0], j + 1 + offset_adj[1]]

                    A[k, k] = 4
                    F[k] = grad_func(i, j) + f_star

                else:
                    F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]

    A = A.tocsr()

    img_pro = np.empty_like(img_target.astype(np.uint8))
    img_pro[:] = img_target.astype(np.uint8)

    for l in range(3):
        x = spsolve(A, F[:, l])
        # clip to the valid uint8 range before the dtype cast
        x[x > 255] = 255
        x[x < 0] = 0
        x = np.array(x, img_pro.dtype)

        img_pro[offset_adj[0]:offset_adj[0] + hm,
                offset_adj[1]:offset_adj[1] + wm, l] \
            = x.reshape(hm, wm, order='F')

    return img_pro


if __name__ == "__main__":
    # demo: blend ./testimages/0.png into itself with a hand-drawn mask
    offset = (40, -30)
    img_mask = io.imread('/Users/ysakamoto/Projects/sccomp/mask.png', as_grey=True)
    img_src = io.imread('./testimages/0.png').astype(np.float64)
    img_target = io.imread('./testimages/0.png')

    img_mask, img_src, offset_adj \
        = create_mask(img_mask.astype(np.float64),
                      img_target, img_src, offset=offset)

    img_pro = poisson_blend(img_mask, img_src, img_target,
                            method='normal', offset_adj=offset_adj)
    plt.imshow(img_pro)
    plt.show()
    io.imsave('./testimages/poisson_normal.png', img_pro)
# (commented-out debug helper kept verbatim: plots the sparsity pattern of
#  the Poisson system matrix A)
# def plot_coo_matrix(m):
#     if not isinstance(m, coo_matrix):
#         m = coo_matrix(m)
#     fig = plt.figure()
#     ax = fig.add_subplot(111, axisbg='white')
#     ax.plot(m.col, m.row, 's', color='black', ms=1)
#     ax.set_xlim(0, m.shape[1])
#     ax.set_ylim(0, m.shape[0])
#     ax.set_aspect('equal')
#     for spine in ax.spines.values():
#         spine.set_visible(False)
#     ax.invert_yaxis()
#     ax.set_aspect('equal')
#     ax.set_xticks([])
#     ax.set_yticks([])
#     return ax

# B = A.tocoo()
# plot_coo_matrix(B)
# plt.show()


# --- cutpaste/pyblur3/BoxBlur.py ---
import numpy as np
from PIL import Image
from scipy.signal import convolve2d

# candidate (square) box-kernel side lengths for the randomized variant
boxKernelDims = [3, 5, 7, 9]


def BoxBlur_random(img):
    """Apply BoxBlur with a kernel size drawn uniformly from boxKernelDims."""
    kernelidx = np.random.randint(0, len(boxKernelDims))
    kerneldim = boxKernelDims[kernelidx]
    return BoxBlur(img, kerneldim)


def BoxBlur(img, dim):
    """Box-blur a PIL image with a dim x dim mean filter.

    NOTE(review): convolve2d needs a 2-D array, so this assumes a grayscale
    input image -- confirm callers never pass RGB here.
    """
    imgarray = np.array(img, dtype="float32")
    kernel = BoxKernel(dim)
    convolved = convolve2d(imgarray, kernel, mode='same', fillvalue=255.0).astype("uint8")
    img = Image.fromarray(convolved)
    return img


def BoxKernel(dim):
    """Return a dim x dim averaging kernel normalized to sum to 1."""
    kernelwidth = dim
    kernel = np.ones((kernelwidth, kernelwidth), dtype=np.float32)
    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel


# --- cutpaste/pyblur3/DefocusBlur.py ---
# -*- coding: utf-8 -*-
import numpy as np
from PIL import Image
from scipy.signal import convolve2d
# NOTE(review): skimage.draw.circle was removed in scikit-image >= 0.19
# (renamed to skimage.draw.disk); this import pins an older scikit-image.
from skimage.draw import circle

# candidate disk-kernel diameters for the randomized variant
defocusKernelDims = [3, 5, 7, 9]


def DefocusBlur_random(img):
    """Apply DefocusBlur with a kernel size drawn uniformly from defocusKernelDims."""
    kernelidx = np.random.randint(0, len(defocusKernelDims))
    kerneldim = defocusKernelDims[kernelidx]
    return DefocusBlur(img, kerneldim)


def DefocusBlur(img, dim):
    """Defocus-blur a grayscale PIL image with a disk kernel of diameter dim."""
    imgarray = np.array(img, dtype="float32")
    kernel = DiskKernel(dim)
    convolved = convolve2d(imgarray, kernel, mode='same', fillvalue=255.0).astype("uint8")
    img = Image.fromarray(convolved)
    return img


def DiskKernel(dim):
    """Return a normalized disk-shaped kernel of size dim x dim.

    NOTE(review): dim / 2 is float division under Python 3; the original
    Python 2 pyblur used integer division for the circle center -- confirm
    the rasterized disk still matches the intended shape.
    """
    kernelwidth = dim
    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
    circleCenterCoord = dim / 2
    circleRadius = circleCenterCoord + 1

    rr, cc = circle(circleCenterCoord, circleCenterCoord, circleRadius)
    kernel[rr, cc] = 1

    # small kernels get their corners trimmed to better approximate a disk
    if (dim == 3 or dim == 5):
        kernel = Adjust(kernel, dim)

    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel


def Adjust(kernel, kernelwidth):
    """Zero out the four corner cells of the kernel (in place) and return it."""
    kernel[0, 0] = 0
    kernel[0, kernelwidth - 1] = 0
    kernel[kernelwidth - 1, 0] = 0
    kernel[kernelwidth - 1, kernelwidth - 1] = 0
    return kernel


# --- cutpaste/pyblur3/GaussianBlur.py ---
import numpy as np
from PIL import ImageFilter

# candidate Gaussian bandwidths (sigma) for the randomized variant
gaussianbandwidths = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]


def GaussianBlur_random(img):
    """Apply GaussianBlur with a bandwidth drawn uniformly from gaussianbandwidths."""
    gaussianidx = np.random.randint(0, len(gaussianbandwidths))
    gaussianbandwidth = gaussianbandwidths[gaussianidx]
    return GaussianBlur(img, gaussianbandwidth)


def GaussianBlur(img, bandwidth):
    """Gaussian-blur a PIL image via PIL's built-in filter."""
    img = img.filter(ImageFilter.GaussianBlur(bandwidth))
    return img


# --- cutpaste/pyblur3/LineDictionary.py ---
class LineDictionary:
    def __init__(self):
        # maps kernel size -> {angle: [r0, c0, r1, c1]}
        self.lines = {}
self.Create3x3Lines() 5 | self.Create5x5Lines() 6 | self.Create7x7Lines() 7 | self.Create9x9Lines() 8 | return 9 | 10 | def Create3x3Lines(self): 11 | lines = {} 12 | lines[0] = [1, 0, 1, 2] 13 | lines[45] = [2, 0, 0, 2] 14 | lines[90] = [0, 1, 2, 1] 15 | lines[135] = [0, 0, 2, 2] 16 | self.lines[3] = lines 17 | return 18 | 19 | def Create5x5Lines(self): 20 | lines = {} 21 | lines[0] = [2, 0, 2, 4] 22 | lines[22.5] = [3, 0, 1, 4] 23 | lines[45] = [0, 4, 4, 0] 24 | lines[67.5] = [0, 3, 4, 1] 25 | lines[90] = [0, 2, 4, 2] 26 | lines[112.5] = [0, 1, 4, 3] 27 | lines[135] = [0, 0, 4, 4] 28 | lines[157.5] = [1, 0, 3, 4] 29 | self.lines[5] = lines 30 | return 31 | 32 | def Create7x7Lines(self): 33 | lines = {} 34 | lines[0] = [3, 0, 3, 6] 35 | lines[15] = [4, 0, 2, 6] 36 | lines[30] = [5, 0, 1, 6] 37 | lines[45] = [6, 0, 0, 6] 38 | lines[60] = [6, 1, 0, 5] 39 | lines[75] = [6, 2, 0, 4] 40 | lines[90] = [0, 3, 6, 3] 41 | lines[105] = [0, 2, 6, 4] 42 | lines[120] = [0, 1, 6, 5] 43 | lines[135] = [0, 0, 6, 6] 44 | lines[150] = [1, 0, 5, 6] 45 | lines[165] = [2, 0, 4, 6] 46 | self.lines[7] = lines 47 | return 48 | 49 | def Create9x9Lines(self): 50 | lines = {} 51 | lines[0] = [4, 0, 4, 8] 52 | lines[11.25] = [5, 0, 3, 8] 53 | lines[22.5] = [6, 0, 2, 8] 54 | lines[33.75] = [7, 0, 1, 8] 55 | lines[45] = [8, 0, 0, 8] 56 | lines[56.25] = [8, 1, 0, 7] 57 | lines[67.5] = [8, 2, 0, 6] 58 | lines[78.75] = [8, 3, 0, 5] 59 | lines[90] = [8, 4, 0, 4] 60 | lines[101.25] = [0, 3, 8, 5] 61 | lines[112.5] = [0, 2, 8, 6] 62 | lines[123.75] = [0, 1, 8, 7] 63 | lines[135] = [0, 0, 8, 8] 64 | lines[146.25] = [1, 0, 7, 8] 65 | lines[157.5] = [2, 0, 6, 8] 66 | lines[168.75] = [3, 0, 5, 8] 67 | self.lines[9] = lines 68 | return 69 | -------------------------------------------------------------------------------- /cutpaste/pyblur3/LinearMotionBlur.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | 4 | import numpy as 
# --- cutpaste/pyblur3/LinearMotionBlur.py ---
# -*- coding: utf-8 -*-
import math

import numpy as np
from PIL import Image
from scipy.signal import convolve2d
from skimage.draw import line

from .LineDictionary import LineDictionary

# candidate kernel sizes and half-line modes for the randomized variant
lineLengths = [3, 5, 7, 9]
lineTypes = ["full", "right", "left"]

# shared, module-level endpoint lookup table
lineDict = LineDictionary()


def LinearMotionBlur_random(img):
    """Apply LinearMotionBlur with random length, angle and line type."""
    lineLengthIdx = np.random.randint(0, len(lineLengths))
    lineTypeIdx = np.random.randint(0, len(lineTypes))
    lineLength = lineLengths[lineLengthIdx]
    lineType = lineTypes[lineTypeIdx]
    lineAngle = randomAngle(lineLength)
    return LinearMotionBlur(img, lineLength, lineAngle, lineType)


def LinearMotionBlur(img, dim, angle, linetype):
    """Motion-blur a grayscale PIL image along a line kernel.

    dim: kernel size; angle: degrees; linetype: 'full' | 'right' | 'left'
    ('right'/'left' keep only the half line from the kernel center).
    """
    imgarray = np.array(img, dtype="float32")
    kernel = LineKernel(dim, angle, linetype)
    convolved = convolve2d(imgarray, kernel, mode='same', fillvalue=255.0).astype("uint8")
    img = Image.fromarray(convolved)
    return img


def LineKernel(dim, angle, linetype):
    """Build a normalized dim x dim kernel containing a rasterized line."""
    kernelwidth = dim
    kernelCenter = int(math.floor(dim / 2))
    angle = SanitizeAngleValue(kernelCenter, angle)
    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
    # BUGFIX: copy the endpoint list. The original bound `lineAnchors`
    # directly to the shared list inside lineDict; the 'right'/'left'
    # branches below then mutated that list in place, permanently corrupting
    # the dictionary entry for every later call with the same (dim, angle).
    lineAnchors = list(lineDict.lines[dim][angle])
    if (linetype == 'right'):
        lineAnchors[0] = kernelCenter
        lineAnchors[1] = kernelCenter
    if (linetype == 'left'):
        lineAnchors[2] = kernelCenter
        lineAnchors[3] = kernelCenter
    rr, cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3])
    kernel[rr, cc] = 1
    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel


def SanitizeAngleValue(kernelCenter, angle):
    """Snap an arbitrary angle to the nearest angle the kernel supports."""
    numDistinctLines = kernelCenter * 4
    angle = math.fmod(angle, 180.0)
    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
    angle = nearestValue(angle, validLineAngles)
    return angle


def nearestValue(theta, validAngles):
    """Return the element of validAngles closest to theta."""
    idx = (np.abs(validAngles - theta)).argmin()
    return validAngles[idx]


def randomAngle(kerneldim):
    """Draw a uniformly random valid line angle for the given kernel size."""
    kernelCenter = int(math.floor(kerneldim / 2))
    numDistinctLines = kernelCenter * 4
    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
    angleIdx = np.random.randint(0, len(validLineAngles))
    return int(validLineAngles[angleIdx])


# --- cutpaste/pyblur3/PsfBlur.py ---
# -*- coding: utf-8 -*-
import os.path
import pickle

import numpy as np
from PIL import Image
from scipy.signal import convolve2d

# psf.pkl ships with the package; its PSF kernels are unpickled once at
# import time (latin1 encoding because it was pickled under Python 2)
pickledPsfFilename = os.path.join(os.path.dirname(__file__), "psf.pkl")

with open(pickledPsfFilename, 'rb') as pklfile:
    psfDictionary = pickle.load(pklfile, encoding='latin1')


def PsfBlur(img, psfid):
    """Blur a grayscale PIL image with the pre-measured PSF number psfid."""
    imgarray = np.array(img, dtype="float32")
    kernel = psfDictionary[psfid]
    convolved = convolve2d(imgarray, kernel, mode='same', fillvalue=255.0).astype("uint8")
    img = Image.fromarray(convolved)
    return img


def PsfBlur_random(img):
    """Apply PsfBlur with a PSF chosen uniformly at random."""
    psfid = np.random.randint(0, len(psfDictionary))
    return PsfBlur(img, psfid)


# --- cutpaste/pyblur3/RandomizedBlur.py ---
import numpy as np

from .BoxBlur import BoxBlur_random
from .DefocusBlur import DefocusBlur_random
from .GaussianBlur import GaussianBlur_random
from .LinearMotionBlur import LinearMotionBlur_random
from .PsfBlur import PsfBlur_random

# stringified index -> blur function, for uniform random dispatch
blurFunctions = {"0": BoxBlur_random, "1": DefocusBlur_random, "2": GaussianBlur_random, "3": LinearMotionBlur_random,
                 "4": PsfBlur_random}
blurToApply = blurFunctions[str(np.random.randint(0, len(blurFunctions)))] 15 | return blurToApply(img) 16 | -------------------------------------------------------------------------------- /cutpaste/pyblur3/__init__.py: -------------------------------------------------------------------------------- 1 | from .BoxBlur import BoxBlur, BoxBlur_random 2 | from .DefocusBlur import DefocusBlur, DefocusBlur_random 3 | from .GaussianBlur import GaussianBlur, GaussianBlur_random 4 | from .LinearMotionBlur import LinearMotionBlur, LinearMotionBlur_random 5 | from .PsfBlur import PsfBlur, PsfBlur_random 6 | from .RandomizedBlur import RandomizedBlur 7 | 8 | __all__ = ["BoxBlur", "BoxBlur_random", 9 | "DefocusBlur", "DefocusBlur_random", 10 | "GaussianBlur", "GaussianBlur_random", 11 | "LinearMotionBlur", "LinearMotionBlur_random", 12 | "PsfBlur", "PsfBlur_random", 13 | "RandomizedBlur"] 14 | -------------------------------------------------------------------------------- /cutpaste/pyblur3/psf.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/pyblur3/psf.pkl -------------------------------------------------------------------------------- /cutpaste/utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | import re 5 | from concurrent import futures 6 | from pathlib import Path 7 | from typing import List 8 | 9 | from PIL import Image 10 | from tqdm import tqdm 11 | 12 | from anno import VOCAnno, Anno, EntityAnno 13 | from cutpaste.background import PastedBackground 14 | from foreground import CutObjects, CutObject 15 | 16 | 17 | #################################################################################################### 18 | # foregrounds 19 | def read_real_VOC_foregrounds(data_dir, anno_dir, seg_dir, data_list) -> List[CutObjects]: 20 
| """ 21 | Read real foregrounds from VOC dataset 22 | Each has VOCAnno 23 | """ 24 | foregrounds = [] 25 | with open(data_list, 'r') as f: 26 | lines = f.readlines() 27 | for line in tqdm(lines, total=len(lines), desc="reading real VOC fg"): 28 | fields = line.strip().split() 29 | img_path = os.path.join(data_dir, fields[0] + '.jpg') 30 | anno_path = os.path.join(anno_dir, fields[0] + '.xml') 31 | seg_img_path = os.path.join(seg_dir, fields[0] + '.png') 32 | assert os.path.exists(anno_path) and os.path.exists(img_path) and os.path.exists(seg_img_path) 33 | anno: VOCAnno = Anno.factory(anno_path, seg_img_path) 34 | foregrounds.extend(CutObjects().add_image(img_path, anno)) 35 | return foregrounds 36 | 37 | 38 | def read_entity_foregrounds(dataset, rgb_dir, mask_dir) -> List[CutObjects]: 39 | """ 40 | Read syn foregrounds (processed by entity segmentation, then selected by GradCAM) 41 | Each has EntityAnno 42 | """ 43 | rgb_dir, mask_dir = map(Path, [rgb_dir, mask_dir]) 44 | 45 | foregrounds = [] 46 | # VOC 47 | def get_voc_image(mask_file): 48 | # eg voc2012/foreground/foreground_mask_old/car_mask/a car in a white background30.png 49 | _, label, filename = mask_file.rsplit("/", 2) 50 | label = label.replace("_mask", "") 51 | # infer rgb img_path 52 | # eg (a car in a white background, 30, _) 53 | target_caption, target_num, _ = re.split(r'(\d+)', filename) 54 | img_path = None 55 | for class_dir in rgb_dir.iterdir(): 56 | for caption in os.listdir(class_dir): 57 | if caption == target_caption: 58 | img_path = class_dir / caption / f"{target_num}.png" 59 | if img_path is not None: 60 | break 61 | assert img_path is not None, f"{str(mask_file)} Not found!" 
62 | assert os.path.exists(img_path) 63 | anno: EntityAnno = Anno.factory(None, mask_file) 64 | return CutObjects().add_image(img_path, anno) 65 | 66 | todos = [] 67 | all_mask_files = list(glob.glob(str(mask_dir / "*_mask" / "*.png"))) 68 | with tqdm(total=len(all_mask_files), desc="collecting real fg") as pbar, \ 69 | futures.ThreadPoolExecutor(100) as executor: 70 | for mask_file in all_mask_files: 71 | todos.append(executor.submit(get_voc_image, mask_file)) 72 | for future in futures.as_completed(todos): 73 | res = future.result() 74 | foregrounds.extend(res) 75 | pbar.update(1) 76 | return foregrounds 77 | 78 | #################################################################################################### 79 | # backgrounds 80 | def read_real_VOC_backgrounds(data_dir, anno_dir, seg_dir, data_list) -> List[PastedBackground]: 81 | """ 82 | load list of image name and image labels ([i] is img_name_list[i]'s K+1 class boolean vector) 83 | can be used in reading fg or bg 84 | """ 85 | backgrounds = [] 86 | with open(data_list, 'r') as f: 87 | lines = f.readlines() 88 | for line in tqdm(lines, total=len(lines), desc="reading real VOC bg"): 89 | fields = line.strip().split() 90 | img_path = os.path.join(data_dir, fields[0] + '.jpg') 91 | anno_path = os.path.join(anno_dir, fields[0] + '.xml') 92 | seg_img_path = os.path.join(seg_dir, fields[0] + '.png') 93 | assert os.path.exists(anno_path) 94 | assert os.path.exists(img_path) 95 | assert os.path.exists(seg_img_path) 96 | backgrounds.append(PastedBackground( 97 | imagepath=img_path, anno=VOCAnno(anno_path, seg_img_path) 98 | )) 99 | return backgrounds 100 | def read_background_template(data_dir: str) -> List[PastedBackground]: 101 | backgrounds = [] 102 | for rgb_file in glob.iglob(f"{data_dir}/**/*.png", 103 | recursive=True): 104 | if "azDownload" in str(rgb_file): continue 105 | if "group_0" in str(rgb_file): continue 106 | backgrounds.append(PastedBackground(rgb_file)) 107 | return backgrounds 108 | 109 | def 
read_dalle_backgrounds(data_dir: str, clip_strategy="use") -> List[PastedBackground]: 110 | """ 111 | use syn images for background, and ignore possible foreground in syn images 112 | data_dir: path to get dalle generated syn images 113 | """ 114 | data_dir = Path(data_dir) 115 | backgrounds = [] 116 | if clip_strategy in ["use", "reverse"]: 117 | with open(data_dir / "clip_postprocessed.json") as f: 118 | data = json.load(f) 119 | if clip_strategy == "use": 120 | # keep only in CLIP processed 121 | for imageid, captions in data.items(): 122 | for caption, selected_ids in captions.items(): 123 | for id in selected_ids: 124 | img = data_dir / f"{imageid}.jpg" / caption / id 125 | assert img.exists() 126 | backgrounds.append(PastedBackground(str(img))) 127 | else: # keep only NOT in CLIP processed 128 | for imgid in data_dir.iterdir(): 129 | if not imgid.name.endswith(".jpg"): 130 | continue 131 | if imgid not in data: 132 | for caption in imgid.iterdir(): 133 | for img in caption.iterdir(): 134 | backgrounds.append(PastedBackground(str(img))) 135 | else: # do not use clip but raw 136 | for img in data_dir.iterdir(): 137 | backgrounds.append(PastedBackground(str(img))) 138 | return backgrounds 139 | 140 | def convert_to_COCO(input_dir, 141 | image_id_src, background, blending_list, 142 | image_folder="Images", image_suffix="png", output_dir=None): 143 | """ 144 | image in input_dir / image_folder / image_id_src 145 | save in output_dir / tmp 146 | COCO image path in the format of image_folder / image_id_src, relative so that in detectron we can provide input_dir (eg use in remote server) 147 | """ 148 | if output_dir is None: 149 | output_dir = input_dir 150 | output_json_dict = { 151 | "images": [], 152 | "annotations": [] 153 | } 154 | bnd_id = 1 155 | for blending in blending_list: 156 | # image_id eg 2007_000515_16 157 | if blending != "": 158 | image_id = f"{image_id_src}_{blending}" 159 | else: 160 | image_id = image_id_src 161 | file_name = 
os.path.join(image_folder, f"{image_id}.{image_suffix}") 162 | from PIL import PngImagePlugin 163 | LARGE_ENOUGH_NUMBER = 100 164 | PngImagePlugin.MAX_TEXT_CHUNK = LARGE_ENOUGH_NUMBER * (1024 ** 2) 165 | img = Image.open(input_dir / file_name) 166 | width, height = img.size 167 | output_json_dict["images"].append({ 168 | "file_name": file_name, 169 | "height": height, 170 | "width": width, 171 | "id": image_id 172 | }) 173 | for polygons, bbox, area, category in background.to_COCO_ann(): 174 | output_json_dict["annotations"].append({ 175 | "segmentation": polygons, 176 | "area": area, 177 | "iscrowd": 0, 178 | "image_id": image_id, 179 | "bbox": bbox, 180 | "category_id": int(category), 181 | "id": f"{image_id}_{bnd_id}" 182 | }) 183 | bnd_id += 1 184 | tmpdir = output_dir / "tmp" 185 | os.makedirs(tmpdir, exist_ok=True) 186 | tmp_json_path = tmpdir / f"{image_id_src}.json" 187 | with open(tmp_json_path, "w") as f: 188 | json.dump(output_json_dict, f) 189 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_000039.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_000039.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | tvmonitor 17 | Frontal 18 | 0 19 | 0 20 | 21 | 156 22 | 89 23 | 344 24 | 279 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_000063.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_000063.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | dog 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 123 22 | 115 23 | 379 24 | 275 25 | 26 | 27 | 28 | chair 29 | Frontal 30 | 1 31 | 0 32 | 33 | 75 34 | 1 35 | 428 36 | 375 37 | 38 | 39 | 40 | 
-------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_000648.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_000648.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 333 12 | 3 13 | 14 | 1 15 | 16 | person 17 | Unspecified 18 | 0 19 | 1 20 | 21 | 394 22 | 199 23 | 404 24 | 223 25 | 26 | 27 | 28 | person 29 | Unspecified 30 | 0 31 | 1 32 | 33 | 424 34 | 199 35 | 436 36 | 220 37 | 38 | 39 | 40 | person 41 | Unspecified 42 | 0 43 | 1 44 | 45 | 434 46 | 196 47 | 444 48 | 220 49 | 50 | 51 | 52 | person 53 | Unspecified 54 | 0 55 | 1 56 | 57 | 443 58 | 195 59 | 452 60 | 220 61 | 62 | 63 | 64 | bus 65 | Right 66 | 0 67 | 0 68 | 69 | 29 70 | 113 71 | 353 72 | 266 73 | 74 | 75 | 76 | aeroplane 77 | Unspecified 78 | 1 79 | 1 80 | 81 | 328 82 | 86 83 | 474 84 | 192 85 | 86 | 87 | 88 | car 89 | Unspecified 90 | 1 91 | 1 92 | 93 | 2 94 | 213 95 | 28 96 | 235 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_001420.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_001420.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 332 12 | 3 13 | 14 | 1 15 | 16 | horse 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 192 22 | 109 23 | 340 24 | 270 25 | 26 | 27 | 28 | person 29 | Unspecified 30 | 0 31 | 0 32 | 33 | 281 34 | 80 35 | 364 36 | 268 37 | 38 | 39 | 40 | pottedplant 41 | Unspecified 42 | 0 43 | 0 44 | 45 | 436 46 | 148 47 | 500 48 | 306 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_001709.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_001709.jpg 4 | 5 | The VOC2007 Database 6 | 
PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 333 12 | 3 13 | 14 | 1 15 | 16 | person 17 | Frontal 18 | 1 19 | 0 20 | 21 | 367 22 | 132 23 | 426 24 | 287 25 | 26 | 27 | 28 | person 29 | Unspecified 30 | 1 31 | 1 32 | 33 | 1 34 | 1 35 | 202 36 | 333 37 | 38 | 39 | 40 | motorbike 41 | Left 42 | 1 43 | 0 44 | 45 | 1 46 | 45 47 | 412 48 | 333 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_001901.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_001901.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 375 11 | 500 12 | 3 13 | 14 | 1 15 | 16 | chair 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 192 22 | 261 23 | 247 24 | 390 25 | 26 | 27 | 28 | chair 29 | Unspecified 30 | 1 31 | 1 32 | 33 | 292 34 | 255 35 | 312 36 | 415 37 | 38 | 39 | 40 | chair 41 | Right 42 | 0 43 | 0 44 | 45 | 233 46 | 258 47 | 308 48 | 400 49 | 50 | 51 | 52 | diningtable 53 | Unspecified 54 | 1 55 | 0 56 | 57 | 238 58 | 266 59 | 312 60 | 413 61 | 62 | 63 | 64 | sofa 65 | Unspecified 66 | 1 67 | 1 68 | 69 | 4 70 | 284 71 | 173 72 | 497 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_002216.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_002216.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 340 12 | 3 13 | 14 | 1 15 | 16 | bus 17 | Unspecified 18 | 1 19 | 0 20 | 21 | 360 22 | 110 23 | 500 24 | 313 25 | 26 | 27 | 28 | bus 29 | Unspecified 30 | 0 31 | 0 32 | 33 | 65 34 | 104 35 | 396 36 | 319 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_002668.xml: -------------------------------------------------------------------------------- 1 | 2 | 
VOC2012 3 | 2007_002668.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | diningtable 17 | Unspecified 18 | 1 19 | 1 20 | 21 | 86 22 | 312 23 | 499 24 | 374 25 | 26 | 27 | 28 | person 29 | Unspecified 30 | 1 31 | 1 32 | 33 | 320 34 | 102 35 | 500 36 | 319 37 | 38 | 39 | 40 | person 41 | Unspecified 42 | 1 43 | 0 44 | 45 | 426 46 | 105 47 | 462 48 | 134 49 | 50 | 51 | 52 | person 53 | Frontal 54 | 1 55 | 0 56 | 57 | 95 58 | 83 59 | 309 60 | 319 61 | 62 | 63 | 64 | person 65 | Frontal 66 | 1 67 | 0 68 | 69 | 89 70 | 169 71 | 112 72 | 190 73 | 74 | 75 | 76 | pottedplant 77 | Unspecified 78 | 1 79 | 1 80 | 81 | 19 82 | 124 83 | 97 84 | 187 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_002669.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_002669.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | cow 17 | Unspecified 18 | 1 19 | 1 20 | 21 | 292 22 | 137 23 | 372 24 | 174 25 | 26 | 27 | 28 | cow 29 | Left 30 | 0 31 | 0 32 | 33 | 116 34 | 191 35 | 245 36 | 287 37 | 38 | 39 | 40 | person 41 | Frontal 42 | 0 43 | 0 44 | 45 | 442 46 | 241 47 | 478 48 | 294 49 | 50 | 51 | 52 | cow 53 | Right 54 | 0 55 | 0 56 | 57 | 163 58 | 184 59 | 273 60 | 269 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_002845.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_002845.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 375 11 | 500 12 | 3 13 | 14 | 1 15 | 16 | sheep 17 | Right 18 | 0 19 | 0 20 | 21 | 284 22 | 196 23 | 326 24 | 216 25 | 26 | 27 | 28 | sheep 29 | Right 30 | 0 31 | 0 32 | 33 | 14 34 | 265 35 | 175 36 | 347 37 | 
38 | 39 | 40 | sheep 41 | Right 42 | 0 43 | 0 44 | 45 | 140 46 | 199 47 | 186 48 | 235 49 | 50 | 51 | 52 | sheep 53 | Frontal 54 | 0 55 | 0 56 | 57 | 220 58 | 210 59 | 248 60 | 229 61 | 62 | 63 | 64 | sheep 65 | Frontal 66 | 0 67 | 0 68 | 69 | 254 70 | 204 71 | 279 72 | 227 73 | 74 | 75 | 76 | sheep 77 | Unspecified 78 | 0 79 | 0 80 | 81 | 230 82 | 201 83 | 256 84 | 220 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_003207.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_003207.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 333 12 | 3 13 | 14 | 1 15 | 16 | bottle 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 124 22 | 6 23 | 162 24 | 102 25 | 26 | 27 | 28 | bottle 29 | Unspecified 30 | 1 31 | 0 32 | 33 | 94 34 | 36 35 | 132 36 | 91 37 | 38 | 39 | 40 | bottle 41 | Unspecified 42 | 1 43 | 0 44 | 45 | 71 46 | 57 47 | 108 48 | 166 49 | 50 | 51 | 52 | bottle 53 | Unspecified 54 | 0 55 | 0 56 | 57 | 100 58 | 58 59 | 150 60 | 188 61 | 62 | 63 | 64 | bottle 65 | Unspecified 66 | 1 67 | 0 68 | 69 | 13 70 | 71 71 | 61 72 | 184 73 | 74 | 75 | 76 | bottle 77 | Unspecified 78 | 0 79 | 0 80 | 81 | 152 82 | 115 83 | 203 84 | 237 85 | 86 | 87 | 88 | bottle 89 | Unspecified 90 | 0 91 | 0 92 | 93 | 216 94 | 102 95 | 261 96 | 222 97 | 98 | 99 | 100 | bottle 101 | Unspecified 102 | 0 103 | 0 104 | 105 | 259 106 | 134 107 | 307 108 | 260 109 | 110 | 111 | 112 | bottle 113 | Unspecified 114 | 0 115 | 0 116 | 117 | 308 118 | 126 119 | 356 120 | 251 121 | 122 | 123 | 124 | bottle 125 | Unspecified 126 | 0 127 | 0 128 | 129 | 350 130 | 139 131 | 408 132 | 264 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_003565.xml: -------------------------------------------------------------------------------- 1 | 2 | 
VOC2012 3 | 2007_003565.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | bird 17 | Rear 18 | 0 19 | 0 20 | 21 | 280 22 | 218 23 | 500 24 | 317 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_003778.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_003778.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 500 12 | 3 13 | 14 | 1 15 | 16 | cat 17 | Frontal 18 | 0 19 | 0 20 | 21 | 38 22 | 67 23 | 443 24 | 466 25 | 26 | 27 | 28 | pottedplant 29 | Unspecified 30 | 1 31 | 0 32 | 33 | 55 34 | 75 35 | 221 36 | 256 37 | 38 | 39 | 40 | pottedplant 41 | Unspecified 42 | 1 43 | 0 44 | 45 | 380 46 | 94 47 | 496 48 | 270 49 | 50 | 51 | 52 | pottedplant 53 | Unspecified 54 | 0 55 | 0 56 | 57 | 432 58 | 54 59 | 500 60 | 289 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_003876.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_003876.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 382 12 | 3 13 | 14 | 1 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 93 22 | 98 23 | 174 24 | 144 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_004166.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_004166.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 332 12 | 3 13 | 14 | 1 15 | 16 | tvmonitor 17 | Unspecified 18 | 0 19 | 1 20 | 21 | 243 22 | 129 23 | 304 24 | 181 25 | 26 | 27 | 28 | chair 29 | Unspecified 30 | 1 31 | 1 32 | 33 | 485 34 | 189 35 | 500 36 
| 222 37 | 38 | 39 | 40 | chair 41 | Unspecified 42 | 1 43 | 1 44 | 45 | 358 46 | 195 47 | 398 48 | 239 49 | 50 | 51 | 52 | chair 53 | Unspecified 54 | 1 55 | 1 56 | 57 | 300 58 | 205 59 | 355 60 | 254 61 | 62 | 63 | 64 | chair 65 | Unspecified 66 | 1 67 | 1 68 | 69 | 265 70 | 278 71 | 374 72 | 332 73 | 74 | 75 | 76 | sofa 77 | Unspecified 78 | 1 79 | 1 80 | 81 | 1 82 | 199 83 | 99 84 | 332 85 | 86 | 87 | 88 | diningtable 89 | Unspecified 90 | 1 91 | 0 92 | 93 | 234 94 | 204 95 | 500 96 | 332 97 | 98 | 99 | 100 | dog 101 | Unspecified 102 | 0 103 | 0 104 | 105 | 210 106 | 194 107 | 257 108 | 234 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_005273.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_005273.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 1 15 | 16 | car 17 | Right 18 | 1 19 | 0 20 | 21 | 108 22 | 55 23 | 285 24 | 156 25 | 26 | 27 | 28 | car 29 | Right 30 | 1 31 | 0 32 | 33 | 235 34 | 84 35 | 418 36 | 153 37 | 38 | 39 | 40 | person 41 | Left 42 | 0 43 | 0 44 | 45 | 336 46 | 12 47 | 415 48 | 160 49 | 50 | 51 | 52 | person 53 | Unspecified 54 | 1 55 | 0 56 | 57 | 397 58 | 50 59 | 439 60 | 154 61 | 62 | 63 | 64 | bicycle 65 | Unspecified 66 | 1 67 | 0 68 | 69 | 1 70 | 112 71 | 305 72 | 357 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_005702.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_005702.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 333 11 | 500 12 | 3 13 | 14 | 1 15 | 16 | bicycle 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 38 22 | 227 23 | 292 24 | 500 25 | 26 | 27 | 28 | person 29 | Unspecified 30 | 0 31 | 0 32 | 33 | 76 34 | 63 35 | 327 36 | 465 
37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_006303.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_006303.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 371 12 | 3 13 | 14 | 1 15 | 16 | cat 17 | Unspecified 18 | 0 19 | 0 20 | 21 | 181 22 | 136 23 | 302 24 | 320 25 | 26 | 27 | 28 | pottedplant 29 | Unspecified 30 | 1 31 | 0 32 | 33 | 354 34 | 3 35 | 500 36 | 371 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_006400.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_006400.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 335 12 | 3 13 | 14 | 1 15 | 16 | train 17 | Frontal 18 | 1 19 | 0 20 | 21 | 433 22 | 80 23 | 500 24 | 256 25 | 26 | 27 | 28 | train 29 | Unspecified 30 | 1 31 | 0 32 | 33 | 189 34 | 101 35 | 440 36 | 235 37 | 38 | 39 | 40 | train 41 | Unspecified 42 | 1 43 | 0 44 | 45 | 40 46 | 125 47 | 199 48 | 203 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/Annotations/2007_006673.xml: -------------------------------------------------------------------------------- 1 | 2 | VOC2012 3 | 2007_006673.jpg 4 | 5 | The VOC2007 Database 6 | PASCAL VOC2007 7 | flickr 8 | 9 | 10 | 500 11 | 333 12 | 3 13 | 14 | 1 15 | 16 | boat 17 | Right 18 | 0 19 | 0 20 | 21 | 394 22 | 42 23 | 445 24 | 234 25 | 26 | 27 | 28 | boat 29 | Right 30 | 0 31 | 0 32 | 33 | 353 34 | 81 35 | 390 36 | 207 37 | 38 | 39 | 40 | boat 41 | Right 42 | 0 43 | 0 44 | 45 | 309 46 | 52 47 | 372 48 | 246 49 | 50 | 51 | 52 | boat 53 | Right 54 | 0 55 | 0 56 | 57 | 294 58 | 110 59 | 324 60 | 196 61 | 62 | 63 | 64 | boat 65 | Right 66 | 0 67 | 0 68 | 69 
| 242 70 | 70 71 | 275 72 | 212 73 | 74 | 75 | 76 | boat 77 | Right 78 | 0 79 | 0 80 | 81 | 230 82 | 117 83 | 245 84 | 189 85 | 86 | 87 | 88 | boat 89 | Right 90 | 0 91 | 0 92 | 93 | 214 94 | 112 95 | 239 96 | 201 97 | 98 | 99 | 100 | boat 101 | Right 102 | 0 103 | 0 104 | 105 | 187 106 | 114 107 | 210 108 | 195 109 | 110 | 111 | 112 | boat 113 | Right 114 | 0 115 | 0 116 | 117 | 171 118 | 107 119 | 206 120 | 223 121 | 122 | 123 | 124 | boat 125 | Right 126 | 0 127 | 0 128 | 129 | 137 130 | 105 131 | 171 132 | 213 133 | 134 | 135 | 136 | boat 137 | Unspecified 138 | 0 139 | 1 140 | 141 | 43 142 | 224 143 | 71 144 | 234 145 | 146 | 147 | 148 | boat 149 | Unspecified 150 | 0 151 | 1 152 | 153 | 4 154 | 231 155 | 29 156 | 244 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_000039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000039.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_000063.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000063.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_000648.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000648.jpg -------------------------------------------------------------------------------- 
/data/test_data/VOC2012/JPEGImages/2007_001420.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001420.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_001709.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001709.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_001901.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001901.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_002216.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002216.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_002668.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002668.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_002669.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002669.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_002845.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002845.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_003207.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003207.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_003565.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003565.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_003778.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003778.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_003876.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003876.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_004166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_004166.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_005273.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005273.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_005702.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005702.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_006303.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006303.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_006400.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006400.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/JPEGImages/2007_006673.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006673.jpg -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_000039.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000039.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_000063.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000063.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_000648.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000648.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_001420.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001420.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_001709.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001709.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_001901.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001901.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_002216.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002216.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_002668.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002668.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_002669.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002669.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_002845.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002845.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_003207.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003207.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_003565.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003565.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_003778.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003778.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_003876.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003876.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_004166.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_004166.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_005273.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005273.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_005702.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005702.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_006303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006303.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_006400.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006400.png -------------------------------------------------------------------------------- /data/test_data/VOC2012/SegmentationObject/2007_006673.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006673.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/forest/A real photo of forest/589.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/589.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/forest/A real photo of forest/590.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/590.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/forest/A real photo of forest/591.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/591.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/forest/A real photo of forest/593.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/593.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/forest/A real photo of forest/598.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/598.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png -------------------------------------------------------------------------------- /data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png -------------------------------------------------------------------------------- /data/test_data/background/context_augment/clip_postprocessed.json: -------------------------------------------------------------------------------- 1 | { 2 | "2007_003451": { 3 | "a real image of an empty living room 
filled with furniture and a large window": [ 4 | "40.png" 5 | ] 6 | }, 7 | "2007_002227": { 8 | "a real image of an empty wall in a living room": [ 9 | "45.png" 10 | ] 11 | }, 12 | "2007_006136": { 13 | "a real image of an empty grass near a forest": [ 14 | "33.png" 15 | ] 16 | }, 17 | "2007_002281": { 18 | "a colored photo of an empty street next to a forest": [ 19 | "79.png" 20 | ] 21 | }, 22 | "2007_000504": { 23 | "a colored photo of an empty pile of trash on the ground": [ 24 | "49.png" 25 | ] 26 | }, 27 | "2007_007585": { 28 | "a real image of an empty grass field": [ 29 | "67.png" 30 | ] 31 | }, 32 | "2007_001764": { 33 | "a real image of an empty grass covered field": [ 34 | "71.png" 35 | ] 36 | }, 37 | "2007_002967": { 38 | "a real image of an empty doorstep": [ 39 | "58.png" 40 | ] 41 | }, 42 | "2007_001609": { 43 | "a colored photo of an empty living room": [ 44 | "69.png" 45 | ] 46 | }, 47 | "2007_005124": { 48 | "a real image of an empty grass field": [ 49 | "58.png" 50 | ] 51 | } 52 | } -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/boat_mask/a boat79.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat79.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bus_mask/a bus105.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus105.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/bus_mask/a bus410.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus410.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/car_mask/a car97.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/a car97.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cat_mask/a cat245.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat245.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cat_mask/a cat58.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat58.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cat_mask/a cat62.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat62.png -------------------------------------------------------------------------------- 
/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/chair_mask/a chair402.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair402.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cow_mask/a cow223.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow223.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/cow_mask/a cow56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow56.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png -------------------------------------------------------------------------------- 
/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/dog_mask/a dog121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog121.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/horse_mask/a horse298.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse298.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/person_mask/a man146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man146.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/person_mask/a man253.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man253.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/person_mask/a man345.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man345.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure 
background32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background32.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a boat/a boat/79.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat/79.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bus/The picture of a bus on a pure background/460.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a 
bus/The picture of a bus on a pure background/460.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bus/a bus/105.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/105.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a bus/a bus/410.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/410.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a car/a car/97.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/a car/97.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cat/a cat/245.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/245.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cat/a cat/58.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/58.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cat/a cat/62.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/62.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a chair/a chair/402.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair/402.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cow/a cow/223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/223.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a cow/a cow/56.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/56.png -------------------------------------------------------------------------------- 
/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a dog/a dog/121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog/121.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a horse/a horse/298.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse/298.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a man/a man/146.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/146.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a man/a man/253.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/253.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a man/a man/345.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/345.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a 
potted plant/11.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a table/a table/123.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/123.png 
-------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a table/a table/300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/300.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png -------------------------------------------------------------------------------- /data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png -------------------------------------------------------------------------------- /data/test_data/labels.txt: -------------------------------------------------------------------------------- 1 | 0 __background__ 2 | 1 aeroplane 3 | 2 bicycle 4 | 3 bird 5 | 4 boat 6 | 5 bottle 7 | 6 bus 8 | 7 car 9 | 8 cat 10 | 9 chair 11 | 10 cow 12 | 11 diningtable 13 | 12 dog 14 | 13 horse 15 | 14 motorbike 16 | 15 person 17 | 16 pottedplant 18 | 17 sheep 19 | 18 sofa 20 | 19 train 21 | 20 tvmonitor -------------------------------------------------------------------------------- /data/test_data/train_cls-1shot.txt: -------------------------------------------------------------------------------- 1 | 2007_000039 19 2 | 2007_000063 8 11 3 | 2007_000648 0 5 6 14 4 | 2007_001420 12 14 15 5 | 2007_001709 13 14 6 | 2007_001901 8 10 17 7 | 2007_002216 5 8 | 2007_002668 10 14 15 9 | 2007_002669 9 14 10 | 2007_002845 16 11 | 2007_003207 4 12 | 2007_003565 2 13 | 2007_003778 7 15 14 | 2007_003876 0 15 | 2007_004166 8 10 11 17 19 16 | 2007_005273 1 6 14 17 | 2007_005702 1 14 18 | 2007_006303 7 15 19 | 2007_006400 18 20 | 2007_006673 3 21 | -------------------------------------------------------------------------------- /data/voc2012/1k_10_shot.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": [ 3 | "2007_000480.jpg", 4 | 
"2007_002198.jpg", 5 | "2007_004009.jpg", 6 | "2007_000648.jpg", 7 | "2007_000738.jpg", 8 | "2007_003000.jpg", 9 | "2007_000032.jpg", 10 | "2007_002107.jpg", 11 | "2007_003876.jpg", 12 | "2007_002099.jpg", 13 | "2007_000256.jpg", 14 | "2007_000243.jpg" 15 | ], 16 | "1": [ 17 | "2007_004769.jpg", 18 | "2007_002227.jpg", 19 | "2007_000584.jpg", 20 | "2007_006317.jpg", 21 | "2007_000793.jpg", 22 | "2007_005273.jpg", 23 | "2007_001027.jpg", 24 | "2007_005430.jpg", 25 | "2007_000515.jpg", 26 | "2007_005368.jpg", 27 | "2007_005064.jpg", 28 | "2007_005702.jpg" 29 | ], 30 | "2": [ 31 | "2007_009607.jpg", 32 | "2007_003565.jpg", 33 | "2007_002212.jpg", 34 | "2007_009759.jpg", 35 | "2007_002403.jpg", 36 | "2007_003330.jpg", 37 | "2007_002896.jpg", 38 | "2007_003267.jpg", 39 | "2007_006490.jpg", 40 | "2007_003118.jpg", 41 | "2007_000645.jpg", 42 | "2007_000363.jpg", 43 | "2007_000068.jpg", 44 | "2007_002120.jpg" 45 | ], 46 | "3": [ 47 | "2007_006281.jpg", 48 | "2007_000713.jpg", 49 | "2007_000241.jpg", 50 | "2007_006660.jpg", 51 | "2007_006673.jpg", 52 | "2007_003910.jpg", 53 | "2007_002234.jpg", 54 | "2007_001487.jpg", 55 | "2007_001698.jpg" 56 | ], 57 | "4": [ 58 | "2007_006409.jpg", 59 | "2007_004476.jpg", 60 | "2007_003207.jpg", 61 | "2007_000170.jpg", 62 | "2007_006483.jpg", 63 | "2007_002545.jpg", 64 | "2007_003451.jpg", 65 | "2007_004291.jpg", 66 | "2007_001185.jpg", 67 | "2007_002953.jpg", 68 | "2007_003431.jpg", 69 | "2007_007250.jpg", 70 | "2007_001602.jpg", 71 | "2007_000250.jpg" 72 | ], 73 | "5": [ 74 | "2007_004705.jpg", 75 | "2007_004065.jpg", 76 | "2007_001595.jpg", 77 | "2007_007003.jpg", 78 | "2007_000768.jpg", 79 | "2007_003715.jpg", 80 | "2007_002024.jpg", 81 | "2007_002216.jpg", 82 | "2007_005262.jpg" 83 | ], 84 | "6": [ 85 | "2007_002281.jpg", 86 | "2007_004481.jpg", 87 | "2007_004810.jpg", 88 | "2007_005988.jpg", 89 | "2007_002370.jpg", 90 | "2011_001004.jpg", 91 | "2007_003815.jpg", 92 | "2007_002789.jpg", 93 | "2007_006151.jpg", 94 | "2007_004830.jpg", 
95 | "2007_001857.jpg" 96 | ], 97 | "7": [ 98 | "2007_005688.jpg", 99 | "2007_001825.jpg", 100 | "2007_002760.jpg", 101 | "2007_000549.jpg", 102 | "2007_003778.jpg", 103 | "2007_000528.jpg", 104 | "2007_003788.jpg", 105 | "2007_006303.jpg", 106 | "2011_000999.jpg", 107 | "2007_004998.jpg", 108 | "2007_000876.jpg", 109 | "2007_003525.jpg" 110 | ], 111 | "8": [ 112 | "2007_006004.jpg", 113 | "2007_005212.jpg", 114 | "2007_003541.jpg", 115 | "2007_006477.jpg", 116 | "2007_004166.jpg", 117 | "2007_005647.jpg", 118 | "2007_003251.jpg", 119 | "2007_001609.jpg", 120 | "2007_006530.jpg", 121 | "2007_001901.jpg", 122 | "2007_001340.jpg", 123 | "2007_005266.jpg", 124 | "2007_000063.jpg", 125 | "2007_003205.jpg", 126 | "2007_003889.jpg", 127 | "2007_006066.jpg", 128 | "2007_002368.jpg", 129 | "2007_005086.jpg", 130 | "2007_001439.jpg" 131 | ], 132 | "9": [ 133 | "2007_002669.jpg", 134 | "2007_000904.jpg", 135 | "2007_001764.jpg", 136 | "2007_000504.jpg", 137 | "2007_001917.jpg", 138 | "2007_004537.jpg", 139 | "2007_004081.jpg", 140 | "2007_004500.jpg", 141 | "2007_002088.jpg", 142 | "2007_001073.jpg", 143 | "2007_005797.jpg", 144 | "2007_005124.jpg" 145 | ], 146 | "10": [ 147 | "2007_002914.jpg", 148 | "2007_005790.jpg", 149 | "2007_003529.jpg", 150 | "2007_006699.jpg", 151 | "2007_002668.jpg", 152 | "2007_003668.jpg", 153 | "2007_001834.jpg" 154 | ], 155 | "11": [ 156 | "2007_002611.jpg", 157 | "2007_000720.jpg", 158 | "2007_003604.jpg", 159 | "2007_001397.jpg", 160 | "2007_002055.jpg", 161 | "2007_001225.jpg", 162 | "2007_009605.jpg", 163 | "2007_009327.jpg", 164 | "2007_007585.jpg", 165 | "2007_007930.jpg" 166 | ], 167 | "12": [ 168 | "2007_001724.jpg", 169 | "2007_002273.jpg", 170 | "2007_006445.jpg", 171 | "2007_000392.jpg", 172 | "2007_003189.jpg", 173 | "2007_005248.jpg", 174 | "2007_000836.jpg", 175 | "2007_001960.jpg", 176 | "2007_001420.jpg", 177 | "2007_006134.jpg" 178 | ], 179 | "13": [ 180 | "2007_002488.jpg", 181 | "2007_005989.jpg", 182 | "2007_000822.jpg", 183 
| "2007_005951.jpg", 184 | "2007_004003.jpg", 185 | "2007_005314.jpg", 186 | "2007_000733.jpg", 187 | "2007_001709.jpg", 188 | "2007_000364.jpg", 189 | "2007_005878.jpg", 190 | "2007_002105.jpg" 191 | ], 192 | "14": [ 193 | "2007_002895.jpg", 194 | "2007_002639.jpg", 195 | "2007_002361.jpg", 196 | "2007_002954.jpg", 197 | "2007_004289.jpg", 198 | "2007_004707.jpg", 199 | "2007_002293.jpg", 200 | "2007_002142.jpg" 201 | ], 202 | "15": [ 203 | "2007_004948.jpg", 204 | "2007_002967.jpg", 205 | "2007_001149.jpg" 206 | ], 207 | "16": [ 208 | "2007_001872.jpg", 209 | "2007_003190.jpg", 210 | "2007_001416.jpg", 211 | "2007_006136.jpg", 212 | "2007_002845.jpg", 213 | "2007_004423.jpg", 214 | "2007_003593.jpg", 215 | "2007_004768.jpg", 216 | "2007_006832.jpg", 217 | "2007_006899.jpg" 218 | ], 219 | "17": [ 220 | "2007_008203.jpg" 221 | ], 222 | "18": [ 223 | "2007_004627.jpg", 224 | "2007_004663.jpg", 225 | "2007_003286.jpg", 226 | "2007_006254.jpg", 227 | "2007_006400.jpg", 228 | "2007_005360.jpg", 229 | "2007_002462.jpg", 230 | "2007_003178.jpg", 231 | "2007_004951.jpg", 232 | "2007_000333.jpg" 233 | ], 234 | "19": [ 235 | "2007_000121.jpg", 236 | "2007_006704.jpg", 237 | "2007_005210.jpg", 238 | "2007_000039.jpg", 239 | "2007_005902.jpg", 240 | "2007_001704.jpg" 241 | ] 242 | } -------------------------------------------------------------------------------- /data/voc2012/1k_1_shot.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": [ 3 | "2007_003876.jpg" 4 | ], 5 | "1": [ 6 | "2007_005702.jpg" 7 | ], 8 | "2": [ 9 | "2007_003565.jpg" 10 | ], 11 | "3": [ 12 | "2007_006673.jpg" 13 | ], 14 | "4": [ 15 | "2007_003207.jpg" 16 | ], 17 | "5": [ 18 | "2007_002216.jpg" 19 | ], 20 | "6": [ 21 | "2007_005273.jpg" 22 | ], 23 | "7": [ 24 | "2007_003778.jpg" 25 | ], 26 | "8": [ 27 | "2007_000063.jpg" 28 | ], 29 | "9": [ 30 | "2007_002669.jpg" 31 | ], 32 | "10": [ 33 | "2007_002668.jpg" 34 | ], 35 | "11": [ 36 | "2007_004166.jpg" 37 | 
], 38 | "12": [ 39 | "2007_001420.jpg" 40 | ], 41 | "13": [ 42 | "2007_001709.jpg" 43 | ], 44 | "14": [ 45 | "2007_000648.jpg" 46 | ], 47 | "15": [ 48 | "2007_006303.jpg" 49 | ], 50 | "16": [ 51 | "2007_002845.jpg" 52 | ], 53 | "17": [ 54 | "2007_001901.jpg" 55 | ], 56 | "18": [ 57 | "2007_006400.jpg" 58 | ], 59 | "19": [ 60 | "2007_000039.jpg" 61 | ] 62 | } 63 | -------------------------------------------------------------------------------- /data/voc2012/label2id.json: -------------------------------------------------------------------------------- 1 | { 2 | "aeroplane": 1, 3 | "bicycle": 2, 4 | "bird": 3, 5 | "boat": 4, 6 | "bottle": 5, 7 | "bus": 6, 8 | "car": 7, 9 | "cat": 8, 10 | "chair": 9, 11 | "cow": 10, 12 | "diningtable": 11, 13 | "dog": 12, 14 | "horse": 13, 15 | "motorbike": 14, 16 | "person": 15, 17 | "pottedplant": 16, 18 | "sheep": 17, 19 | "sofa": 18, 20 | "train": 19, 21 | "tvmonitor": 20 22 | } -------------------------------------------------------------------------------- /data/voc2012/labels.txt: -------------------------------------------------------------------------------- 1 | 0 __background__ 2 | 1 aeroplane 3 | 2 bicycle 4 | 3 bird 5 | 4 boat 6 | 5 bottle 7 | 6 bus 8 | 7 car 9 | 8 cat 10 | 9 chair 11 | 10 cow 12 | 11 diningtable 13 | 12 dog 14 | 13 horse 15 | 14 motorbike 16 | 15 person 17 | 16 pottedplant 18 | 17 sheep 19 | 18 sofa 20 | 19 train 21 | 20 tvmonitor -------------------------------------------------------------------------------- /data/voc2012/train_cls-10shot.txt: -------------------------------------------------------------------------------- 1 | 2007_000032 0 14 2 | 2007_000039 19 3 | 2007_000063 8 11 4 | 2007_000068 2 5 | 2007_000121 19 6 | 2007_000170 4 14 7 | 2007_000241 3 8 | 2007_000243 0 9 | 2007_000250 4 10 10 | 2007_000256 0 11 | 2007_000333 18 12 | 2007_000363 2 13 | 2007_000364 13 14 14 | 2007_000392 12 14 15 | 2007_000480 0 14 16 | 2007_000504 9 14 17 | 2007_000515 1 6 14 18 | 2007_000528 7 19 | 2007_000549 7 
20 | 2007_000584 1 17 21 | 2007_000645 2 22 | 2007_000648 0 5 6 14 23 | 2007_000713 3 24 | 2007_000720 11 25 | 2007_000733 13 14 26 | 2007_000738 0 27 | 2007_000768 5 28 | 2007_000793 1 5 14 29 | 2007_000822 13 30 | 2007_000836 12 14 31 | 2007_000876 7 32 | 2007_000904 9 12 14 33 | 2007_001027 1 8 17 19 34 | 2007_001073 9 35 | 2007_001149 15 17 19 36 | 2007_001185 4 7 10 14 37 | 2007_001225 11 38 | 2007_001340 8 11 14 39 | 2007_001397 11 40 | 2007_001416 16 41 | 2007_001420 12 14 15 42 | 2007_001439 8 10 43 | 2007_001487 3 44 | 2007_001595 5 45 | 2007_001602 4 46 | 2007_001609 8 10 47 | 2007_001698 3 48 | 2007_001704 19 49 | 2007_001709 13 14 50 | 2007_001724 12 51 | 2007_001764 9 52 | 2007_001825 7 11 53 | 2007_001834 10 54 | 2007_001857 6 14 55 | 2007_001872 16 56 | 2007_001901 8 10 17 57 | 2007_001917 9 58 | 2007_001960 12 59 | 2007_002024 5 14 60 | 2007_002055 11 14 17 61 | 2007_002088 9 62 | 2007_002099 0 63 | 2007_002105 13 14 64 | 2007_002107 0 65 | 2007_002120 2 14 66 | 2007_002142 14 67 | 2007_002198 0 68 | 2007_002212 2 69 | 2007_002216 5 70 | 2007_002227 1 19 71 | 2007_002234 3 72 | 2007_002273 12 14 73 | 2007_002281 6 14 74 | 2007_002293 14 75 | 2007_002361 14 15 76 | 2007_002368 8 10 17 77 | 2007_002370 6 14 78 | 2007_002403 2 3 14 79 | 2007_002462 18 80 | 2007_002488 13 14 81 | 2007_002545 4 14 17 82 | 2007_002611 11 14 83 | 2007_002639 14 84 | 2007_002668 10 14 15 85 | 2007_002669 9 14 86 | 2007_002760 7 87 | 2007_002789 6 9 14 88 | 2007_002845 16 89 | 2007_002895 14 90 | 2007_002896 2 91 | 2007_002914 10 14 92 | 2007_002953 4 19 93 | 2007_002954 14 94 | 2007_002967 15 95 | 2007_003000 0 96 | 2007_003118 2 14 97 | 2007_003178 18 98 | 2007_003189 12 14 15 99 | 2007_003190 16 100 | 2007_003205 8 14 101 | 2007_003207 4 102 | 2007_003251 8 10 103 | 2007_003267 2 104 | 2007_003286 18 105 | 2007_003330 2 106 | 2007_003431 4 14 107 | 2007_003451 4 8 17 19 108 | 2007_003525 7 109 | 2007_003529 10 14 110 | 2007_003541 8 14 111 | 2007_003565 2 112 | 
2007_003593 16 113 | 2007_003604 11 19 114 | 2007_003668 10 115 | 2007_003715 5 116 | 2007_003778 7 15 117 | 2007_003788 7 8 118 | 2007_003815 6 14 119 | 2007_003876 0 120 | 2007_003889 8 12 14 121 | 2007_003910 3 14 122 | 2007_004003 13 123 | 2007_004009 0 124 | 2007_004065 5 14 125 | 2007_004081 9 15 126 | 2007_004166 8 10 11 17 19 127 | 2007_004289 14 19 128 | 2007_004291 4 14 129 | 2007_004423 16 130 | 2007_004476 4 14 131 | 2007_004481 6 14 132 | 2007_004500 9 133 | 2007_004537 9 12 14 134 | 2007_004627 18 135 | 2007_004663 18 136 | 2007_004705 5 6 137 | 2007_004707 14 15 138 | 2007_004768 16 139 | 2007_004769 1 14 140 | 2007_004810 6 14 141 | 2007_004830 6 14 142 | 2007_004948 15 143 | 2007_004951 18 144 | 2007_004998 7 145 | 2007_005064 1 14 146 | 2007_005086 8 10 14 147 | 2007_005124 9 14 148 | 2007_005210 19 149 | 2007_005212 8 13 17 150 | 2007_005248 12 14 151 | 2007_005262 5 6 152 | 2007_005266 8 15 17 153 | 2007_005273 1 6 14 154 | 2007_005314 13 155 | 2007_005360 18 156 | 2007_005368 1 14 157 | 2007_005430 1 4 14 158 | 2007_005647 8 10 17 159 | 2007_005688 7 160 | 2007_005702 1 14 161 | 2007_005790 10 14 162 | 2007_005797 9 14 163 | 2007_005878 13 164 | 2007_005902 19 165 | 2007_005951 13 14 166 | 2007_005988 6 11 14 167 | 2007_005989 13 14 168 | 2007_006004 8 14 169 | 2007_006066 8 17 19 170 | 2007_006134 12 171 | 2007_006136 16 172 | 2007_006151 6 12 14 173 | 2007_006254 18 174 | 2007_006281 3 175 | 2007_006303 7 15 176 | 2007_006317 1 5 14 177 | 2007_006400 18 178 | 2007_006409 4 10 14 179 | 2007_006445 12 14 180 | 2007_006477 8 14 181 | 2007_006483 4 14 182 | 2007_006490 2 3 14 183 | 2007_006530 8 17 184 | 2007_006660 3 6 185 | 2007_006673 3 186 | 2007_006699 10 14 187 | 2007_006704 19 188 | 2007_006832 16 189 | 2007_006899 16 190 | 2007_007003 5 6 191 | 2007_007250 4 19 192 | 2007_007585 11 193 | 2007_007930 11 194 | 2007_008203 17 195 | 2007_009327 11 14 196 | 2007_009605 11 197 | 2007_009607 2 198 | 2007_009759 2 199 | 2011_000999 7 200 | 
2011_001004 6 201 | -------------------------------------------------------------------------------- /data/voc2012/train_cls-1shot.txt: -------------------------------------------------------------------------------- 1 | 2007_000039 19 2 | 2007_000063 8 11 3 | 2007_000648 0 5 6 14 4 | 2007_001420 12 14 15 5 | 2007_001709 13 14 6 | 2007_001901 8 10 17 7 | 2007_002216 5 8 | 2007_002668 10 14 15 9 | 2007_002669 9 14 10 | 2007_002845 16 11 | 2007_003207 4 12 | 2007_003565 2 13 | 2007_003778 7 15 14 | 2007_003876 0 15 | 2007_004166 8 10 11 17 19 16 | 2007_005273 1 6 14 17 | 2007_005702 1 14 18 | 2007_006303 7 15 19 | 2007_006400 18 20 | 2007_006673 3 21 | -------------------------------------------------------------------------------- /detection/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys, uuid 4 | import tempfile 5 | from detectron2 import model_zoo 6 | from detectron2.config import get_cfg 7 | from detectron2.engine import launch 8 | from utils import setup_cfg, infer, Trainer 9 | from pathlib import Path 10 | import ujson as json 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("--train_dataset", "-s", type=str, required=True, choices=["voc_train", "syn", "coco_train"]) 15 | parser.add_argument("--train_dir", type=str, required=False, help="if unspecified, use default path") 16 | parser.add_argument("--train_coconame", type=str, required=False, help="if unspecified, use default name") 17 | parser.add_argument("--syn_dir", type=str, required=False, default="NOT_USED", 18 | help="synthic training data folder, contains `images` for images and `COCO.json` for COCO format annotation and `label2id.json` for labels") 19 | parser.add_argument("--additional_dataset", nargs="+", help="when use multiple dataset other than -s, put more heavy dataset in here") 20 | 21 | parser.add_argument("--test_dataset", "-t", type=str, choices=["voc_val", 
def filter(json_file, td):
    """Strip instance-segmentation polygons from a COCO annotation file.

    Detection training only needs bounding boxes, so every annotation's
    ``segmentation`` list is emptied and the result is written to a fresh
    sub-directory of ``td``.

    Args:
        json_file: path of the source COCO-format annotation file.
        td: temporary directory in which the filtered copy is created.

    Returns:
        Path of the filtered annotation file, or ``json_file`` unchanged
        when the annotations already carry no segmentation.

    NOTE: the function name shadows the ``filter`` builtin; it is kept
    because callers pass it by name (see ``main`` -> ``setup_cfg``).
    """
    with open(json_file) as f:
        data = json.load(f)
    newanno = []
    for anno in data["annotations"]:
        if len(anno["segmentation"]) == 0:
            # first annotation already has no segmentation: assume the whole
            # file is detection-only and reuse it as-is
            return json_file
        anno["segmentation"] = []
        newanno.append(anno)
    data["annotations"] = newanno
    # unique sub-directory so concurrent runs sharing `td` never collide
    # (renamed from `id`, which shadowed the builtin)
    subdir = str(uuid.uuid4())
    os.makedirs(Path(td) / subdir)
    out_file = Path(td) / subdir / "COCO.json"
    with open(out_file, "w") as f:
        json.dump(data, f)
    return out_file

def fetch_cfg(args):
    """Build a Faster R-CNN detection config for the requested ResNet depth.

    Starts from the model-zoo R50/R101 FPN 3x schedule, then chooses the
    initial weights: an explicit checkpoint when ``--init_checkpoint`` is
    given (resolved under ``$PT_DATA_DIR`` when that env var is set),
    otherwise the ImageNet-pretrained backbone.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(f"COCO-Detection/faster_rcnn_R_{args.resnet}_FPN_3x.yaml"))
    if args.init_checkpoint is not None:
        print("loading from ckpt:", args.init_checkpoint)
        if "PT_DATA_DIR" in os.environ:
            args.init_checkpoint = os.path.join(os.environ["PT_DATA_DIR"], args.init_checkpoint)
        cfg.MODEL.WEIGHTS = args.init_checkpoint
    else:
        cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl"
    return cfg

def main(args):
    """Entry point executed (possibly once per GPU) by detectron2 `launch`."""
    cfg = fetch_cfg(args)
    cfg = setup_cfg(args, cfg, filter=filter)
    if args.eval_checkpoint is not None:
        # evaluation-only mode: run inference, then exit
        infer(cfg)
        sys.exit(0)

    Trainer.data_aug = args.data_aug
    Trainer.debug = args.debug
    Trainer.project_name = "dalle-for-detection"
    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()

if __name__ == "__main__":
    args = parse_args()

    with tempfile.TemporaryDirectory() as td:
        # holds the detection-only COCO annotations produced by `filter`
        args.td = td
        # NOTE(review): machine_rank is fixed at 0, so with --num_machines > 1
        # every machine claims rank 0 -- confirm before multi-machine use
        launch(
            main, num_gpus_per_machine=args.num_gpus_per_machine,
            num_machines=args.num_machines, machine_rank=0, dist_url="auto", args=(args, )
        )
10 | """ 11 | 12 | def __init__( 13 | self, 14 | project: str = "detectron2", 15 | config: Union[Dict, CfgNode] = {}, 16 | window_size: int = 20, 17 | **kwargs, 18 | ): 19 | """ 20 | Args: 21 | project (str): W&B Project name 22 | config Union[Dict, CfgNode]: the project level configuration object 23 | window_size (int): the scalars will be median-smoothed by this window size 24 | kwargs: other arguments passed to `wandb.init(...)` 25 | """ 26 | import wandb 27 | wandb.login(key='YOUR API KEY') 28 | 29 | self._window_size = window_size 30 | self._run = ( 31 | wandb.init(project=project, config=config, **kwargs) if not wandb.run else wandb.run 32 | ) 33 | self._run._label(repo="detectron2") 34 | 35 | def write(self): 36 | storage = get_event_storage() 37 | 38 | log_dict = {} 39 | for k, (v, _) in storage.latest_with_smoothing_hint(self._window_size).items(): 40 | log_dict[k] = v 41 | 42 | self._run.log(log_dict) 43 | 44 | def close(self): 45 | self._run.finish() -------------------------------------------------------------------------------- /instance_seg/run.sh: -------------------------------------------------------------------------------- 1 | syn=$1 2 | resnets=$2 3 | lrs=$3 4 | if [[ -d $syn ]]; then 5 | echo "$syn exists" 6 | else 7 | exit -1; 8 | fi; 9 | 10 | for resnet in $(echo $resnets | tr "," "\n"); do 11 | for lr in $(echo $lrs | tr "," "\n"); do 12 | echo "$resnet with $lr" 13 | /lab/andy/anaconda3/envs/paste-segment/bin/python seg.py \ 14 | -s syn -t voc_val \ 15 | --blending gaussian \ 16 | --lr $lr \ 17 | --freeze --data_aug --crop \ 18 | --epoch 20 \ 19 | --resnet $resnet \ 20 | --syn_dir $syn; 21 | done; 22 | done; -------------------------------------------------------------------------------- /instance_seg/seg.py: -------------------------------------------------------------------------------- 1 | import os, cv2 2 | import sys 3 | sys.path.insert(1, os.path.join(sys.path[0], "../", 'detection')) 4 | import tempfile 5 | 6 | from detectron2 import 
model_zoo 7 | from detectron2.config import get_cfg 8 | from utils import setup_cfg, infer, Trainer 9 | from train import parse_args 10 | 11 | def fetch_cfg(args): 12 | cfg = get_cfg() 13 | 14 | cfg.merge_from_file(model_zoo.get_config_file(f"COCO-InstanceSegmentation/mask_rcnn_R_{args.resnet}_FPN_3x.yaml")) 15 | # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml") 16 | # from scratch 17 | cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl" 18 | return cfg 19 | 20 | if __name__ == "__main__": 21 | args = parse_args() 22 | 23 | with tempfile.TemporaryDirectory() as td: 24 | args.td = td 25 | cfg = fetch_cfg(args) 26 | cfg = setup_cfg(args, cfg) 27 | if args.eval_checkpoint is not None: 28 | infer(cfg) 29 | sys.exit(0) 30 | 31 | Trainer.data_aug = args.data_aug 32 | Trainer.debug = args.debug 33 | Trainer.project_name = "paste-seg-instance" 34 | trainer = Trainer(cfg) 35 | trainer.resume_or_load(resume=False) 36 | trainer.train() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # download gpu version if possible 2 | # torch==1.10.1 3 | # torchvision==0.11.2 4 | # download from https://detectron2.readthedocs.io/en/latest/tutorials/install.html 5 | # detectron2 6 | # needed to transform on syn COCO segmentation RLE 7 | shapely 8 | 9 | numpy 10 | scikit-image==0.18.0 11 | scikit-learn 12 | Pillow 13 | tqdm 14 | opencv-python 15 | wandb 16 | pandas 17 | pycocotools 18 | hydra-core 19 | # weird issue with detectron2 20 | setuptools==59.5.0 21 | # for faster json speedup 22 | ujson 23 | # fix weird bug 24 | omegaconf==2.1 25 | transformers==4.22.2 26 | diffusers==0.9.0 27 | kornia 28 | timm -------------------------------------------------------------------------------- /t2i_generate/background_captions.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Create prompt to get pure backgrounds 3 | """ 4 | import json 5 | 6 | templates = [ 7 | "A real photo of {obj}", 8 | ] 9 | classnames = [ 10 | # indoor objects 11 | "empty living room", "empty kitchen", 12 | # vehicle 13 | "blue sky", "empty city street, color", "empty city road, color", "empty lake", "empty sea", "railway without train", "empty railway, color", 14 | # animal 15 | "trees", "forest", "empty street, colored", "farms", "nature", "empty farm", "stable" 16 | ] 17 | 18 | to_save = {"background": { 19 | class_: [] for class_ in classnames 20 | }} 21 | for class_ in classnames: 22 | for temp in templates: 23 | print(temp.format(obj=class_)) 24 | to_save["background"][class_] = [ 25 | temp.format(obj=class_) 26 | for temp in templates 27 | ] 28 | with open("background_templates.json", "w") as f: 29 | json.dump(to_save, f) -------------------------------------------------------------------------------- /t2i_generate/foreground_captions.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | templates = [ 4 | "a photo of {obj}", 5 | "a realistic photo of {obj}", 6 | "a photo of {obj} in pure background", 7 | "{obj} in a white background", 8 | "{obj} without background", 9 | "{obj} isolated on white background", 10 | ] 11 | 12 | classnames = [ 13 | # 'a truck', 'a traffic light', 'a fire hydrant', 'a stop sign', 'a parking meter', 'a bench', 14 | # 'an elephant', 'a bear', 'a zebra', 'a giraffe', 'a backpack', 'an umbrella', 15 | # 'a handbag', 'a tie', 'a suitcase', 'a frisbee', 'a ski', 'a snowboard', 'a sports ball', 'a kite', 'a baseball bat', 16 | # 'a baseball glove', 'a skateboard', 'a surfboard', 'a tennis racket', 'a wine glass', 'a cup', 'a fork', 17 | # 'a knife', 'a spoon', 'a bowl', 'a banana', 'an apple', 'a sandwich', 'an orange', 'a broccoli', 'a carrot', 'a hot dog', 18 | # 'a pizza', 'a donut', 'a 
# make sure you're logged in with `huggingface-cli login`
"""Generate images with Stable Diffusion 2.1 for one chunk of captions.

The caption json maps ids to caption lists; ids are split into
``--num_clusters`` chunks and this process handles chunk ``--idx``.
"""
import argparse
import json, os
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch
import numpy as np

def parse_args():
    """CLI for caption-chunked Stable Diffusion generation."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--idx", type=int)
    parser.add_argument("--bsz", type=int, default=8)
    parser.add_argument("--num_gen_images_per_caption", "-n", type=int, default=20)
    parser.add_argument("--caption_json", default="./data", help="if not '', will only generate DallE images from this json, use `idx` and `scene` to select which to generate")
    parser.add_argument("--num_clusters", default=100, type=int, help="when using RuDalle, split all captions into `num_clusters` chunk and let each machine handle one chunk only")

    parser.add_argument("--output_dir", default="")
    args = parser.parse_args()
    if 'PT_DATA_DIR' in os.environ:
        args.output_dir = os.path.join(os.environ['PT_DATA_DIR'], args.output_dir)
    return args

def batchify(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

if __name__ == "__main__":
    args = parse_args()

    with open(args.caption_json) as f:
        data = json.load(f)

    # the json has a single top-level key (e.g. "foreground"/"background")
    key = next(iter(data))
    data = data[key]

    # deterministic split of the sorted caption ids; this process handles
    # only chunk `idx`
    all_keys = sorted(list(data.keys()))
    all_chunks = np.array_split(all_keys, args.num_clusters)
    chunks = all_chunks[args.idx]

    # BUG FIX: the kwarg is `torch_dtype`, not `torch_type`; the typo was
    # silently ignored and the pipeline ran in full fp32 precision
    pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    n_already_generated = 0
    n_generate_per_cycle = 4
    while n_already_generated < args.num_gen_images_per_caption:
        for id_ in chunks:  # renamed from `id`, which shadowed the builtin
            for cap in data[id_]:
                prompts = [cap] * n_generate_per_cycle
                cap = cap[:50]  # too long captions will cause path error
                cap = cap.replace('"', "")  # server don't like ", will map to %2522
                out_dir = os.path.join(args.output_dir, id_, cap)
                os.makedirs(out_dir, exist_ok=True)
                # resume numbering after images saved by earlier runs
                # (BUG FIX: previous `+ 1` here plus the pre-save increment
                # skipped one index every run)
                cur_i = len(list(os.listdir(out_dir)))
                for prompt_chunk in batchify(prompts, n=args.bsz):
                    x = pipe(prompt_chunk)
                    for img in x.images:
                        cur_i += 1
                        img.save(os.path.join(out_dir, f"{cur_i}.png"))

        n_already_generated += n_generate_per_cycle
-------------------------------------------------------------------------------- 1 | import json 2 | import os, sys 3 | import random 4 | from pathlib import Path 5 | 6 | import cv2 7 | import matplotlib.pyplot as plt 8 | from detectron2.data import DatasetCatalog 9 | from detectron2.data.datasets import register_coco_instances 10 | from detectron2.utils.logger import setup_logger 11 | from detectron2.utils.visualizer import Visualizer 12 | 13 | pwd = Path(__file__).parent.resolve() 14 | output = pwd / "out" 15 | 16 | voc_dir = pwd.parent / "data/voc2012" / "VOC2012" 17 | with open(voc_dir.parent / "label2id.json") as f: 18 | label2id = json.load(f) 19 | 20 | artifact_dir = Path(sys.argv[1]) 21 | assert artifact_dir.exists() 22 | print(artifact_dir) 23 | 24 | output = output / artifact_dir.stem 25 | os.makedirs(output, exist_ok=True) 26 | coco_name = "COCO.json" 27 | register_coco_instances("synthetic_train", metadata=label2id, json_file=str(artifact_dir / coco_name), 28 | image_root=str(artifact_dir)) 29 | setup_logger() 30 | 31 | ds = "synthetic_train" 32 | # ds = "VOC_test" 33 | data = DatasetCatalog.get(ds) 34 | for i, d in enumerate(random.sample(data, 30)): 35 | img = cv2.imread(d["file_name"]) 36 | visualizer = Visualizer(img[:, :, ::-1], 37 | scale=0.5) 38 | # metadata=balloon_metadata, scale=0.5) 39 | out = visualizer.draw_dataset_dict(d) 40 | plt.figure(1, figsize=(10, 8)) 41 | plt.subplot(1, 2, 1) 42 | plt.title(d['file_name']) 43 | plt.imshow(out.get_image()) 44 | plt.subplot(1, 2, 2) 45 | plt.imshow(img[:, :, ::-1]) 46 | plt.title("RGB") 47 | plt.tight_layout() 48 | plt.savefig(output / f"demo{i}.png") 49 | plt.show() --------------------------------------------------------------------------------