├── .gitignore
├── README.md
├── assets
    ├── overview.png
    └── results.png
├── cutpaste
    ├── __init__.py
    ├── anno.py
    ├── background.py
    ├── clip_postprocess.py
    ├── config
    │   ├── bg
    │   │   └── VOC
    │   │   │   └── 1shot
    │   │   │       ├── bg_template.yaml
    │   │   │       ├── bg_template_plus_dalle.yaml
    │   │   │       └── real.yaml
    │   ├── config.yaml
    │   ├── ds
    │   │   └── VOC.yaml
    │   ├── exp
    │   │   ├── demo_cutpaste.yaml
    │   │   ├── demo_syn+real.yaml
    │   │   ├── demo_syn.yaml
    │   │   └── demo_synFg.yaml
    │   ├── fg
    │   │   └── VOC
    │   │   │   ├── 1shot
    │   │   │       └── real.yaml
    │   │   │   └── fg_template.yaml
    │   └── paster
    │   │   └── defaults.yaml
    ├── foreground.py
    ├── paste.py
    ├── paster.py
    ├── pb.py
    ├── pyblur3
    │   ├── BoxBlur.py
    │   ├── DefocusBlur.py
    │   ├── GaussianBlur.py
    │   ├── LineDictionary.py
    │   ├── LinearMotionBlur.py
    │   ├── PsfBlur.py
    │   ├── RandomizedBlur.py
    │   ├── __init__.py
    │   └── psf.pkl
    └── utils.py
├── data
    ├── test_data
    │   ├── VOC2012
    │   │   ├── Annotations
    │   │   │   ├── 2007_000039.xml
    │   │   │   ├── 2007_000063.xml
    │   │   │   ├── 2007_000648.xml
    │   │   │   ├── 2007_001420.xml
    │   │   │   ├── 2007_001709.xml
    │   │   │   ├── 2007_001901.xml
    │   │   │   ├── 2007_002216.xml
    │   │   │   ├── 2007_002668.xml
    │   │   │   ├── 2007_002669.xml
    │   │   │   ├── 2007_002845.xml
    │   │   │   ├── 2007_003207.xml
    │   │   │   ├── 2007_003565.xml
    │   │   │   ├── 2007_003778.xml
    │   │   │   ├── 2007_003876.xml
    │   │   │   ├── 2007_004166.xml
    │   │   │   ├── 2007_005273.xml
    │   │   │   ├── 2007_005702.xml
    │   │   │   ├── 2007_006303.xml
    │   │   │   ├── 2007_006400.xml
    │   │   │   └── 2007_006673.xml
    │   │   ├── JPEGImages
    │   │   │   ├── 2007_000039.jpg
    │   │   │   ├── 2007_000063.jpg
    │   │   │   ├── 2007_000648.jpg
    │   │   │   ├── 2007_001420.jpg
    │   │   │   ├── 2007_001709.jpg
    │   │   │   ├── 2007_001901.jpg
    │   │   │   ├── 2007_002216.jpg
    │   │   │   ├── 2007_002668.jpg
    │   │   │   ├── 2007_002669.jpg
    │   │   │   ├── 2007_002845.jpg
    │   │   │   ├── 2007_003207.jpg
    │   │   │   ├── 2007_003565.jpg
    │   │   │   ├── 2007_003778.jpg
    │   │   │   ├── 2007_003876.jpg
    │   │   │   ├── 2007_004166.jpg
    │   │   │   ├── 2007_005273.jpg
    │   │   │   ├── 2007_005702.jpg
    │   │   │   ├── 2007_006303.jpg
    │   │   │   ├── 2007_006400.jpg
    │   │   │   └── 2007_006673.jpg
    │   │   └── SegmentationObject
    │   │   │   ├── 2007_000039.png
    │   │   │   ├── 2007_000063.png
    │   │   │   ├── 2007_000648.png
    │   │   │   ├── 2007_001420.png
    │   │   │   ├── 2007_001709.png
    │   │   │   ├── 2007_001901.png
    │   │   │   ├── 2007_002216.png
    │   │   │   ├── 2007_002668.png
    │   │   │   ├── 2007_002669.png
    │   │   │   ├── 2007_002845.png
    │   │   │   ├── 2007_003207.png
    │   │   │   ├── 2007_003565.png
    │   │   │   ├── 2007_003778.png
    │   │   │   ├── 2007_003876.png
    │   │   │   ├── 2007_004166.png
    │   │   │   ├── 2007_005273.png
    │   │   │   ├── 2007_005702.png
    │   │   │   ├── 2007_006303.png
    │   │   │   ├── 2007_006400.png
    │   │   │   └── 2007_006673.png
    │   ├── background
    │   │   ├── bg_template
    │   │   │   ├── forest
    │   │   │   │   └── A real photo of forest
    │   │   │   │   │   ├── 589.png
    │   │   │   │   │   ├── 590.png
    │   │   │   │   │   ├── 591.png
    │   │   │   │   │   ├── 593.png
    │   │   │   │   │   └── 598.png
    │   │   │   └── railway without train
    │   │   │   │   └── A real photo of railway without train
    │   │   │   │       ├── 367.png
    │   │   │   │       ├── 373.png
    │   │   │   │       ├── 429.png
    │   │   │   │       └── 475.png
    │   │   └── context_augment
    │   │   │   ├── 2007_000504.jpg
    │   │   │       └── a colored photo of an empty pile of trash on the ground
    │   │   │       │   └── 49.png
    │   │   │   ├── 2007_001609.jpg
    │   │   │       └── a colored photo of an empty living room
    │   │   │       │   └── 69.png
    │   │   │   ├── 2007_001764.jpg
    │   │   │       └── a real image of an empty grass covered field
    │   │   │       │   └── 71.png
    │   │   │   ├── 2007_002227.jpg
    │   │   │       └── a real image of an empty wall in a living room
    │   │   │       │   └── 45.png
    │   │   │   ├── 2007_002281.jpg
    │   │   │       └── a colored photo of an empty street next to a forest
    │   │   │       │   └── 79.png
    │   │   │   ├── 2007_002967.jpg
    │   │   │       └── a real image of an empty doorstep
    │   │   │       │   └── 58.png
    │   │   │   ├── 2007_003451.jpg
    │   │   │       └── a real image of an empty living room filled with furniture and a large window
    │   │   │       │   └── 40.png
    │   │   │   ├── 2007_005124.jpg
    │   │   │       └── a real image of an empty grass field
    │   │   │       │   └── 58.png
    │   │   │   ├── 2007_006136.jpg
    │   │   │       └── a real image of an empty grass near a forest
    │   │   │       │   └── 33.png
    │   │   │   ├── 2007_007585.jpg
    │   │   │       └── a real image of an empty grass field
    │   │   │       │   └── 67.png
    │   │   │   └── clip_postprocessed.json
    │   ├── foreground
    │   │   ├── foreground_mask
    │   │   │   ├── aeroplane_mask
    │   │   │   │   ├── The picture of an airplane on a pure background422.png
    │   │   │   │   ├── an airplane in a pure background449.png
    │   │   │   │   └── an airplane106.png
    │   │   │   ├── bicycle_mask
    │   │   │   │   ├── a bicycle in a pure background122.png
    │   │   │   │   ├── a bicycle in a pure background128.png
    │   │   │   │   └── a bicycle in a pure background225.png
    │   │   │   ├── bird_mask
    │   │   │   │   ├── a bird in a pure background162.png
    │   │   │   │   ├── a bird in a pure background293.png
    │   │   │   │   └── a bird in a pure background374.png
    │   │   │   ├── boat_mask
    │   │   │   │   ├── The picture of a boat on a pure background39.png
    │   │   │   │   ├── a boat in a pure background371.png
    │   │   │   │   └── a boat79.png
    │   │   │   ├── bottle_mask
    │   │   │   │   ├── The picture of a bottle on a pure background407.png
    │   │   │   │   ├── a bottle in a pure background108.png
    │   │   │   │   └── a bottle89.png
    │   │   │   ├── bus_mask
    │   │   │   │   ├── The picture of a bus on a pure background460.png
    │   │   │   │   ├── a bus105.png
    │   │   │   │   └── a bus410.png
    │   │   │   ├── car_mask
    │   │   │   │   ├── The picture of a car on a pure background100.png
    │   │   │   │   ├── The picture of a car on a pure background286.png
    │   │   │   │   └── a car97.png
    │   │   │   ├── cat_mask
    │   │   │   │   ├── a cat245.png
    │   │   │   │   ├── a cat58.png
    │   │   │   │   └── a cat62.png
    │   │   │   ├── chair_mask
    │   │   │   │   ├── The picture of a chair on a pure background423.png
    │   │   │   │   ├── a chair in a pure background301.png
    │   │   │   │   └── a chair402.png
    │   │   │   ├── cow_mask
    │   │   │   │   ├── a cow in a pure background9.png
    │   │   │   │   ├── a cow223.png
    │   │   │   │   └── a cow56.png
    │   │   │   ├── diningtable_mask
    │   │   │   │   ├── a dining table in a pure background63.png
    │   │   │   │   ├── a table123.png
    │   │   │   │   └── a table300.png
    │   │   │   ├── dog_mask
    │   │   │   │   ├── The picture of a dog on a pure background236.png
    │   │   │   │   ├── a dog in a pure background487.png
    │   │   │   │   └── a dog121.png
    │   │   │   ├── horse_mask
    │   │   │   │   ├── The picture of a horse on a pure background469.png
    │   │   │   │   ├── a horse in a pure background293.png
    │   │   │   │   └── a horse298.png
    │   │   │   ├── motorbike_mask
    │   │   │   │   ├── a motorbike in a pure background367.png
    │   │   │   │   ├── a motorbike in a pure background421.png
    │   │   │   │   └── a motorbike315.png
    │   │   │   ├── person_mask
    │   │   │   │   ├── a man146.png
    │   │   │   │   ├── a man253.png
    │   │   │   │   └── a man345.png
    │   │   │   ├── pottedplant_mask
    │   │   │   │   ├── a potted plant11.png
    │   │   │   │   ├── a potted plant374.png
    │   │   │   │   └── a potted plant434.png
    │   │   │   ├── sheep_mask
    │   │   │   │   ├── The picture of a sheep on a pure background212.png
    │   │   │   │   ├── a sheep in a pure background219.png
    │   │   │   │   └── a sheep351.png
    │   │   │   ├── sofa_mask
    │   │   │   │   ├── a sofa in a pure background122.png
    │   │   │   │   ├── a sofa in a pure background353.png
    │   │   │   │   └── a sofa in a pure background395.png
    │   │   │   ├── train_mask
    │   │   │   │   ├── The picture of a train on a pure background303.png
    │   │   │   │   ├── The picture of a train on a pure background32.png
    │   │   │   │   └── a train in a pure background133.png
    │   │   │   └── tvmonitor_mask
    │   │   │   │   ├── a tv monitor251.png
    │   │   │   │   ├── an old monitor107.png
    │   │   │   │   └── an old monitor426.png
    │   │   └── foreground_rgb
    │   │   │   ├── a bicycle
    │   │   │       └── a bicycle in a pure background
    │   │   │       │   ├── 122.png
    │   │   │       │   ├── 128.png
    │   │   │       │   └── 225.png
    │   │   │   ├── a bird
    │   │   │       └── a bird in a pure background
    │   │   │       │   ├── 162.png
    │   │   │       │   ├── 293.png
    │   │   │       │   └── 374.png
    │   │   │   ├── a boat
    │   │   │       ├── The picture of a boat on a pure background
    │   │   │       │   └── 39.png
    │   │   │       ├── a boat in a pure background
    │   │   │       │   └── 371.png
    │   │   │       └── a boat
    │   │   │       │   └── 79.png
    │   │   │   ├── a bottle
    │   │   │       ├── The picture of a bottle on a pure background
    │   │   │       │   └── 407.png
    │   │   │       ├── a bottle in a pure background
    │   │   │       │   └── 108.png
    │   │   │       └── a bottle
    │   │   │       │   └── 89.png
    │   │   │   ├── a bus
    │   │   │       ├── The picture of a bus on a pure background
    │   │   │       │   └── 460.png
    │   │   │       └── a bus
    │   │   │       │   ├── 105.png
    │   │   │       │   └── 410.png
    │   │   │   ├── a car
    │   │   │       ├── The picture of a car on a pure background
    │   │   │       │   ├── 100.png
    │   │   │       │   └── 286.png
    │   │   │       └── a car
    │   │   │       │   └── 97.png
    │   │   │   ├── a cat
    │   │   │       └── a cat
    │   │   │       │   ├── 245.png
    │   │   │       │   ├── 58.png
    │   │   │       │   └── 62.png
    │   │   │   ├── a chair
    │   │   │       ├── The picture of a chair on a pure background
    │   │   │       │   └── 423.png
    │   │   │       ├── a chair in a pure background
    │   │   │       │   └── 301.png
    │   │   │       └── a chair
    │   │   │       │   └── 402.png
    │   │   │   ├── a cow
    │   │   │       ├── a cow in a pure background
    │   │   │       │   └── 9.png
    │   │   │       └── a cow
    │   │   │       │   ├── 223.png
    │   │   │       │   └── 56.png
    │   │   │   ├── a dining table
    │   │   │       └── a dining table in a pure background
    │   │   │       │   └── 63.png
    │   │   │   ├── a dog
    │   │   │       ├── The picture of a dog on a pure background
    │   │   │       │   └── 236.png
    │   │   │       ├── a dog in a pure background
    │   │   │       │   └── 487.png
    │   │   │       └── a dog
    │   │   │       │   └── 121.png
    │   │   │   ├── a horse
    │   │   │       ├── The picture of a horse on a pure background
    │   │   │       │   └── 469.png
    │   │   │       ├── a horse in a pure background
    │   │   │       │   └── 293.png
    │   │   │       └── a horse
    │   │   │       │   └── 298.png
    │   │   │   ├── a man
    │   │   │       └── a man
    │   │   │       │   ├── 146.png
    │   │   │       │   ├── 253.png
    │   │   │       │   └── 345.png
    │   │   │   ├── a motorbike
    │   │   │       ├── a motorbike in a pure background
    │   │   │       │   ├── 367.png
    │   │   │       │   └── 421.png
    │   │   │       └── a motorbike
    │   │   │       │   └── 315.png
    │   │   │   ├── a potted plant
    │   │   │       └── a potted plant
    │   │   │       │   ├── 11.png
    │   │   │       │   ├── 374.png
    │   │   │       │   └── 434.png
    │   │   │   ├── a sheep
    │   │   │       ├── The picture of a sheep on a pure background
    │   │   │       │   └── 212.png
    │   │   │       ├── a sheep in a pure background
    │   │   │       │   └── 219.png
    │   │   │       └── a sheep
    │   │   │       │   └── 351.png
    │   │   │   ├── a sofa
    │   │   │       └── a sofa in a pure background
    │   │   │       │   ├── 122.png
    │   │   │       │   ├── 353.png
    │   │   │       │   └── 395.png
    │   │   │   ├── a table
    │   │   │       └── a table
    │   │   │       │   ├── 123.png
    │   │   │       │   └── 300.png
    │   │   │   ├── a train
    │   │   │       ├── The picture of a train on a pure background
    │   │   │       │   ├── 303.png
    │   │   │       │   └── 32.png
    │   │   │       └── a train in a pure background
    │   │   │       │   └── 133.png
    │   │   │   ├── a tv monitor
    │   │   │       └── a tv monitor
    │   │   │       │   └── 251.png
    │   │   │   ├── an airplane
    │   │   │       ├── The picture of an airplane on a pure background
    │   │   │       │   └── 422.png
    │   │   │       ├── an airplane in a pure background
    │   │   │       │   └── 449.png
    │   │   │       └── an airplane
    │   │   │       │   └── 106.png
    │   │   │   └── an old monitor
    │   │   │       └── an old monitor
    │   │   │           ├── 107.png
    │   │   │           └── 426.png
    │   ├── labels.txt
    │   └── train_cls-1shot.txt
    └── voc2012
    │   ├── 1k_10_shot.json
    │   ├── 1k_1_shot.json
    │   ├── label2id.json
    │   ├── labels.txt
    │   ├── prompt_replace_original_200_selected.json
    │   ├── test.txt
    │   ├── train_1k_cls.txt
    │   ├── train_aug.txt
    │   ├── train_cls-10shot.txt
    │   ├── train_cls-1shot.txt
    │   ├── train_cls.txt
    │   └── val_cls.txt
├── detection
    ├── train.py
    ├── utils.py
    └── wandb_writer.py
├── instance_seg
    ├── run.sh
    ├── seg.py
    └── seg_lazy.py
├── requirements.txt
├── t2i_generate
    ├── background_captions.py
    ├── foreground_captions.py
    └── stable_diffusion2.py
└── viz
    └── viz.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | cutpaste/outputs
3 | artifact/
4 | viz/out
5 | **/__pycache__/
6 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Text2Image-for-Detection
  2 | 
  3 | Official Implementation for ["DALL-E for Detection: Language-driven Compositional Image Synthesis for Object Detection"](https://arxiv.org/pdf/2309.05956.pdf) 
  4 | 
  5 | Extended version: ["Beyond Generation: Harnessing Text to Image Models for Object Detection and Segmentation"](https://arxiv.org/pdf/2309.05956.pdf)
  6 | 
  7 | 
 15 | 
 16 | Contact: [yunhaoge@usc.edu](mailto:yunhaoge@usc.edu);  [jxu1@g.harvard.edu](mailto:jxu1@g.harvard.edu)
 17 | 
 18 | ## Install
 19 | 
 20 | This project is developed using Python 3.10 and PyTorch 1.10.1 under CUDA 11.3. We recommend you to use the same version of Python and PyTorch. 
 21 | 
 22 | ```bash
 23 | pip install -r requirements.txt
 24 | ```
 25 | 
 26 | ## Our method
 27 | 
 28 | 
 29 |   
 31 | 
 32 | We propose a novel approach for generating diverse and large-scale pseudo-labeled training datasets, tailored specifically to enhance downstream
 33 | object detection and segmentation models. 
 34 | We leverage text-to-image models (e.g. your favourite diffusion model) to independently generate foregrounds and backgrounds. 
 35 | Then we composite foregrounds onto the backgrounds, a process where we obtain the bounding boxes or segmentation masks of the foregrounds, to be used in the downstream models.
 36 | 
 37 | Specifically, 
 38 | - Foregrounds are generated using text-to-image models with fixed templates (such as "A photo of ", Table 2 of the paper). Note this is purely zero-shot, i.e. no training data whatsoever is required.
 39 | - Backgrounds are slightly more complicated. 
 40 |      - For zero-shot, we have another set of fixed templates (such as "A real photo of forest", Table 3 of the paper). 
 41 |    - For few-shot where we have a limited set of training images, we first caption each image, 
 42 |         then extract the background contextual words from the caption. Those contextual words are words such as "grass field," indicating the location and context of the training images.
 43 |        Lastly we augment the original caption by inputting the contextual words into the templates. 
 44 |        This step is necessary as otherwise the caption would possibly contain the foreground objects (eg a dog) as well, 
 45 | and using this original caption into the text-to-image models will likely generate a dog that we do not have a segmentation or bounding box label, and thus confusing the model.
 46 | - Both foregrounds and backgrounds are fed into CLIP to control the quality of the generated images.
 47 | 
 48 | ## Usage
 49 | 
 50 | ### Data
 51 | In this project we use Pascal VOC in a low-resource regime.
 52 | 
 53 | You should download original dataset, e.g. [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/). 
 54 | Note that for Pascal we use train & Val set from the [nsrom repo](https://github.com/NUST-Machine-Intelligence-Laboratory/nsrom).
 55 | The data structure will be
 56 | ```
 57 | data
 58 | ├── COCO2017 
 59 | └── voc2012
 60 |     ├── labels.txt
 61 |     ├── train_aug.txt
 62 |     ├── ...
 63 |     └── VOC2012
 64 |         ├── Annotations
 65 |         ├── ImageSets
 66 |         ...
 67 | ```
 68 | We have k-shot selections on `data/voc2012`:
 69 | [1 shot](data/voc2012/train_cls-1shot.txt) and [10 shot](data/voc2012/train_cls-10shot.txt).
 70 | 
 71 | ### Diffusion Generation
 72 | The code to generate foregrounds and backgrounds are in `t2i_generate/` folder.
 73 | First you need to generate captions for [foreground](t2i_generate/foreground_captions.py) and [background](t2i_generate/background_captions.py).
 74 | Then you can use stable diffusion 2 to generate images via `python stable_diffusion2.py`.
 75 | 
 76 | ### Cut Paste
 77 | The code to paste foregrounds onto backgrounds are in `cutpaste/` folder.
 78 | We use [hydra+torch lightning](https://github.com/ashleve/lightning-hydra-template) to run different variants.
 79 | Example config files are in `configs/` folder, and we include a test dataset in `data/test_data/` folder.
 80 | For example, you can use `python paste.py exp=` to launch the script, where `` is
 81 | 
 82 | - [`demo_cutpaste`](cutpaste/config/exp/demo_cutpaste.yaml): paste real foregrounds onto real backgrounds. This is **Pure Real + cut paste** in [Results](#results).
 83 | - [`demo_synFg`](cutpaste/config/exp/demo_synFg.yaml): paste synthetic foregrounds onto real backgrounds. This is **Syn Fg** in [Results](#results).
 84 | - [`demo_syn`](cutpaste/config/exp/demo_syn.yaml): paste synthetic foregrounds onto synthetic backgrounds. This is **Pure Syn** in [Results](#results).
 85 | - [`demo_syn+real`](cutpaste/config/exp/demo_syn+real.yaml): paste synthetic and real foregrounds onto synthetic and real backgrounds. This is **Syn + real** in [Results](#results).
 86 |   We recommend you to always use this setting as this offers the benefit of both synthetic and real data.
 87 | 
 88 | You can also use `viz/` to visualize the generated datasets. Simple do
 89 | ```shell
 90 | python viz/viz.py 
 91 | ```
 92 | This will generate 30 randomly sampled annotated images in `viz/` folder.
 93 | 
 94 | Readers are welcome to check the config files for more parameters to control the process. Some notable mentions:
 95 |  - by default the foreground is pasted with Space Maximize Paste algorithm described in [EM-paste](https://arxiv.org/pdf/2212.07629.pdf): i.e. each
 96 | foreground is re-scaled and pasted within the max inscribing circle of the background contour.
 97 |  - `repeat_background` is the number of time the algorithm repeats the pasting process. By default it is 2, i.e. each background is used twice, but each time with different foregrounds pasted onto it.
 98 |  - `num_workers` is the number of workers to use for multiprocessing. We recommend you to use at least 4.
 99 |  - `use_random_paste` is for random paste (i.e. paste foreground on a random location of background image) while randomly scaling the pasted foreground by 0.3-0.7. This is adapted from [object cut and paste](https://arxiv.org/abs/1708.01642).
100 |  - by default `num_cut_images=4` foregrounds are pasted in each background. You can also set `num_cut_lowerbound` to paste Unif(num_cut_lowerbound, num_cut_images) foregrounds.
101 |  - The codebase supports 5 total blendings, namely Gaussian Blur, Poisson Blur, Box Blur, Motion Blur and None (do not apply any blendings). We empirically find Gaussian alone achieves the best results.
102 |  - Space Maximize Paste will scale the foreground object to match the diameter of the max inscribing circle of the background contour, but sometimes producing small foregrounds. `use_random_scaling=True` 
103 |  will apply the random scaling factor (0.3-0.7) as [object cut and paste](https://arxiv.org/abs/1708.01642) paper.
104 |  - `scale_factor=factor` additionally apply another scaling to the pasted foreground after Space Maximize Paste.
105 |  - `center_translation_factor=factor` shift the location to paste the foreground by ± height x factor or ± width x factor, instead of pasting on the center of the max inscribing circle.
106 | 
107 | 
108 | ### Model Training
109 | Once the dataset is created, you can train object detection model using `detection/` and instance segmentation model using
110 | `instance_seg/`. Both are based on the battle-tested [detectron2](https://github.com/facebookresearch/detectron2).
111 | 
112 | For example, on VOC 2012 with 2 GPUs, you can run
113 | ```shell
114 | # object detection
115 | python detection/train.py -s syn \ # use synthetic data
116 |     --syn_dir <path to synthetic dataset> \
117 |     -t voc_val \ # test on VOC val
118 |     --test_dir  \ # data/voc2012/VOC2012, we need to find val set in this folder
119 |     -g 4 \ # use 4 GPUs on 1 machine
120 |     --freeze --data_aug --bsz 32 --epoch 200 --resnet 50 --lr 0.01 # hyperparameters
121 | ```
122 | For instance segmentation, use `instance_seg/seg.py` instead of `detection/train.py`. The flags are the same.
123 | 
124 | For inference, simply apply the additional flag `--eval_checkpoint <path to checkpoint>`.
125 | 
126 | 
127 | ## Results
128 | Our method results in significant improvement over the baseline on Pascal VOC and MS COCO, especially in the low-resource regime.
129 | We refer details [in the paper](https://arxiv.org/pdf/2309.05956.pdf).
130 | 
131 |   
133 | 
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/overview.png
--------------------------------------------------------------------------------
/assets/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/results.png
--------------------------------------------------------------------------------
/cutpaste/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/__init__.py
--------------------------------------------------------------------------------
/cutpaste/anno.py:
--------------------------------------------------------------------------------
  1 | from abc import abstractmethod
  2 | from typing import Optional, Tuple
  3 | 
  4 | import numpy as np
  5 | import ujson as json
  6 | from PIL import Image
  7 | from pycocotools import mask as cocomask
  8 | 
  9 | 
 10 | class Anno:
 11 |     label2id: dict
 12 |     id2label: dict
 13 | 
 14 |     @abstractmethod
 15 |     def objects(self):
 16 |         raise NotImplementedError
 17 | 
 18 |     @abstractmethod
 19 |     def create_mask(self, for_object: Optional[int] = None) -> Image.Image:
 20 |         raise NotImplementedError
 21 | 
 22 |     @abstractmethod
 23 |     def create_instance_mask(self) -> Tuple[Image.Image, dict]:
 24 |         raise NotImplementedError
 25 | 
 26 |     @staticmethod
 27 |     def factory(anno_path, seg_img_path):
 28 |         if anno_path is None:
 29 |             return EntityAnno(seg_img_path)
 30 |         elif seg_img_path is None:
 31 |             return COCOAnno(anno_path)
 32 |         return VOCAnno(anno_path, seg_img_path)
 33 | 
 34 | 
 35 | class VOCAnno(Anno):
 36 |     def __init__(self, anno_path, seg_img_path):
 37 |         import xml.etree.ElementTree as ET
 38 |         self.anno_path = anno_path
 39 |         self.anno = ET.parse(anno_path).getroot()
 40 |         self.seg_img_path = seg_img_path
 41 | 
 42 |     def size(self):
 43 |         size = self.anno.find("size")
 44 |         height, width = size.find("./height").text, size.find("./width").text
 45 |         return int(height), int(width)
 46 | 
 47 |     def filename(self) -> str:
 48 |         return self.anno.find("filename").text
 49 | 
 50 |     def objects(self):
 51 |         objects = self.anno.findall("object")
 52 |         # hardcode, remove wrong seg annotation
 53 |         if "2009_005069" in self.anno_path:
 54 |             objects = objects[:-1]
 55 |         return objects
 56 | 
 57 |     def create_mask(self, for_object: Optional[int] = None):
 58 |         """
 59 |         create boolean mask with same shape as .size()
 60 |         gt (is object) is positive, dummy is 0
 61 |         if for_object = None, OR all mask
 62 |         else, mask for this specific object (0 if dummy, positive for this category)
 63 |         """
 64 |         # consists of: objects (object number in anno), 0 (dummy bg), 255 (white mask outline)
 65 |         seg_mask = np.array(Image.open(self.seg_img_path))
 66 |         objects = self.objects()
 67 |         if for_object is None:
 68 |             ids = list(range(1, len(objects) + 1))
 69 |             categories = [
 70 |                 object.find("./name").text
 71 |                 for object in objects
 72 |             ]
 73 |             id2categoryid = {
 74 |                 i: self.label2id[c]
 75 |                 for i, c in zip(ids, categories)
 76 |             }
 77 |             # plus mapping to get dummy 255
 78 |             id2categoryid[0] = 0
 79 |             id2categoryid[255] = 0
 80 |             if len(np.unique(seg_mask)) != len(id2categoryid):
 81 |                 # when seg is wrong, there are mismatch
 82 |                 seg_mask = np.where(np.isin(seg_mask, list(id2categoryid)), seg_mask, 0)
 83 | 
 84 |             # rn if seg_mask == i, it's ith object, make it ith object's category
 85 |             mask = np.vectorize(id2categoryid.get)(seg_mask).astype('uint8')
 86 |             return Image.fromarray(mask)
 87 | 
 88 |         assert type(for_object) is int
 89 |         assert 1 <= for_object <= len(objects)
 90 |         id = for_object
 91 |         category = objects[id - 1].find("./name").text
 92 | 
 93 |         mask = np.where(seg_mask == id, self.label2id[category], 0).astype("uint8")
 94 |         return Image.fromarray(mask)
 95 | 
 96 |     def create_instance_mask(self):
 97 |         """
 98 |         instance mask where each non-dummy object is positive with id (starts from 1, NOT label id)
 99 |         0 if background dummy
100 |         """
101 |         seg_mask = np.array(Image.open(self.seg_img_path))
102 |         instance_mask = np.where(np.isin(seg_mask, [0, 255]), 0, seg_mask).astype("uint8")
103 |         objects = self.objects()
104 |         ids = list(range(1, len(objects) + 1))
105 |         categories = [
106 |             object.find("./name").text
107 |             for object in objects
108 |         ]
109 |         instance_mask_id2category = {
110 |             i: self.label2id[c]
111 |             for i, c in zip(ids, categories)
112 |         }
113 |         return Image.fromarray(instance_mask), instance_mask_id2category
114 | 
115 | 
class EntityAnno(Anno):
    """Annotation derived purely from a binary (0/255) entity-mask PNG.

    The category is parsed from the parent directory name, e.g.
    data/voc2012/entity_mask/bottle_mask/2009_000562.png -> "bottle".
    """

    def __init__(self, seg_img_path):
        self.seg_img_path = seg_img_path
        # second-to-last path component is "<category>_mask"
        _, label_dir, _filename = seg_img_path.rsplit("/", 2)
        self.label = self.label2id[label_dir.replace("_mask", "")]

    def objects(self):
        # exactly one entity per mask image
        return [self.label]

    def create_mask(self, for_object: Optional[int] = None):
        """Semantic mask: 255 pixels become the category id, everything else 0."""
        raw = np.array(Image.open(self.seg_img_path))
        labelled = np.where(raw == 255, self.label, 0).astype("uint8")
        return Image.fromarray(labelled)

    def create_instance_mask(self):
        """Single-instance mask: 255 pixels become instance id 1."""
        raw = np.array(Image.open(self.seg_img_path))
        binary = np.where(raw == 255, 1, 0).astype("uint8")
        return Image.fromarray(binary), {1: self.label}
139 | 
class COCOAnno(Anno):
    """Annotation parser for a COCO-style JSON file.

    NOTE(review): size() indexes self.anno['images'] as a dict, but standard
    COCO JSON stores 'images' as a list — this presumably expects a
    per-image JSON file; confirm against the data pipeline.
    """

    def __init__(self, anno_path):
        # Load the JSON once and bucket every annotation by its category id.
        with open(anno_path) as f:
            self.anno = json.load(f)
        
        self.id2annos = {
            id: []
            for id in self.objects()
        }
        for anno in self.anno["annotations"]:
            self.id2annos[anno["category_id"]].append(anno)

    def size(self):
        # (height, width) of the annotated image, as ints.
        return int(self.anno['images']['height']), int(self.anno['images']['width'])

    def objects(self):
        # Sorted, de-duplicated category ids present in the annotations.
        return sorted(set([
            anno['category_id']
            for anno in self.anno["annotations"]
        ]))

    def create_mask(self, for_object: Optional[int] = None):
        """Semantic mask: pixel value is the category id, 0 is background.

        for_object is a 1-based index into objects(); when None, the
        per-category masks are merged into a single mask.
        """
        if for_object: # i-th (1 based)
            category = self.objects()[for_object-1]
            annos = self.id2annos[category]
            mask = np.zeros(self.size(), dtype=int)
            for anno in annos:
                # Decode RLE/polygon segmentation into binary masks.
                objs = cocomask.frPyObjects(anno["segmentation"], *self.size())
                binary_mask = cocomask.decode(objs) # (h, w, n) binary {0 (dummy), 1 (obj)} where n is \# disjoint anno
                if binary_mask.ndim == 2:
                    binary_mask = binary_mask[:, :, np.newaxis]
                for n in range(binary_mask.shape[-1]): # paint every disjoint part with the category id
                    mask[binary_mask[:, :, n] == 1] = category
                # binary_mask = np.where(binary_mask == 1, category, 0)
                # mask = np.ma.mask_or(mask, binary_mask)
            return Image.fromarray(mask.astype(np.uint8))
        
        # No specific object requested: OR together the per-category masks.
        mask = np.zeros(self.size(), dtype=int)
        for i, category in enumerate(self.objects(), 1):
            mask2 = self.create_mask(for_object = i)
            mask[np.array(mask2) == category] = category
        return Image.fromarray(mask.astype(np.uint8))

    def create_instance_mask(self):
        """Instance mask (ids start at 1) plus {instance id -> category id}.

        Each annotation gets one instance id; all of its disjoint parts share
        that id. NOTE(review): ids are cast to uint8, so images with >255
        annotations would overflow — presumably never happens here.
        """
        instance_mask = np.zeros(self.size(), dtype=int)
        instance_mask_id2category = {}
        for anno in self.anno["annotations"]:
            objs = cocomask.frPyObjects(anno["segmentation"], *self.size())
            binary_mask = cocomask.decode(objs) # (h, w) binary {0 (dummy), 1 (obj)}
            if binary_mask.ndim == 2:
                binary_mask = binary_mask[:, :, np.newaxis]
            next_id = len(instance_mask_id2category) + 1
            for n in range(binary_mask.shape[-1]): # all disjoint parts share one instance id
                instance_mask[binary_mask[:, :, n] == 1] = next_id
            instance_mask_id2category[next_id] = anno['category_id']

        return Image.fromarray(instance_mask.astype(np.uint8)), instance_mask_id2category
--------------------------------------------------------------------------------
/cutpaste/background.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import random
  3 | from pathlib import Path
  4 | from typing import Optional, Iterator
  5 | 
  6 | import cv2
  7 | import numpy as np
  8 | from PIL import Image
  9 | from pycocotools import mask as cocomask
 10 | 
 11 | from anno import Anno
 12 | from foreground import CutObject, CutObjects
 13 | from pb import create_mask, poisson_blend
 14 | from pyblur3 import LinearMotionBlur
 15 | 
 16 | 
 17 | def binarize_mask(mask, set_boundary=False) -> np.ndarray:
 18 |     """
 19 |     make it 255 if occupied, regardless of pixel category
 20 |     """
 21 |     mask = np.array(mask)
 22 |     mask = np.where(mask > 0, 255, 0).astype('uint8')
 23 |     if set_boundary:
 24 |         mask[:, 0] = 255
 25 |         mask[:, -1] = 255
 26 |         mask[0, :] = 255
 27 |         mask[-1, :] = 255
 28 |     return mask
 29 | 
 30 | 
 31 | class PastedBackground:
 32 |     """
 33 |     background image to be pasted on
 34 |     """
 35 | 
 36 |     def __init__(self, imagepath: str, anno: Optional[Anno] = None):
 37 |         """
 38 |         if anno is None, no need to find annotation in given image, i.e. we ignore potential foregrounds
 39 |         """
 40 |         self.image: Image.Image = Image.open(imagepath)
 41 |         self.imagepath = imagepath
 42 |         if anno:
 43 |             self.ignore_foreground = False
 44 |             # semantic mask
 45 |             # 0 if dummy, positive int (label id for each of the potential object/foreground) is object mask
 46 |             self.mask = anno.create_mask(for_object=None)
 47 |             # Image Mask (each instance a unique id) & instance id to actual category (starts from 1)
 48 |             self.instance_mask, self.instance_mask_id2category = anno.create_instance_mask()
 49 |         else:
 50 |             self.ignore_foreground = True
 51 |             w, h = self.size
 52 |             self.mask = Image.fromarray(np.zeros((h, w), dtype="uint8"))
 53 |             self.instance_mask, self.instance_mask_id2category = self.mask.copy(), {}
 54 | 
 55 |     @property
 56 |     def size(self):
 57 |         """ w and h """
 58 |         return self.image.size
 59 | 
 60 |     def resize(self, out_size: int):
 61 |         self.image = self.image.resize((out_size, out_size), Image.LANCZOS)
 62 |         self.mask = self.mask.resize((out_size, out_size), Image.NEAREST)
 63 |         self.instance_mask = self.instance_mask.resize((out_size, out_size), Image.NEAREST)
 64 | 
 65 |     def find_paste_location(self,
 66 |                             foregrounds: CutObjects, max_degree, random_paste=False,
 67 |                             scale_factor=0, center_translation_factor=0, use_random_scaling=False):
 68 |         """
 69 |         modify self.mask in place (paste with foreground)
 70 |         return new foregrounds (after scaling and rotation)
 71 |         if random_paste: select random paste location (for abalation) and random scale 0.3-0.7
 72 |         """
 73 |         # 1. loop through all objects
 74 |         foregrounds.shuffle()
 75 |         foregrounds_to_paste = []
 76 |         locations = []
 77 |         for foreground_object in foregrounds:
 78 |             w, h = self.size
 79 |             if random_paste:
 80 |                 (x, y) = random.randint(0, h), random.randint(0, w)
 81 |                 radius2, center2 = foreground_object.min_enclosing_circle()
 82 |                 scaling = random.uniform(0.3, 0.7)
 83 |                 radius = scaling * radius2
 84 |             else:
 85 |                 # 2. find max inscribing circle in the background non-occupied area
 86 |                 radius, (x, y) = self.max_inscribing_circle()
 87 |                 if center_translation_factor != 0:  # translate center
 88 |                     sgn = 1 if random.random() < 0.5 else -1
 89 |                     x += sgn * h * (center_translation_factor / 100)
 90 |                     y += sgn * w * (center_translation_factor / 100)
 91 | 
 92 |                 # 3. compute per-object min enclosing circle
 93 |                 radius2, center2 = foreground_object.min_enclosing_circle()
 94 | 
 95 |                 # 4. after scale, make foreground_object align with max inscribing circle
 96 |                 if use_random_scaling:
 97 |                     scaling = random.uniform(0.3, 0.7)
 98 |                 else:
 99 |                     scaling = radius / radius2
100 |                     if scale_factor != 0:  # scale by @scale_factor %
101 |                         scaling *= (1 + scale_factor / 100)
102 |             try:
103 |                 foreground_object.scale(scaling)
104 |                 o_w, o_h = foreground_object.img.size
105 |                 assert w - o_w >= 0 and h - o_h >= 0 and o_w > 0 and o_h > 0
106 |             except:
107 |                 continue
108 | 
109 |             foreground_object.rotate(size=self.size, max_degree=max_degree)
110 | 
111 |             foregrounds_to_paste.append(foreground_object)
112 |             locations.append((
113 |                 int(x - radius), int(y - radius)
114 |             ))
115 |             self.mask.paste(foreground_object.mask, locations[-1],
116 |                             Image.fromarray(binarize_mask(foreground_object.mask)))
117 |             new_instance_id = len(self.instance_mask_id2category) + 1
118 |             self.instance_mask_id2category[new_instance_id] = foreground_object.category
119 |             self.instance_mask.paste(
120 |                 Image.fromarray(
121 |                     np.where(np.array(foreground_object.mask) == foreground_object.category, new_instance_id, 0).astype(
122 |                         "uint8")),
123 |                 locations[-1], Image.fromarray(binarize_mask(foreground_object.mask))
124 |             )
125 |             if self.ignore_foreground:
126 |                 # even if ignore foreground first, now it will not since one foreground was pasted
127 |                 self.ignore_foreground = False
128 | 
129 |         return locations, CutObjects(foregrounds_to_paste)
130 | 
131 |     def save(self, name, path=None):
132 |         """
133 |         before save, mask is label 1 - 20, and 0 if dummy
134 |         change it to 0-19 label (corresponding to labels.txt but minus 1), 255 if dummy
135 |         """
136 |         if path is None:
137 |             img_path = mask_path = Path(".")
138 |         else:
139 |             img_path = path / "Images"
140 |             mask_path = path / "Masks"
141 |         mask = self.mask
142 |         self.image.save(img_path / f"{name}.png")
143 |         mask.save(mask_path / f"{name}.png")
144 | 
145 |     def max_inscribing_circle(self):
146 |         """
147 |         max inscribing circle that contains all background objects
148 |         """
149 |         if self.ignore_foreground:
150 |             assert list(np.unique(self.mask)) == [0], "should be only 0 i.e. dummy"
151 |             w, h = self.size
152 |             x, y = random.randint(0, h), random.randint(0, w)
153 |             dist_x = min(abs(x), abs(x - h // 2))
154 |             dist_y = min(abs(y), abs(y - w // 2))
155 |             return min(dist_x, dist_y), (x, y)
156 | 
157 |         background_mask = binarize_mask(self.mask, set_boundary=True)
158 |         dist_map = cv2.distanceTransform(255 - background_mask, cv2.DIST_L2, cv2.DIST_MASK_PRECISE)
159 |         _, radius, _, center = cv2.minMaxLoc(dist_map)
160 |         return radius, center
161 | 
162 |     def paste(self, blending: str, paste_location: tuple, foreground_object: CutObject):
163 |         def LinearMotionBlur3C(img):
164 |             """Performs motion blur on an image with 3 channels. Used to simulate
165 |                blurring caused due to motion of camera.
166 |             Args:
167 |                 img(NumPy Array): Input image with 3 channels
168 |             Returns:
169 |                 Image: Blurred image by applying a motion blur with random parameters
170 |             """
171 | 
172 |             def randomAngle(kerneldim):
173 |                 """Returns a random angle used to produce motion blurring
174 |                 Args:
175 |                     kerneldim (int): size of the kernel used in motion blurring
176 |                 Returns:
177 |                     int: Random angle
178 |                 """
179 |                 kernelCenter = int(math.floor(kerneldim / 2))
180 |                 numDistinctLines = kernelCenter * 4
181 |                 validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
182 |                 angleIdx = np.random.randint(0, len(validLineAngles))
183 |                 return int(validLineAngles[angleIdx])
184 | 
185 |             lineLengths = [3, 5, 7, 9]
186 |             lineTypes = ["right", "left", "full"]
187 |             lineLengthIdx = np.random.randint(0, len(lineLengths))
188 |             lineTypeIdx = np.random.randint(0, len(lineTypes))
189 |             lineLength = lineLengths[lineLengthIdx]
190 |             lineType = lineTypes[lineTypeIdx]
191 |             lineAngle = randomAngle(lineLength)
192 |             blurred_img = img
193 |             for i in range(3):
194 |                 blurred_img[:, :, i] = np.asarray(LinearMotionBlur(img[:, :, i], lineLength, lineAngle, lineType))
195 |             blurred_img = Image.fromarray(blurred_img, 'RGB')
196 |             return blurred_img
197 | 
198 |         x, y = paste_location
199 |         foreground = foreground_object.img
200 |         foreground_mask = Image.fromarray(np.where(np.array(foreground_object.mask) != 0, 255, 0).astype('uint8'))
201 |         background = self.image.copy()
202 |         if blending == 'none':
203 |             background.paste(foreground, (x, y), foreground_mask)
204 |         elif blending == 'motion':
205 |             background.paste(foreground, (x, y), foreground_mask)
206 |             background = LinearMotionBlur3C(np.asarray(background))
207 | 
208 |         elif blending == 'poisson':
209 |             offset = (y, x)
210 |             img_mask = np.asarray(foreground_mask)
211 |             img_src = np.asarray(foreground).astype(np.float64)
212 |             img_target = np.asarray(background)
213 |             img_mask, img_src, offset_adj \
214 |                 = create_mask(img_mask.astype(np.float64),
215 |                               img_target, img_src, offset=offset)
216 |             background_array = poisson_blend(img_mask, img_src, img_target,
217 |                                              method='normal', offset_adj=offset_adj)
218 |             background = Image.fromarray(background_array, 'RGB')
219 |         elif blending == 'gaussian':
220 |             background.paste(foreground, (x, y), Image.fromarray(
221 |                 cv2.GaussianBlur(np.asarray(foreground_mask), (5, 5), 2)))
222 |         elif blending == 'box':
223 |             background.paste(foreground, (x, y), Image.fromarray(
224 |                 cv2.blur(np.asarray(foreground_mask), (3, 3))))
225 |         else:
226 |             raise NotImplementedError
227 |         self.image = background
228 | 
229 |     def to_COCO_ann(self) -> Iterator[tuple]:
230 |         """ polygons, bbox, area """
231 |         for id, category in self.instance_mask_id2category.items():
232 |             mask = np.array(self.instance_mask)
233 |             mask = np.where(mask == id, mask, 0)
234 |             contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
235 |             polygons = []
236 |             for contour in contours:
237 |                 if contour.size >= 6:
238 |                     polygons.append(contour.flatten().tolist())
239 |             if len(polygons) == 0:
240 |                 continue
241 |             RLEs = cocomask.frPyObjects(polygons, mask.shape[0], mask.shape[0])
242 |             RLE = cocomask.merge(RLEs)
243 |             area = cocomask.area(RLE)
244 |             [x, y, w, h] = cv2.boundingRect(mask)
245 |             yield polygons, [x, y, w, h], float(area), category
--------------------------------------------------------------------------------
/cutpaste/clip_postprocess.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | from concurrent import futures
 3 | from pathlib import Path
 4 | 
 5 | import json
 6 | import torch
 7 | from PIL import Image
 8 | from tqdm.auto import tqdm
 9 | from transformers import CLIPProcessor, CLIPModel
10 | 
# CLIP text prompts for the VOC categories (plus near-synonyms such as
# "airplane" for "aeroplane"). In get_CLIP_score these are appended after the
# generation caption, so they fill columns 1.. of each scores row while
# column 0 is the caption itself.
voc_texts = [
    f"a photo of {obj}"
    for obj in [
        "person",
        "bird", 'cat', 'cow', 'dog', 'horse', 'sheep',
        'aeroplane', 'airplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
        'bottle', 'chair', 'dining table', 'potted plant', 'sofa', "tv/ monitor"
    ]
]
20 | 
def batchify(lst, n):
    """Yield lst in consecutive chunks of at most n items each."""
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
25 | 
@torch.no_grad()
def get_CLIP_score(caption: str, images: list):
    """
    Image-text similarity logits for every image against [caption] + voc_texts,
    computed in batches of 400 images on the GPU.
    Returns a (#images, 1 + len(voc_texts)) tensor; column 0 is the caption.
    """
    prompts = [caption] + voc_texts
    per_batch_logits = []
    for batch in batchify(images, 400):
        inputs = processor(text=prompts, images=batch, return_tensors="pt", padding=True).to("cuda")
        outputs = model(**inputs)
        # logits_per_image: image-text similarity scores for this batch
        per_batch_logits.append(outputs.logits_per_image)
    return torch.cat(per_batch_logits, dim=0)
36 | 
def scores_for_one_caption(caption: Path):
    """
    Rank the generated images inside @caption's directory and keep the best 30.

    Two-stage filter:
      1. keep the 2*keep_files images LEAST similar to any VOC category
         (drops generations that accidentally contain a VOC object);
      2. of those, keep the keep_files images MOST similar to the caption.

    Returns (caption stem, list of selected image filenames).
    """
    keep_files = 30
    images = []
    for image in caption.iterdir():  # eg 1.png
        try:
            images.append(Image.open(image))
        except Exception:  # was bare `except:`; skip only unreadable generations
            pass
    scores = get_CLIP_score(caption.stem, images)  # (#images, 1 + len(voc_texts))

    # 1. select top keep_files*2 lowest consistent_with_voc_labels
    consistent_with_voc_labels = scores[:, 1:].max(1).values
    double_keep_files = min(keep_files * 2, scores.size(0))
    _, candidate_indices = torch.topk(-consistent_with_voc_labels.squeeze(),
                                      min(double_keep_files, scores.size(0)))
    # 2. select top keep_files highest consistent_with_caption
    consistent_with_caption = scores[candidate_indices, 0]
    # BUGFIX: clamp k -- torch.topk raises when fewer than keep_files candidates survive
    _, top_positions = torch.topk(consistent_with_caption,
                                  min(keep_files, consistent_with_caption.size(0)))
    # BUGFIX: top_positions index the candidate subset, not the full image list;
    # previously they were used to index `images` directly, selecting wrong files
    final_indices = candidate_indices[top_positions]
    selected_images = [
        images[i].filename.split("/")[-1]
        for i in final_indices.detach().cpu().numpy().tolist()
    ]
    return caption.stem, selected_images
59 | 
def sort_images(images):
    """Sort image filenames numerically by their integer stem, so '10.png' comes after '2.png'."""
    def numeric_stem(name):
        return int(name.split(".png")[0])
    return sorted(images, key=numeric_stem)
62 | 
63 | if __name__ == "__main__":
64 |     pwd = Path(__file__).parent.resolve()
65 |     # root = pwd / "artifact" / "syn" / "voc_1k_bg" / "diffusion_wordnet_v1-10shot"
66 |     # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot"
67 |     # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot"
68 |     # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot"
69 |     # root = pwd.parent / "data" / "voc2012" / "background" / "diffusion_v1_600each"
70 |     # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-1shot"
71 |     # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot_refined"
72 |     root = pwd.parent / "data" / "voc2012" / "background" / "critical_context_only-10shot"
73 |     # root = pwd.parent / "data" / "voc2012" / "background" / "context_augment"
74 | 
75 |     model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to("cuda").eval()
76 |     processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
77 | 
78 |     to_save = defaultdict(lambda: {})
79 | 
80 |     # for object in ['empty living room', 'railway without train', 'stable']:
81 |     for object in tqdm(root.iterdir(), total=len(list(root.iterdir()))): # eg a bicycle
82 |         object = root / object
83 |         if not object.name.endswith(".jpg"):
84 |             continue
85 |         captions = list(object.iterdir()) # eg a bicyle in a black background
86 |         with futures.ThreadPoolExecutor(80) as executor:
87 |             res = executor.map(scores_for_one_caption, captions)
88 |             for caption, images in res:
89 |                 to_save[object.stem][caption] = sort_images(images)
90 |     # with open(root / "clip_postprocessed.json", "w") as f:
91 |     #     json.dump(to_save, f, indent=4)
92 |     with open("clip_postprocessed.json", "w") as f:
93 |         json.dump(to_save, f, indent=4)
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/bg_template.yaml:
--------------------------------------------------------------------------------
1 | - _target_: utils.read_background_template
2 |   data_dir: ${dataset_dir}/background/bg_template
3 | 
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/bg_template_plus_dalle.yaml:
--------------------------------------------------------------------------------
1 | - _target_: utils.read_dalle_backgrounds
2 |   data_dir: ${dataset_dir}/background/context_augment
3 | - _target_: utils.read_background_template
4 |   data_dir: ${dataset_dir}/background/bg_template
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/real.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_real_VOC_backgrounds
2 | 
3 | # assume dataset_dir: data/voc2012/
4 | data_list: ${dataset_dir}/train_cls-1shot.txt
5 | data_dir: ${dataset_dir}/VOC2012/JPEGImages
6 | anno_dir: ${dataset_dir}/VOC2012/Annotations
7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject
--------------------------------------------------------------------------------
/cutpaste/config/config.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | # specify here default training configuration
 4 | defaults:
 5 |   - _self_
 6 |   - ds: null
 7 |   - fg@fg_real: null
 8 |   - fg@fg_syn: null
 9 |   - bg@bg_syn: null
10 |   - bg@bg_real: null
11 |   - paster: defaults
12 |   - exp: null
13 | 
14 |   # enable color logging
15 |   - override hydra/hydra_logging: colorlog
16 |   - override hydra/job_logging: colorlog
17 | 
18 | # path to original working directory
19 | # hydra hijacks working directory by changing it to the new log directory
20 | # so it's useful to have this path as a special variable
21 | # https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
22 | work_dir: ${hydra:runtime.cwd}
23 | 
24 | dataset: ${ds.name}
25 | 
26 | name: ??? # name of exp
27 | 
28 | output_dir: ${work_dir}/../artifact/${dataset}/${name}
29 | 
30 | # seed for random number generators in pytorch, numpy and python.random
31 | seed: 42
32 | 
33 | hydra:
34 |   job:
35 |     chdir: True
--------------------------------------------------------------------------------
/cutpaste/config/ds/VOC.yaml:
--------------------------------------------------------------------------------
 1 | name: VOC
 2 | label2id:
 3 |   aeroplane: 1
 4 |   bicycle: 2
 5 |   bird: 3
 6 |   boat: 4
 7 |   bottle: 5
 8 |   bus: 6
 9 |   car: 7
10 |   cat: 8
11 |   chair: 9
12 |   cow: 10
13 |   diningtable: 11
14 |   dog: 12
15 |   horse: 13
16 |   motorbike: 14
17 |   person: 15
18 |   pottedplant: 16
19 |   sheep: 17
20 |   sofa: 18
21 |   train: 19
22 |   tvmonitor: 20
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_cutpaste.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | defaults:
 4 |   - override /ds: VOC
 5 |   - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
 6 |   - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
 7 | 
 8 | name: VOC-demo-1shot-cutpaste
 9 | dataset_dir: ${work_dir}/../data/test_data
10 | paster:
11 |   debug: True
12 | 
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_syn+real.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | defaults:
 4 |   - override /ds: VOC
 5 |   - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
 6 |   - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
 7 |   - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
 8 |   - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions
 9 | 
10 | name: VOC-demo-1shot-syn+real
11 | dataset_dir: ${work_dir}/../data/test_data
12 | paster:
13 |   debug: True
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_syn.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | defaults:
 4 |   - override /ds: VOC
 5 |   - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
 6 |   - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions
 7 | 
 8 | name: VOC-demo-1shot-syn
 9 | dataset_dir: ${work_dir}/../data/test_data
10 | paster:
11 |   debug: True
12 | 
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_synFg.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | defaults:
 4 |   - override /ds: VOC
 5 |   - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
 6 |   - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
 7 |   - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
 8 | 
 9 | name: VOC-demo-1shot-synFg
10 | dataset_dir: ${work_dir}/../data/test_data
11 | paster:
12 |   debug: True
13 | 
--------------------------------------------------------------------------------
/cutpaste/config/fg/VOC/1shot/real.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_real_VOC_foregrounds
2 | 
3 | # assume dataset_dir: data/voc2012/
4 | data_list: ${dataset_dir}/train_cls-1shot.txt
5 | data_dir: ${dataset_dir}/VOC2012/JPEGImages
6 | anno_dir: ${dataset_dir}/VOC2012/Annotations
7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject
--------------------------------------------------------------------------------
/cutpaste/config/fg/VOC/fg_template.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_entity_foregrounds
2 | 
3 | dataset: ${dataset}
4 | rgb_dir: ${dataset_dir}/foreground/foreground_rgb
5 | mask_dir: ${dataset_dir}/foreground/foreground_mask
6 | 
--------------------------------------------------------------------------------
/cutpaste/config/paster/defaults.yaml:
--------------------------------------------------------------------------------
 1 | debug: False # debug mode, if True, only paste 40 images for debugging purpose
 2 | 
 3 | r: 2 # number of repeat
 4 | repeat_background: ${paster.r} # alias
 5 | c: 4 # number of foregrounds pasted onto each background
 6 | num_cut_images: ${paster.c} # alias
 7 | j: 1 # number of workers, 1 means no multiprocessing, to accelerate, set to something like 50
 8 | max_workers: ${paster.j} # alias
 9 | 
10 | # random scale in percentage
11 | scale_factor: 0
12 | # random center translation in percentage
13 | center_translation_factor: 0
14 | # if true, after find circle still random scale instead of matching radius
15 | use_random_scaling: False
16 | # if set, each time sample num_cut from [lowerbound, num_cut_images]
17 | num_cut_lowerbound: null
18 | # if true, ablation study of random paste images in anywhere
19 | use_random_paste: False
20 | 
21 | select_prob: uniform # how to sample backgrounds
22 | out_size: 512
23 | max_degree: 30
24 | 
25 | # if set, only paste [start:to] instead of entire
26 | start: null
27 | to: null
28 | 
--------------------------------------------------------------------------------
/cutpaste/foreground.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from typing import List
 3 | 
 4 | import cv2
 5 | import numpy as np
 6 | from PIL import Image
 7 | 
 8 | from anno import Anno
 9 | 
10 | 
def get_box(mask):
    """Tight bounding box (xmin, xmax, ymin, ymax) around the non-zero pixels of mask."""
    row_hits = np.flatnonzero(np.any(mask, axis=1))
    col_hits = np.flatnonzero(np.any(mask, axis=0))
    assert row_hits.size > 0
    ymin, ymax = row_hits[0], row_hits[-1]
    xmin, xmax = col_hits[0], col_hits[-1]
    assert ymax >= ymin and xmax >= xmin
    return int(xmin), int(xmax), int(ymin), int(ymax)
19 | 
20 | 
def get_area(xmin, xmax, ymin, ymax):
    """Area of the box spanning (xmin, xmax) horizontally and (ymin, ymax) vertically."""
    width = xmax - xmin
    height = ymax - ymin
    return width * height
23 | 
24 | 
class CutObject:
    """
    A single cut-out foreground: cropped RGB image plus its mask.
    In the mask, 0 is dummy and a positive value marks the occupied region
    (the value is the category id, 1-20 for VOC).
    """
    def __init__(self, img_path: str, img: Image.Image, mask: Image.Image):
        self.img_path = img_path
        self.img = img
        self.mask = mask
        uniques = set(np.unique(self.mask))
        # it's possible to get a perfect mask, so only the positive value remains (1 unique)
        assert len(uniques) in [2, 1]
        # whatever is not 0 is the category for this mask
        uniques: set = uniques.difference({0})
        self.category, = uniques
        self.category_name = Anno.id2label[self.category]


    def min_enclosing_circle(self):
        """Radius and center of the smallest circle containing all mask contours."""
        # [-2:] keeps compatibility across OpenCV 3/4 findContours signatures
        contours, _ = cv2.findContours(np.array(self.mask), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        center, radius = cv2.minEnclosingCircle(np.concatenate(contours, 0))
        return radius, center

    def scale(self, scaling_factor: float):
        """Resize image and mask by scaling_factor; the mask uses NEAREST to keep labels exact."""
        orig_w, orig_h = self.img.size
        o_w, o_h = int(scaling_factor * orig_w), int(scaling_factor * orig_h)
        # BUGFIX/compat: Image.ANTIALIAS was removed in Pillow 10; LANCZOS is
        # the same filter and matches PastedBackground.resize
        self.img = self.img.resize((o_w, o_h), Image.LANCZOS)
        self.mask = self.mask.resize((o_w, o_h), Image.NEAREST)

    def rotate(self, size, max_degree=60):
        """
        Rotate image and mask by a random angle in [-max_degree, max_degree],
        re-sampling until the expanded result fits strictly inside @size (w, h).
        NOTE(review): loops forever if the object cannot fit at any angle --
        callers appear to scale the object down first; confirm.
        """
        w, h = size
        while True:
            rot_degrees = random.randint(-max_degree, max_degree)
            foreground_tmp = self.img.rotate(rot_degrees, expand=True)
            foreground_mask_tmp = self.mask.rotate(rot_degrees, expand=True)
            o_w, o_h = foreground_tmp.size
            if w - o_w > 0 and h - o_h > 0:
                break
        self.img = foreground_tmp
        self.mask = foreground_mask_tmp

    def save(self, name):
        """Dump the foreground image and mask as '{name}-fg-image.png' / '{name}-fg-mask.png'."""
        self.img.save(f"{name}-fg-image.png")
        self.mask.save(f"{name}-fg-mask.png")
68 | 
class CutObjects(list):
    """
    List of CutObject foregrounds to cut, later pasted onto a PastedImage.
    May hold several foregrounds cropped from the same source image.
    """
    def __init__(self, *args):
        super().__init__(*args)

    def add_image(self, img_path, foreground_anno: Anno, area_threshold=700):
        """
        Append one CutObject per annotated object in the image, keeping only
        objects whose bounding-box area reaches area_threshold. Returns self.
        """
        source_image = Image.open(img_path)
        for index, _ in enumerate(foreground_anno.objects(), 1):
            # per-object mask: 0 is dummy, the label id marks the object's pixels
            object_mask = foreground_anno.create_mask(for_object=index)
            xmin, xmax, ymin, ymax = get_box(object_mask)
            if get_area(xmin, xmax, ymin, ymax) < area_threshold:
                continue
            crop_box = (xmin, ymin, xmax, ymax)
            self.append(CutObject(img_path,
                                  source_image.crop(crop_box),
                                  object_mask.crop(crop_box)))
        return self

    def shuffle(self):
        """In-place random reordering of the foregrounds."""
        random.shuffle(self)
--------------------------------------------------------------------------------
/cutpaste/paste.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import shutil
  4 | from pathlib import Path
  5 | 
  6 | import hydra
  7 | from omegaconf import DictConfig, open_dict, ListConfig
  8 | import numpy as np
  9 | 
 10 | from paster import Paster
 11 | 
 12 | from logging import Logger, getLogger
 13 | 
 14 | logger = getLogger(__file__)
 15 | @hydra.main(config_path="config", config_name="config", version_base="1.2")
 16 | def paste(cfg: DictConfig):
 17 |     assert cfg.get("dataset") and cfg.get("name")
 18 |     assert cfg['paster'].get("select_prob") in ["uniform", "balanced"]
 19 |     with open_dict(cfg):
 20 |         if not cfg.get("dataset_dir"):
 21 |             if cfg.dataset == "VOC":
 22 |                 cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "voc2012"
 23 |             else:
 24 |                 cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "COCO2017"
 25 |         else:
 26 |             cfg.dataset_dir = Path(cfg.dataset_dir)
 27 |         cfg.output_dir = Path(cfg.output_dir)
 28 |         if cfg.get("debug") and cfg.output_dir.exists():
 29 |             shutil.rmtree(cfg.output_dir)
 30 |         os.makedirs(cfg.output_dir, exist_ok=True)
 31 | 
 32 |     np.random.seed(cfg.seed)
 33 |     random.seed(cfg.seed)
 34 |     os.environ["PYTHONHASHSEED"] = str(cfg.seed)
 35 |     paster = Paster(
 36 |         label2id=cfg.ds.label2id,
 37 |         out_size=cfg.paster.out_size,
 38 |         repeat_background=cfg.paster.repeat_background,
 39 |         select_prob=cfg.paster.select_prob,
 40 |         random_paste=cfg.paster.use_random_paste,
 41 |     )
 42 |     if (cfg.output_dir / "paster.pt").exists(): # if paster is saved, load it instead of creating a new one
 43 |         paster = paster.from_save(cfg.output_dir)
 44 |     else:
 45 |         #### foreground
 46 |         if cfg.get("fg_real"):
 47 |             # from utils import read_real_VOC
 48 |             paster.foregrounds.extend(hydra.utils.call(cfg.fg_real))
 49 |         if cfg.get("fg_syn"):
 50 |             # from utils import read_entity_foregrounds
 51 |             paster.foregrounds.extend(hydra.utils.call(cfg.fg_syn))
 52 |         #### background
 53 |         if cfg.get("bg_real"):
 54 |             # from utils import read_real_VOC
 55 |             paster.backgrounds.extend(hydra.utils.call(cfg.bg_real))
 56 |         if cfg.get("bg_syn"):
 57 |             # from utils import read_dalle_backgrounds
 58 |             assert isinstance(cfg.bg_syn, ListConfig)
 59 |             for syn_cfg in cfg.bg_syn:
 60 |                 paster.backgrounds.extend(hydra.utils.call(syn_cfg))
 61 | 
 62 |         paster.save(cfg.output_dir)
 63 | 
 64 |     paster.validate()
 65 |     if cfg.paster.debug:
 66 |         random.shuffle(paster.backgrounds)
 67 |         random.shuffle(paster.foregrounds)
 68 |         paster.truncate(slice(0, 40))
 69 |         cfg.paster.max_workers = 1
 70 |         cfg.output_dir /= "debug"
 71 |         if cfg.output_dir.exists():
 72 |             shutil.rmtree(cfg.output_dir)
 73 |     else:
 74 |         random_str = "-random" if cfg.use_random_paste else ""
 75 |         prob_str = '' if cfg.select_prob == 'uniform' else "-balanced"
 76 |         num_cut = cfg.num_cut_images if cfg.num_cut_lowerbound is None else f"{cfg.num_cut_lowerbound}~{cfg.num_cut_images}"
 77 |         cfg.output_dir = cfg.output_dir \
 78 |                          / f"[{cfg.foreground},{cfg.background}]{random_str}{prob_str}[{cfg.repeat_each_image},{num_cut},{cfg.max_degree},{cfg.out_size}]"
 79 | 
 80 |     os.makedirs(cfg.output_dir, exist_ok=True)
 81 | 
 82 |     if cfg.paster.get("start") and cfg.paster.get("to"):
 83 |         slice_idx = slice(cfg.paster.start, cfg.paster.to)
 84 |         paster.truncate(slice_idx)
 85 | 
 86 |     logger.info(f"size of background {len(paster)}; size of foreground {len(paster.foregrounds)}")
 87 |     logger.info(f"saving to {cfg.output_dir}")
 88 | 
 89 |     # TODO, move json
 90 |     shutil.copy(Path(os.getcwd()) / ".hydra" / "config.yaml", cfg.output_dir / "config.yaml")
 91 |     shutil.copy(Path(os.getcwd()) / ".hydra" / "overrides.yaml", cfg.output_dir / "overrides.yaml")
 92 |     paster.cut_and_paste(
 93 |         out_dir=cfg.output_dir,
 94 |         max_workers=cfg.paster.max_workers,
 95 |         num_cut_images=cfg.paster.num_cut_images, max_degree=cfg.paster.max_degree,
 96 |         num_cut_lowerbound=cfg.paster.num_cut_lowerbound,
 97 |         scale_factor=cfg.paster.scale_factor, center_translation_factor=cfg.paster.center_translation_factor,
 98 |         use_random_scaling=cfg.paster.use_random_scaling
 99 |     )
100 | 
101 | 
if __name__ == "__main__":
    # hydra parses CLI overrides and injects the composed config into paste()
    paste()
--------------------------------------------------------------------------------
/cutpaste/paster.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import random
  4 | from concurrent import futures
  5 | from copy import deepcopy
  6 | from functools import partial
  7 | from pathlib import Path
  8 | from typing import Union, Optional, List
  9 | 
 10 | import numpy as np
 11 | import ujson as json
 12 | # from detectron2.utils.env import seed_all_rng
 13 | from tqdm import tqdm
 14 | 
 15 | from anno import Anno
 16 | from background import PastedBackground
 17 | from foreground import CutObjects
 18 | from utils import convert_to_COCO
 19 | 
 20 | 
class Paster:
    """
    paste @self.foregrounds into @self.backgrounds

    Workflow: fill .foregrounds / .backgrounds, then call cut_and_paste(),
    which writes pasted images + masks and merges per-image json fragments
    into one COCO file via aggregate_json().
    """

    def __init__(self, label2id: dict,
                 out_size: int = 512,
                 repeat_background: int = 1, select_prob: str = "uniform",
                 random_paste=False):
        """
        out_dir/
            foregrounds.csv
            backgrounds.csv # without repeat
            TODO
            xxx
        Args:
            label2id: dict with label text & id
            out_size: square size each background is resized to before pasting
            repeat_background: # times background image is repeated
            select_prob: how to select foreground
            random_paste: whether to use random paste, if False use space maximization paste
        """
        # blending_list = ['gaussian', 'poisson', 'none', 'box', 'motion']
        assert select_prob in ["uniform", "balanced", "supercategory_CDI", "supercategory"]
        # seed_all_rng(seed)
        # TODO
        self.select_prob = select_prob
        self.random_paste = random_paste
        self.out_size = out_size
        # each pasted image is written once per blending mode listed here
        self.blending_list: List[str] = ['gaussian', ]
        assert all(b in ['gaussian', 'poisson', 'none', 'box', 'motion'] for b in self.blending_list)
        self.repeat_background = repeat_background  # repeat background only
        self.backgrounds: List[PastedBackground] = []
        self.foregrounds = []  # foreground cut-outs (CutObject entries — see foreground.py)

        # mirror the label mapping onto Anno's class attributes so annotations
        # can resolve labels without holding a Paster reference
        self.id2label = {v: k for k, v in label2id.items()}
        self.label2id = dict(label2id)
        Anno.label2id = label2id
        Anno.id2label = self.id2label

    def aggregate_json(self,
                       input_dir: Path, max_workers=1, json_name="COCO"):
        """
        convert instance mask to COCO format
        input_dir must contain @image_folder folder for pasted images, and json are saved here

        Merges every per-image json fragment under input_dir/tmp into a single
        "{json_name}.json", re-assigning globally unique integer annotation ids.
        """
        output_json_dict = {
            "images": [],
            "type": "instances",
            "annotations": [],
            "categories": [
                {'supercategory': 'none', 'id': label_id, 'name': label}
                for label, label_id in self.label2id.items()
            ]
        }

        def read_json(path):
            with open(input_dir / "tmp" / path) as f:
                return json.load(f)

        files = list(os.listdir(input_dir / "tmp"))
        with tqdm(total=len(files), desc="COCO agg") as pbar, \
                futures.ThreadPoolExecutor(max_workers) as executor:
            todos = []
            bnd_id = 0  # coco need integer bnd ids
            for file in files:
                todos.append(executor.submit(read_json, file))
            # NOTE: as_completed yields in completion order, so annotation ids are
            # not deterministic across runs when max_workers > 1
            for future in futures.as_completed(todos):
                data = future.result()
                output_json_dict["images"].extend(data["images"])
                anno = data["annotations"]
                for bbox in anno:
                    bbox["id"] = bnd_id
                    bnd_id += 1
                output_json_dict["annotations"].extend(anno)
                pbar.update(1)
        print(f"saving to {input_dir}")
        with open(input_dir / f"{json_name}.json", "w") as f:
            json.dump(output_json_dict, f)
        with open(input_dir / "label2id.json", "w") as f:
            json.dump(dict(self.label2id), f, indent=4)
        # can rm tmp folder if you want
        # shutil.rmtree(input_dir / "tmp")

    def cut_and_paste_one_image(self, i: int, out_dir: Path, out_size: int,
                                num_cut_images: int, blending_list: List[str], probs: List[float],
                                num_cut_lowerbound: Optional[int],
                                max_degree: int, random_paste: bool, scale_factor: int, center_translation_factor: int,
                                use_random_scaling: bool):
        """
        Paste randomly chosen foregrounds onto background #i and write the
        result (one image per blending mode) plus its per-image json fragment.

        Skips all work if the fragment already exists, so interrupted runs resume.
        Parameters mirror cut_and_paste(); @probs are per-foreground sampling weights.
        """
        background: PastedBackground = deepcopy(self.backgrounds[i])
        # index suffix keeps repeated backgrounds from overwriting each other
        path_to_save = f"{Path(background.imagepath).stem}_{i}"
        if (out_dir / "tmp" / f"{path_to_save}.json").exists():
            return

        background.resize(out_size)
        # number of foregrounds to paste: fixed, or uniform in [lowerbound, num_cut_images]
        if num_cut_lowerbound:
            k = random.randint(num_cut_lowerbound, num_cut_images)
        else:
            k = num_cut_images
        cut_images = random.choices(self.foregrounds, k=k, weights=probs)
        foregrounds = CutObjects(cut_images)
        locations, foregrounds = background.find_paste_location(foregrounds, max_degree=max_degree,
                                                                random_paste=random_paste, scale_factor=scale_factor,
                                                                center_translation_factor=center_translation_factor,
                                                                use_random_scaling=use_random_scaling)

        for blending in blending_list:
            pasted_background = deepcopy(background)
            for paste_location, foreground_object in zip(locations, foregrounds):
                pasted_background.paste(
                    foreground_object=foreground_object, paste_location=paste_location, blending=blending)
            pasted_background.save(path=out_dir, name=f"{path_to_save}_{blending}")

        convert_to_COCO(out_dir, path_to_save, background, blending_list)

    def foreground_sample_prob_by_supercategory(self, bg_filename, probs):
        """
        either sample based on supercategory of @bg_filename
        or fallback to @probs

        NOTE(review): both branches currently return @probs unchanged, so
        supercategory-aware sampling appears unimplemented — confirm intent.
        """
        if "background" in bg_filename and self.select_prob == "supercategory_CDI": # bgtemplate
            return probs # random select fg on bg template images
        return probs

    def save(self, output_dir: Path):
        """Persist this paster: a human-readable json summary plus a pickled copy (paster.pt)."""
        import torch
        with open(output_dir / "paster.json", "w") as f:
            json.dump({
                "counts": [len(self.foregrounds), len(self)],
                "foreground": [str(fg.img_path) for fg in self.foregrounds],
                "background": [str(bg.imagepath) for bg in self.backgrounds],
            }, f)
        torch.save(self, output_dir / "paster.pt")
        # with open(output_dir / "paster.pt", "w") as f:

    @staticmethod
    def from_save(input_dir: Path):
        """Restore a pickled Paster. NOTE: torch.load unpickles arbitrary objects — only load trusted files."""
        import torch
        return torch.load(input_dir / "paster.pt")

    def get_select_prob(self, select_prob) -> np.ndarray:
        """
        return selection prob for each ele of @self.foregrounds
        """
        if select_prob == "uniform":  # uniform over provided data, thus can be balanced and reflect distribution of bg
            probs = np.ones(len(self.foregrounds)) / len(self.foregrounds)
        else:  # balanced based on label st each label shows up equal likely
            # NOTE(review): this unpacks 4 fields per foreground entry with the label in
            # position 1; CutObject in foreground.py is built with 3 args — verify before
            # using the "balanced" mode
            labels = np.array([label for _, label, _, _ in self.foregrounds])  # (N, )
            probs = np.zeros_like(labels).astype(float)
            for class_i in np.unique(labels):
                class_indices = labels == class_i  # boolean (N, )
                num_samples_class_i = class_indices.sum()
                assert num_samples_class_i > 0
                probs[class_indices] = 1.0 / num_samples_class_i
            probs /= probs.sum()  # st sum(probs) == 1
            """
            # check if probs work
            labels = []
            for _ in range(10000):
                cut_images = random.choices(self.backgrounds, k=4, weights=probs)
                for _, l, _, _ in cut_images:
                    labels.append(l)
            from collections import Counter
            ct = Counter(labels)
            print(ct) # should be almost same number for each class
            """
        assert len(probs) == len(self.foregrounds)
        return probs

    def __len__(self): # len def as background
        return len(self.backgrounds)

    def truncate(self, slice):
        """Keep only the backgrounds selected by @slice (a builtin slice object)."""
        self.backgrounds = self.backgrounds[slice]

    def validate(self):
        """Sanity-check that both pools were populated before pasting."""
        assert len(self.foregrounds) > 0 and len(self.backgrounds) > 0

    def cut_and_paste(
        self, out_dir: Path, num_cut_images: int = 2, max_workers=1,
        # rotate
        max_degree: int = 30,
        # variant
        scale_factor=0, center_translation_factor=0, use_random_scaling=False,
        num_cut_lowerbound: Optional[int] = None
    ):
        """
        will create the following in @out_dir:
            Images folder: pasted RGB images
            Masks folder: semantic level segmentation mask
            COCO.json: instance level COCO segmentation annotation

        NOTE(review): the pasting loop below is serial — the ThreadPoolExecutor
        version is commented out, so max_workers only affects aggregate_json.
        """
        self.validate()
        # materialize the configured repeats (mutates self.backgrounds)
        self.backgrounds = self.backgrounds * self.repeat_background
        self.save(output_dir=out_dir) # with updated backgrounds
        os.makedirs(out_dir / "Images", exist_ok=True)
        os.makedirs(out_dir / "Masks", exist_ok=True)

        probs = self.get_select_prob(self.select_prob)

        cut_and_paste_one_image = partial(
            self.cut_and_paste_one_image, out_dir=out_dir, out_size=self.out_size, probs=probs,
            num_cut_images=num_cut_images, max_degree=max_degree, blending_list=self.blending_list,
            num_cut_lowerbound=num_cut_lowerbound,
            random_paste=self.random_paste, scale_factor=scale_factor, center_translation_factor=center_translation_factor,
            use_random_scaling=use_random_scaling)
        for i in list(range(len(self))):
            cut_and_paste_one_image(i)

        # todos = []
        # with tqdm(total=len(self), desc="cutpaste") as pbar, \
        #         futures.ThreadPoolExecutor(max_workers) as executor:
        #     for i in list(range(len(self))):
        #         todos.append(executor.submit(
        #             cut_and_paste_one_image, i))
        #     for future in futures.as_completed(todos):
        #         future.result()
        #         pbar.update(1)
        print("converting to COCO format")
        self.aggregate_json(out_dir, max_workers, json_name="COCO")
--------------------------------------------------------------------------------
/cutpaste/pb.py:
--------------------------------------------------------------------------------
  1 | """
  2 | pb: Poisson Image Blending implemented by Python
  3 | """
  4 | 
  5 | import matplotlib.pyplot as plt
  6 | import numpy as np
  7 | import scipy.sparse
  8 | from skimage import io
  9 | 
 10 | 
 11 | def create_mask(img_mask, img_target, img_src, offset=(0, 0)):
 12 |     '''
 13 |     Takes the np.array from the grayscale image
 14 |     '''
 15 | 
 16 |     # crop img_mask and img_src to fit to the img_target
 17 |     hm, wm = img_mask.shape
 18 |     ht, wt, nl = img_target.shape
 19 | 
 20 |     hd0 = max(0, -offset[0])
 21 |     wd0 = max(0, -offset[1])
 22 | 
 23 |     hd1 = hm - max(hm + offset[0] - ht, 0)
 24 |     wd1 = wm - max(wm + offset[1] - wt, 0)
 25 | 
 26 |     mask = np.zeros((hm, wm))
 27 |     mask[img_mask > 0] = 1
 28 |     mask[img_mask == 0] = 0
 29 | 
 30 |     mask = mask[hd0:hd1, wd0:wd1]
 31 |     src = img_src[hd0:hd1, wd0:wd1]
 32 | 
 33 |     # fix offset
 34 |     offset_adj = (max(offset[0], 0), max(offset[1], 0))
 35 | 
 36 |     # remove edge from the mask so that we don't have to check the
 37 |     # edge condition
 38 |     mask[:, -1] = 0
 39 |     mask[:, 0] = 0
 40 |     mask[-1, :] = 0
 41 |     mask[0, :] = 0
 42 | 
 43 |     return mask, src, offset_adj
 44 | 
 45 | 
 46 | def get_gradient_sum(img, i, j, h, w):
 47 |     """
 48 |     Return the sum of the gradient of the source imgae.
 49 |     * 3D array for RGB
 50 |     """
 51 | 
 52 |     v_sum = np.array([0.0, 0.0, 0.0])
 53 |     v_sum = img[i, j] * 4 \
 54 |             - img[i + 1, j] - img[i - 1, j] - img[i, j + 1] - img[i, j - 1]
 55 | 
 56 |     return v_sum
 57 | 
 58 | 
 59 | def get_mixed_gradient_sum(img_src, img_target, i, j, h, w, ofs,
 60 |                            c=1.0):
 61 |     """
 62 |     Return the sum of the gradient of the source imgae.
 63 |     * 3D array for RGB
 64 |     c(>=0): larger, the more important the target image gradient is
 65 |     """
 66 | 
 67 |     v_sum = np.array([0.0, 0.0, 0.0])
 68 |     nb = np.array([[1, 0], [-1, 0], [0, 1], [0, -1]])
 69 | 
 70 |     for kk in range(4):
 71 |         fp = img_src[i, j] - img_src[i + nb[kk, 0], j + nb[kk, 1]]
 72 |         gp = img_target[i + ofs[0], j + ofs[1]] \
 73 |              - img_target[i + nb[kk, 0] + ofs[0], j + nb[kk, 1] + ofs[1]]
 74 | 
 75 |         # if np.linalg.norm(fp) > np.linalg.norm(gp):
 76 |         #     v_sum += fp
 77 |         # else:
 78 |         #     v_sum += gp
 79 | 
 80 |         v_sum += np.array([fp[0] if abs(fp[0] * c) > abs(gp[0]) else gp[0],
 81 |                            fp[1] if abs(fp[1] * c) > abs(gp[1]) else gp[1],
 82 |                            fp[2] if abs(fp[2] * c) > abs(gp[2]) else gp[2]])
 83 | 
 84 |     return v_sum
 85 | 
 86 | 
 87 | def poisson_blend(img_mask, img_src, img_target, method='mix', c=1.0,
 88 |                   offset_adj=(0, 0)):
 89 |     hm, wm = img_mask.shape
 90 |     region_size = hm * wm
 91 | 
 92 |     F = np.zeros((region_size, 3))
 93 |     A = scipy.sparse.identity(region_size, format='lil')
 94 | 
 95 |     get_k = lambda i, j: i + j * hm
 96 | 
 97 |     # plane insertion
 98 |     if method in ['target', 'src']:
 99 |         for i in range(hm):
100 |             for j in range(wm):
101 |                 k = get_k(i, j)
102 | 
103 |                 # ignore the edge case (# of neighboor is always 4)
104 |                 if img_mask[i, j] == 1:
105 | 
106 |                     if method == 'target':
107 |                         F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]
108 |                     elif method == 'src':
109 |                         F[k] = img_src[i, j]
110 |                 else:
111 |                     F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]
112 | 
113 |     # poisson blending
114 |     else:
115 |         if method == 'mix':
116 |             grad_func = lambda ii, jj: get_mixed_gradient_sum(
117 |                 img_src, img_target, ii, jj, hm, wm, offset_adj, c=c)
118 |         else:
119 |             grad_func = lambda ii, jj: get_gradient_sum(
120 |                 img_src, ii, jj, hm, wm)
121 | 
122 |         for i in range(hm):
123 |             for j in range(wm):
124 |                 k = get_k(i, j)
125 | 
126 |                 # ignore the edge case (# of neighboor is always 4)
127 |                 if img_mask[i, j] == 1:
128 |                     f_star = np.array([0.0, 0.0, 0.0])
129 | 
130 |                     if img_mask[i - 1, j] == 1:
131 |                         A[k, k - 1] = -1
132 |                     else:
133 |                         f_star += img_target[i - 1 +
134 |                                              offset_adj[0], j + offset_adj[1]]
135 | 
136 |                     if img_mask[i + 1, j] == 1:
137 |                         A[k, k + 1] = -1
138 |                     else:
139 |                         f_star += img_target[i + 1 +
140 |                                              offset_adj[0], j + offset_adj[1]]
141 | 
142 |                     if img_mask[i, j - 1] == 1:
143 |                         A[k, k - hm] = -1
144 |                     else:
145 |                         f_star += img_target[i +
146 |                                              offset_adj[0], j - 1 + offset_adj[1]]
147 | 
148 |                     if img_mask[i, j + 1] == 1:
149 |                         A[k, k + hm] = -1
150 |                     else:
151 |                         f_star += img_target[i +
152 |                                              offset_adj[0], j + 1 + offset_adj[1]]
153 | 
154 |                     A[k, k] = 4
155 |                     F[k] = grad_func(i, j) + f_star
156 | 
157 |                 else:
158 |                     F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]
159 | 
160 |     A = A.tocsr()
161 | 
162 |     img_pro = np.empty_like(img_target.astype(np.uint8))
163 |     img_pro[:] = img_target.astype(np.uint8)
164 | 
165 |     for l in range(3):
166 |         # x = pyamg.solve(A, F[:, l], verb=True, tol=1e-15, maxiter=100)
167 |         x = scipy.sparse.linalg.spsolve(A, F[:, l])
168 |         x[x > 255] = 255
169 |         x[x < 0] = 0
170 |         x = np.array(x, img_pro.dtype)
171 | 
172 |         img_pro[offset_adj[0]:offset_adj[0] + hm,
173 |         offset_adj[1]:offset_adj[1] + wm, l] \
174 |             = x.reshape(hm, wm, order='F')
175 | 
176 |     return img_pro
177 | 
178 | 
if __name__ == "__main__":
    # demo: blend an image into itself with a mask at a fixed offset;
    # the absolute path below is developer-local and will not exist elsewhere
    offset = (40, -30)
    # NOTE(review): `as_grey` was renamed `as_gray` in newer scikit-image — verify pinned version
    img_mask = io.imread('/Users/ysakamoto/Projects/sccomp/mask.png', as_grey=True)
    img_src = io.imread('./testimages/0.png').astype(np.float64)
    img_target = io.imread('./testimages/0.png')

    # img_src = io.imread('./testimages/test1_src.png').astype(np.float64)
    # img_target = io.imread('./testimages/test1_target.png')
    # img_mask = io.imread('./testimages/test1_mask.png', as_grey=True)

    # resize src and mask images
    # import skimage.transform
    # from skimage import color
    # fac = 3
    # img_src = skimage.transform.resize(img_src, (np.array(img_src.shape)//fac)[:2])
    # img_mask = io.imread('/Users/ysakamoto/Desktop/mask.png', as_grey=True)
    # img_mask = skimage.transform.resize(img_mask, (np.array(img_mask.shape)//fac)[:2])
    # img_mask = color.rgb2grey(img_mask)

    img_mask, img_src, offset_adj \
        = create_mask(img_mask.astype(np.float64),
                      img_target, img_src, offset=offset)

    # 'normal' is none of 'mix'/'target'/'src', so poisson_blend takes the
    # source-gradient Poisson path
    img_pro = poisson_blend(img_mask, img_src, img_target,
                            method='normal', offset_adj=offset_adj)
    plt.imshow(img_pro)
    plt.show()
    io.imsave('./testimages/poisson_normal.png', img_pro)

    # pdb.set_trace()
    # i=14
    # for c in np.linspace(10.0, 50.0, 5):
    #     i+=1
    #     img_pro = poisson_blend(img_mask, img_src, img_target, method='mix', c=c)
    #     plt.imshow(img_pro)
    #     plt.show()
    #     io.imsave('./testimages/poisson_mix_%d.png' %i, img_pro)

    # img_pro = poisson_blend(img_mask, img_src, img_target, method='src')
    # io.imsave('./testimages/poisson_src.png', img_pro)

    # img_pro = poisson_blend(img_mask, img_src, img_target, method='target')
    # io.imsave('./testimages/poisson_target.png', img_pro)

    # def plot_coo_matrix(m):
    #     if not isinstance(m, coo_matrix):
    #         m = coo_matrix(m)
    #     fig = plt.figure()
    #     ax = fig.add_subplot(111, axisbg='white')
    #     ax.plot(m.col, m.row, 's', color='black', ms=1)
    #     ax.set_xlim(0, m.shape[1])
    #     ax.set_ylim(0, m.shape[0])
    #     ax.set_aspect('equal')
    #     for spine in ax.spines.values():
    #         spine.set_visible(False)
    #     ax.invert_yaxis()
    #     ax.set_aspect('equal')
    #     ax.set_xticks([])
    #     ax.set_yticks([])
    #     return ax

    # B = A.tocoo()
    # plot_coo_matrix(B)
    # plt.show()
--------------------------------------------------------------------------------
/cutpaste/pyblur3/BoxBlur.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from PIL import Image
 3 | from scipy.signal import convolve2d
 4 | 
 5 | boxKernelDims = [3, 5, 7, 9]
 6 | 
 7 | 
 8 | def BoxBlur_random(img):
 9 |     kernelidx = np.random.randint(0, len(boxKernelDims))
10 |     kerneldim = boxKernelDims[kernelidx]
11 |     return BoxBlur(img, kerneldim)
12 | 
13 | 
def BoxBlur(img, dim):
    """Convolve *img* with a dim x dim averaging kernel (border filled with white)."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, BoxKernel(dim), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
20 | 
21 | 
def BoxKernel(dim):
    """Return a dim x dim float32 averaging kernel whose entries sum to 1."""
    kernel = np.ones((dim, dim), dtype=np.float32)
    # normalize by the number of non-zero taps (= dim * dim here)
    return kernel / np.count_nonzero(kernel)
28 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/DefocusBlur.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import numpy as np
 3 | from PIL import Image
 4 | from scipy.signal import convolve2d
 5 | from skimage.draw import circle
 6 | 
 7 | defocusKernelDims = [3, 5, 7, 9]
 8 | 
 9 | 
def DefocusBlur_random(img):
    """Apply DefocusBlur with a kernel size drawn uniformly from defocusKernelDims."""
    dim = defocusKernelDims[np.random.randint(0, len(defocusKernelDims))]
    return DefocusBlur(img, dim)
14 | 
15 | 
def DefocusBlur(img, dim):
    """Convolve *img* with a normalized disk kernel of size *dim* (border filled with white)."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, DiskKernel(dim), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
22 | 
23 | 
def DiskKernel(dim):
    """
    Return a dim x dim float32 disk-shaped kernel normalized to sum to 1.

    NOTE(review): relies on skimage.draw.circle, which was removed in
    scikit-image 0.19 (replaced by skimage.draw.disk) — verify the pinned version.
    """
    kernel = np.zeros((dim, dim), dtype=np.float32)
    center = dim / 2
    radius = center + 1

    rows, cols = circle(center, center, radius)
    kernel[rows, cols] = 1

    # soften the square-ish corners of the smallest kernels
    if dim in (3, 5):
        kernel = Adjust(kernel, dim)

    return kernel / np.count_nonzero(kernel)
39 | 
40 | 
def Adjust(kernel, kernelwidth):
    """Zero the four corner taps of *kernel* (modified in place) and return it."""
    last = kernelwidth - 1
    for r, c in ((0, 0), (0, last), (last, 0), (last, last)):
        kernel[r, c] = 0
    return kernel
47 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/GaussianBlur.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from PIL import ImageFilter
 3 | 
 4 | gaussianbandwidths = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]
 5 | 
 6 | 
 7 | def GaussianBlur_random(img):
 8 |     gaussianidx = np.random.randint(0, len(gaussianbandwidths))
 9 |     gaussianbandwidth = gaussianbandwidths[gaussianidx]
10 |     return GaussianBlur(img, gaussianbandwidth)
11 | 
12 | 
def GaussianBlur(img, bandwidth):
    """Return *img* filtered with PIL's Gaussian blur of the given radius."""
    return img.filter(ImageFilter.GaussianBlur(bandwidth))
16 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/LineDictionary.py:
--------------------------------------------------------------------------------
 1 | class LineDictionary:
 2 |     def __init__(self):
 3 |         self.lines = {}
 4 |         self.Create3x3Lines()
 5 |         self.Create5x5Lines()
 6 |         self.Create7x7Lines()
 7 |         self.Create9x9Lines()
 8 |         return
 9 | 
10 |     def Create3x3Lines(self):
11 |         lines = {}
12 |         lines[0] = [1, 0, 1, 2]
13 |         lines[45] = [2, 0, 0, 2]
14 |         lines[90] = [0, 1, 2, 1]
15 |         lines[135] = [0, 0, 2, 2]
16 |         self.lines[3] = lines
17 |         return
18 | 
19 |     def Create5x5Lines(self):
20 |         lines = {}
21 |         lines[0] = [2, 0, 2, 4]
22 |         lines[22.5] = [3, 0, 1, 4]
23 |         lines[45] = [0, 4, 4, 0]
24 |         lines[67.5] = [0, 3, 4, 1]
25 |         lines[90] = [0, 2, 4, 2]
26 |         lines[112.5] = [0, 1, 4, 3]
27 |         lines[135] = [0, 0, 4, 4]
28 |         lines[157.5] = [1, 0, 3, 4]
29 |         self.lines[5] = lines
30 |         return
31 | 
32 |     def Create7x7Lines(self):
33 |         lines = {}
34 |         lines[0] = [3, 0, 3, 6]
35 |         lines[15] = [4, 0, 2, 6]
36 |         lines[30] = [5, 0, 1, 6]
37 |         lines[45] = [6, 0, 0, 6]
38 |         lines[60] = [6, 1, 0, 5]
39 |         lines[75] = [6, 2, 0, 4]
40 |         lines[90] = [0, 3, 6, 3]
41 |         lines[105] = [0, 2, 6, 4]
42 |         lines[120] = [0, 1, 6, 5]
43 |         lines[135] = [0, 0, 6, 6]
44 |         lines[150] = [1, 0, 5, 6]
45 |         lines[165] = [2, 0, 4, 6]
46 |         self.lines[7] = lines
47 |         return
48 | 
49 |     def Create9x9Lines(self):
50 |         lines = {}
51 |         lines[0] = [4, 0, 4, 8]
52 |         lines[11.25] = [5, 0, 3, 8]
53 |         lines[22.5] = [6, 0, 2, 8]
54 |         lines[33.75] = [7, 0, 1, 8]
55 |         lines[45] = [8, 0, 0, 8]
56 |         lines[56.25] = [8, 1, 0, 7]
57 |         lines[67.5] = [8, 2, 0, 6]
58 |         lines[78.75] = [8, 3, 0, 5]
59 |         lines[90] = [8, 4, 0, 4]
60 |         lines[101.25] = [0, 3, 8, 5]
61 |         lines[112.5] = [0, 2, 8, 6]
62 |         lines[123.75] = [0, 1, 8, 7]
63 |         lines[135] = [0, 0, 8, 8]
64 |         lines[146.25] = [1, 0, 7, 8]
65 |         lines[157.5] = [2, 0, 6, 8]
66 |         lines[168.75] = [3, 0, 5, 8]
67 |         self.lines[9] = lines
68 |         return
69 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/LinearMotionBlur.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import math
 3 | 
 4 | import numpy as np
 5 | from PIL import Image
 6 | from scipy.signal import convolve2d
 7 | from skimage.draw import line
 8 | 
 9 | from .LineDictionary import LineDictionary
10 | 
# kernel sizes and anchor styles available for random motion blur
lineLengths = [3, 5, 7, 9]
lineTypes = ["full", "right", "left"]

lineDict = LineDictionary()  # shared endpoint lookup table, consumed by LineKernel
15 | 
16 | 
def LinearMotionBlur_random(img):
    """Apply LinearMotionBlur with random length, angle and line type."""
    # keep the original order of RNG draws: length first, then type, then angle
    lineLength = lineLengths[np.random.randint(0, len(lineLengths))]
    lineType = lineTypes[np.random.randint(0, len(lineTypes))]
    lineAngle = randomAngle(lineLength)
    return LinearMotionBlur(img, lineLength, lineAngle, lineType)
24 | 
25 | 
def LinearMotionBlur(img, dim, angle, linetype):
    """Convolve *img* with a motion-blur line kernel (border filled with white)."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, LineKernel(dim, angle, linetype), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
32 | 
33 | 
def LineKernel(dim, angle, linetype):
    """
    Return a dim x dim normalized kernel containing a line at the given angle.

    linetype: 'full' uses the whole line; 'right' / 'left' keep only the half
    anchored at the kernel center.
    """
    kernelwidth = dim
    kernelCenter = int(math.floor(dim / 2))
    angle = SanitizeAngleValue(kernelCenter, angle)
    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
    # fix: copy the anchors — the original aliased the shared lineDict entry and
    # mutated it in place for 'right'/'left', corrupting every later call
    # with the same (dim, angle)
    lineAnchors = list(lineDict.lines[dim][angle])
    if (linetype == 'right'):
        lineAnchors[0] = kernelCenter
        lineAnchors[1] = kernelCenter
    if (linetype == 'left'):
        lineAnchors[2] = kernelCenter
        lineAnchors[3] = kernelCenter
    rr, cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3])
    kernel[rr, cc] = 1
    # normalize so the kernel sums to 1
    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel
51 | 
52 | 
def SanitizeAngleValue(kernelCenter, angle):
    """Snap *angle* (degrees) to the nearest angle the kernel supports.

    A kernel with the given center supports kernelCenter * 4 evenly spaced
    angles in [0, 180).
    """
    numDistinctLines = kernelCenter * 4
    angle = math.fmod(angle, 180.0)
    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
    # Inlined nearest-neighbour lookup (same arithmetic as nearestValue).
    return validLineAngles[np.abs(validLineAngles - angle).argmin()]
59 | 
60 | 
def nearestValue(theta, validAngles):
    """Return the element of *validAngles* closest to *theta*."""
    distances = np.abs(validAngles - theta)
    return validAngles[distances.argmin()]
64 | 
65 | 
def randomAngle(kerneldim):
    """Pick a random valid angle (int degrees) for a kernel of this size."""
    center = int(math.floor(kerneldim / 2))
    # center * 4 evenly spaced angles in [0, 180), as in SanitizeAngleValue.
    choices = np.linspace(0, 180, center * 4, endpoint=False)
    picked = choices[np.random.randint(0, len(choices))]
    return int(picked)
72 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/PsfBlur.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import os.path
 3 | import pickle
 4 | 
 5 | import numpy as np
 6 | from PIL import Image
 7 | from scipy.signal import convolve2d
 8 | 
# Pre-computed point-spread-function kernels shipped alongside this module.
pickledPsfFilename = os.path.join(os.path.dirname(__file__), "psf.pkl")

# Loaded once at import time; psfDictionary maps an id to a kernel array.
# encoding='latin1' is required to read this Python-2-era pickle under Python 3.
with open(pickledPsfFilename, 'rb') as pklfile:
    psfDictionary = pickle.load(pklfile, encoding='latin1')
13 | 
14 | 
def PsfBlur(img, psfid):
    """Blur *img* with the stored point-spread-function kernel *psfid*.

    NOTE(review): convolve2d implies a single-channel (grayscale) image.
    """
    pixels = np.array(img, dtype="float32")
    psf_kernel = psfDictionary[psfid]
    blurred = convolve2d(pixels, psf_kernel, mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
21 | 
22 | 
def PsfBlur_random(img):
    """Blur *img* with a uniformly random stored PSF kernel."""
    chosen = np.random.randint(0, len(psfDictionary))
    return PsfBlur(img, chosen)
26 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/RandomizedBlur.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .BoxBlur import BoxBlur_random
 4 | from .DefocusBlur import DefocusBlur_random
 5 | from .GaussianBlur import GaussianBlur_random
 6 | from .LinearMotionBlur import LinearMotionBlur_random
 7 | from .PsfBlur import PsfBlur_random
 8 | 
# Dispatch table for RandomizedBlur: a random integer index, stringified,
# selects one of the five blur families.
blurFunctions = {"0": BoxBlur_random, "1": DefocusBlur_random, "2": GaussianBlur_random, "3": LinearMotionBlur_random,
                 "4": PsfBlur_random}
11 | 
12 | 
def RandomizedBlur(img):
    """Apply one of the available blur types, chosen uniformly at random."""
    key = str(np.random.randint(0, len(blurFunctions)))
    return blurFunctions[key](img)
16 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/__init__.py:
--------------------------------------------------------------------------------
 1 | from .BoxBlur import BoxBlur, BoxBlur_random
 2 | from .DefocusBlur import DefocusBlur, DefocusBlur_random
 3 | from .GaussianBlur import GaussianBlur, GaussianBlur_random
 4 | from .LinearMotionBlur import LinearMotionBlur, LinearMotionBlur_random
 5 | from .PsfBlur import PsfBlur, PsfBlur_random
 6 | from .RandomizedBlur import RandomizedBlur
 7 | 
 8 | __all__ = ["BoxBlur", "BoxBlur_random",
 9 |            "DefocusBlur", "DefocusBlur_random",
10 |            "GaussianBlur", "GaussianBlur_random",
11 |            "LinearMotionBlur", "LinearMotionBlur_random",
12 |            "PsfBlur", "PsfBlur_random",
13 |            "RandomizedBlur"]
14 | 
--------------------------------------------------------------------------------
/cutpaste/pyblur3/psf.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/pyblur3/psf.pkl
--------------------------------------------------------------------------------
/cutpaste/utils.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import json
  3 | import os
  4 | import re
  5 | from concurrent import futures
  6 | from pathlib import Path
  7 | from typing import List
  8 | 
  9 | from PIL import Image
 10 | from tqdm import tqdm
 11 | 
 12 | from anno import VOCAnno, Anno, EntityAnno
 13 | from cutpaste.background import PastedBackground
 14 | from foreground import CutObjects, CutObject
 15 | 
 16 | 
 17 | ####################################################################################################
 18 | # foregrounds
 19 | def read_real_VOC_foregrounds(data_dir, anno_dir, seg_dir, data_list) -> List[CutObjects]:
 20 |     """
 21 |     Read real foregrounds from VOC dataset
 22 |         Each has VOCAnno
 23 |     """
 24 |     foregrounds = []
 25 |     with open(data_list, 'r') as f:
 26 |         lines = f.readlines()
 27 |     for line in tqdm(lines, total=len(lines), desc="reading real VOC fg"):
 28 |         fields = line.strip().split()
 29 |         img_path = os.path.join(data_dir, fields[0] + '.jpg')
 30 |         anno_path = os.path.join(anno_dir, fields[0] + '.xml')
 31 |         seg_img_path = os.path.join(seg_dir, fields[0] + '.png')
 32 |         assert os.path.exists(anno_path) and os.path.exists(img_path) and os.path.exists(seg_img_path)
 33 |         anno: VOCAnno = Anno.factory(anno_path, seg_img_path)
 34 |         foregrounds.extend(CutObjects().add_image(img_path, anno))
 35 |     return foregrounds
 36 | 
 37 | 
def read_entity_foregrounds(dataset, rgb_dir, mask_dir) -> List[CutObjects]:
    """
    Read syn foregrounds (processed by entity segmentation, then selected by GradCAM)
        Each has EntityAnno

    NOTE(review): `dataset` is unused in this body — confirm it is kept only
    for interface compatibility with read_real_VOC_foregrounds callers.
    """
    rgb_dir, mask_dir = map(Path, [rgb_dir, mask_dir])

    foregrounds = []
    # VOC
    def get_voc_image(mask_file):
        # eg voc2012/foreground/foreground_mask_old/car_mask/a car in a white background30.png
        _, label, filename = mask_file.rsplit("/", 2)
        label = label.replace("_mask", "")
        # infer rgb img_path
        # eg (a car in a white background, 30, _)
        # split on the trailing digits: caption text, numeric id, empty tail
        target_caption, target_num, _ = re.split(r'(\d+)', filename)
        img_path = None
        # search every class dir for a caption dir matching the mask's caption
        for class_dir in rgb_dir.iterdir():
            for caption in os.listdir(class_dir):
                if caption == target_caption:
                    img_path = class_dir / caption / f"{target_num}.png"
            if img_path is not None:
                break
        assert img_path is not None, f"{str(mask_file)} Not found!"
        assert os.path.exists(img_path)
        # first arg None: EntityAnno is built from the mask file alone
        anno: EntityAnno = Anno.factory(None, mask_file)
        return CutObjects().add_image(img_path, anno)

    todos = []
    all_mask_files = list(glob.glob(str(mask_dir / "*_mask" / "*.png")))
    # Fan out the per-mask directory scans over threads (I/O bound);
    # results are collected in completion order, so output order is not stable.
    with tqdm(total=len(all_mask_files), desc="collecting real fg") as pbar, \
            futures.ThreadPoolExecutor(100) as executor:
        for mask_file in all_mask_files:
            todos.append(executor.submit(get_voc_image, mask_file))
        for future in futures.as_completed(todos):
            res = future.result()
            foregrounds.extend(res)
            pbar.update(1)
    return foregrounds
 77 | 
 78 | ####################################################################################################
 79 | # backgrounds
 80 | def read_real_VOC_backgrounds(data_dir, anno_dir, seg_dir, data_list) -> List[PastedBackground]:
 81 |     """
 82 |     load list of image name and image labels ([i] is img_name_list[i]'s K+1 class boolean vector)
 83 |     can be used in reading fg or bg
 84 |     """
 85 |     backgrounds = []
 86 |     with open(data_list, 'r') as f:
 87 |         lines = f.readlines()
 88 |     for line in tqdm(lines, total=len(lines), desc="reading real VOC bg"):
 89 |         fields = line.strip().split()
 90 |         img_path = os.path.join(data_dir, fields[0] + '.jpg')
 91 |         anno_path = os.path.join(anno_dir, fields[0] + '.xml')
 92 |         seg_img_path = os.path.join(seg_dir, fields[0] + '.png')
 93 |         assert os.path.exists(anno_path)
 94 |         assert os.path.exists(img_path)
 95 |         assert os.path.exists(seg_img_path)
 96 |         backgrounds.append(PastedBackground(
 97 |             imagepath=img_path, anno=VOCAnno(anno_path, seg_img_path)
 98 |         ))
 99 |     return backgrounds
def read_background_template(data_dir: str) -> List[PastedBackground]:
    """Collect template background PNGs under *data_dir* (recursive),
    skipping download artifacts and 'group_0' files."""
    backgrounds = []
    for rgb_file in glob.iglob(f"{data_dir}/**/*.png", recursive=True):
        path_text = str(rgb_file)
        if "azDownload" in path_text or "group_0" in path_text:
            continue
        backgrounds.append(PastedBackground(rgb_file))
    return backgrounds
108 | 
def read_dalle_backgrounds(data_dir: str, clip_strategy="use") -> List[PastedBackground]:
    """
    use syn images for background, and ignore possible foreground in syn images
    data_dir: path to get dalle generated syn images
    clip_strategy:
        "use"     -- keep only images selected by CLIP post-processing
        "reverse" -- keep only images whose image-id is NOT in the CLIP results
        other     -- ignore CLIP; take every entry directly under data_dir
    """
    data_dir = Path(data_dir)
    backgrounds = []
    if clip_strategy in ["use", "reverse"]:
        with open(data_dir / "clip_postprocessed.json") as f:
            data = json.load(f)
        if clip_strategy == "use":
            # keep only in CLIP processed
            for imageid, captions in data.items():
                for caption, selected_ids in captions.items():
                    for id in selected_ids:
                        img = data_dir / f"{imageid}.jpg" / caption / id
                        assert img.exists()
                        backgrounds.append(PastedBackground(str(img)))
        else:  # keep only NOT in CLIP processed
            for imgid in data_dir.iterdir():
                if not imgid.name.endswith(".jpg"):
                    continue
                # Bug fix: `imgid` is a Path while `data` keys are image-id
                # strings without the ".jpg" suffix (the "use" branch builds
                # f"{imageid}.jpg" dir names), so `imgid not in data` was
                # always True. Compare the stem against the JSON keys.
                if imgid.stem not in data:
                    for caption in imgid.iterdir():
                        for img in caption.iterdir():
                            backgrounds.append(PastedBackground(str(img)))
    else:  # do not use clip but raw
        for img in data_dir.iterdir():
            backgrounds.append(PastedBackground(str(img)))
    return backgrounds
139 | 
def convert_to_COCO(input_dir,
                    image_id_src, background, blending_list,
                    image_folder="Images", image_suffix="png", output_dir=None):
    """
    image in input_dir / image_folder / image_id_src
    save in output_dir / tmp
    COCO image path in the format of image_folder / image_id_src, relative so that in detectron we can provide input_dir (eg use in remote server)
    """
    if output_dir is None:
        output_dir = input_dir
    # Hoisted out of the loop: raise PIL's PNG text-chunk limit once instead
    # of re-importing and re-assigning the module global per blending variant.
    from PIL import PngImagePlugin
    LARGE_ENOUGH_NUMBER = 100
    PngImagePlugin.MAX_TEXT_CHUNK = LARGE_ENOUGH_NUMBER * (1024 ** 2)
    output_json_dict = {
        "images": [],
        "annotations": []
    }
    bnd_id = 1
    for blending in blending_list:
        # image_id eg 2007_000515_16
        if blending != "":
            image_id = f"{image_id_src}_{blending}"
        else:
            image_id = image_id_src
        file_name = os.path.join(image_folder, f"{image_id}.{image_suffix}")
        # Only the dimensions are needed; `with` closes the file handle
        # promptly instead of leaking it until GC.
        with Image.open(input_dir / file_name) as img:
            width, height = img.size
        output_json_dict["images"].append({
            "file_name": file_name,
            "height": height,
            "width": width,
            "id": image_id
        })
        for polygons, bbox, area, category in background.to_COCO_ann():
            output_json_dict["annotations"].append({
                "segmentation": polygons,
                "area": area,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": bbox,
                "category_id": int(category),
                "id": f"{image_id}_{bnd_id}"
            })
            bnd_id += 1
    tmpdir = output_dir / "tmp"
    os.makedirs(tmpdir, exist_ok=True)
    tmp_json_path = tmpdir / f"{image_id_src}.json"
    with open(tmp_json_path, "w") as f:
        json.dump(output_json_dict, f)
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000039.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_000039.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		tvmonitor 
17 | 		Frontal 
18 | 		0 
19 | 		0 
20 | 		
21 | 			156 
22 | 			89 
23 | 			344 
24 | 			279 
25 | 		 
26 | 	 
27 |  
28 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000063.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_000063.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		dog 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			123 
22 | 			115 
23 | 			379 
24 | 			275 
25 | 		 
26 | 	 
27 | 	
28 | 		chair 
29 | 		Frontal 
30 | 		1 
31 | 		0 
32 | 		
33 | 			75 
34 | 			1 
35 | 			428 
36 | 			375 
37 | 		 
38 | 	 
39 |  
40 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000648.xml:
--------------------------------------------------------------------------------
  1 | 
  2 | 	VOC2012 
  3 | 	2007_000648.jpg 
  4 | 	
  5 | 		The VOC2007 Database 
  6 | 		PASCAL VOC2007 
  7 | 		flickr 
  8 | 	 
  9 | 	
 10 | 		500 
 11 | 		333 
 12 | 		3 
 13 | 	 
 14 | 	1 
 15 | 	
 16 | 		person 
 17 | 		Unspecified 
 18 | 		0 
 19 | 		1 
 20 | 		
 21 | 			394 
 22 | 			199 
 23 | 			404 
 24 | 			223 
 25 | 		 
 26 | 	 
 27 | 	
 28 | 		person 
 29 | 		Unspecified 
 30 | 		0 
 31 | 		1 
 32 | 		
 33 | 			424 
 34 | 			199 
 35 | 			436 
 36 | 			220 
 37 | 		 
 38 | 	 
 39 | 	
 40 | 		person 
 41 | 		Unspecified 
 42 | 		0 
 43 | 		1 
 44 | 		
 45 | 			434 
 46 | 			196 
 47 | 			444 
 48 | 			220 
 49 | 		 
 50 | 	 
 51 | 	
 52 | 		person 
 53 | 		Unspecified 
 54 | 		0 
 55 | 		1 
 56 | 		
 57 | 			443 
 58 | 			195 
 59 | 			452 
 60 | 			220 
 61 | 		 
 62 | 	 
 63 | 	
 64 | 		bus 
 65 | 		Right 
 66 | 		0 
 67 | 		0 
 68 | 		
 69 | 			29 
 70 | 			113 
 71 | 			353 
 72 | 			266 
 73 | 		 
 74 | 	 
 75 | 	
 76 | 		aeroplane 
 77 | 		Unspecified 
 78 | 		1 
 79 | 		1 
 80 | 		
 81 | 			328 
 82 | 			86 
 83 | 			474 
 84 | 			192 
 85 | 		 
 86 | 	 
 87 | 	
 88 | 		car 
 89 | 		Unspecified 
 90 | 		1 
 91 | 		1 
 92 | 		
 93 | 			2 
 94 | 			213 
 95 | 			28 
 96 | 			235 
 97 | 		 
 98 | 	 
 99 |  
100 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001420.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_001420.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		332 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		horse 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			192 
22 | 			109 
23 | 			340 
24 | 			270 
25 | 		 
26 | 	 
27 | 	
28 | 		person 
29 | 		Unspecified 
30 | 		0 
31 | 		0 
32 | 		
33 | 			281 
34 | 			80 
35 | 			364 
36 | 			268 
37 | 		 
38 | 	 
39 | 	
40 | 		pottedplant 
41 | 		Unspecified 
42 | 		0 
43 | 		0 
44 | 		
45 | 			436 
46 | 			148 
47 | 			500 
48 | 			306 
49 | 		 
50 | 	 
51 |  
52 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001709.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_001709.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		333 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		person 
17 | 		Frontal 
18 | 		1 
19 | 		0 
20 | 		
21 | 			367 
22 | 			132 
23 | 			426 
24 | 			287 
25 | 		 
26 | 	 
27 | 	
28 | 		person 
29 | 		Unspecified 
30 | 		1 
31 | 		1 
32 | 		
33 | 			1 
34 | 			1 
35 | 			202 
36 | 			333 
37 | 		 
38 | 	 
39 | 	
40 | 		motorbike 
41 | 		Left 
42 | 		1 
43 | 		0 
44 | 		
45 | 			1 
46 | 			45 
47 | 			412 
48 | 			333 
49 | 		 
50 | 	 
51 |  
52 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001901.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_001901.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		375 
11 | 		500 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		chair 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			192 
22 | 			261 
23 | 			247 
24 | 			390 
25 | 		 
26 | 	 
27 | 	
28 | 		chair 
29 | 		Unspecified 
30 | 		1 
31 | 		1 
32 | 		
33 | 			292 
34 | 			255 
35 | 			312 
36 | 			415 
37 | 		 
38 | 	 
39 | 	
40 | 		chair 
41 | 		Right 
42 | 		0 
43 | 		0 
44 | 		
45 | 			233 
46 | 			258 
47 | 			308 
48 | 			400 
49 | 		 
50 | 	 
51 | 	
52 | 		diningtable 
53 | 		Unspecified 
54 | 		1 
55 | 		0 
56 | 		
57 | 			238 
58 | 			266 
59 | 			312 
60 | 			413 
61 | 		 
62 | 	 
63 | 	
64 | 		sofa 
65 | 		Unspecified 
66 | 		1 
67 | 		1 
68 | 		
69 | 			4 
70 | 			284 
71 | 			173 
72 | 			497 
73 | 		 
74 | 	 
75 |  
76 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002216.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_002216.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		340 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		bus 
17 | 		Unspecified 
18 | 		1 
19 | 		0 
20 | 		
21 | 			360 
22 | 			110 
23 | 			500 
24 | 			313 
25 | 		 
26 | 	 
27 | 	
28 | 		bus 
29 | 		Unspecified 
30 | 		0 
31 | 		0 
32 | 		
33 | 			65 
34 | 			104 
35 | 			396 
36 | 			319 
37 | 		 
38 | 	 
39 |  
40 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002668.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_002668.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		diningtable 
17 | 		Unspecified 
18 | 		1 
19 | 		1 
20 | 		
21 | 			86 
22 | 			312 
23 | 			499 
24 | 			374 
25 | 		 
26 | 	 
27 | 	
28 | 		person 
29 | 		Unspecified 
30 | 		1 
31 | 		1 
32 | 		
33 | 			320 
34 | 			102 
35 | 			500 
36 | 			319 
37 | 		 
38 | 	 
39 | 	
40 | 		person 
41 | 		Unspecified 
42 | 		1 
43 | 		0 
44 | 		
45 | 			426 
46 | 			105 
47 | 			462 
48 | 			134 
49 | 		 
50 | 	 
51 | 	
52 | 		person 
53 | 		Frontal 
54 | 		1 
55 | 		0 
56 | 		
57 | 			95 
58 | 			83 
59 | 			309 
60 | 			319 
61 | 		 
62 | 	 
63 | 	
64 | 		person 
65 | 		Frontal 
66 | 		1 
67 | 		0 
68 | 		
69 | 			89 
70 | 			169 
71 | 			112 
72 | 			190 
73 | 		 
74 | 	 
75 | 	
76 | 		pottedplant 
77 | 		Unspecified 
78 | 		1 
79 | 		1 
80 | 		
81 | 			19 
82 | 			124 
83 | 			97 
84 | 			187 
85 | 		 
86 | 	 
87 |  
88 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002669.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_002669.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		cow 
17 | 		Unspecified 
18 | 		1 
19 | 		1 
20 | 		
21 | 			292 
22 | 			137 
23 | 			372 
24 | 			174 
25 | 		 
26 | 	 
27 | 	
28 | 		cow 
29 | 		Left 
30 | 		0 
31 | 		0 
32 | 		
33 | 			116 
34 | 			191 
35 | 			245 
36 | 			287 
37 | 		 
38 | 	 
39 | 	
40 | 		person 
41 | 		Frontal 
42 | 		0 
43 | 		0 
44 | 		
45 | 			442 
46 | 			241 
47 | 			478 
48 | 			294 
49 | 		 
50 | 	 
51 | 	
52 | 		cow 
53 | 		Right 
54 | 		0 
55 | 		0 
56 | 		
57 | 			163 
58 | 			184 
59 | 			273 
60 | 			269 
61 | 		 
62 | 	 
63 |  
64 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002845.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_002845.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		375 
11 | 		500 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		sheep 
17 | 		Right 
18 | 		0 
19 | 		0 
20 | 		
21 | 			284 
22 | 			196 
23 | 			326 
24 | 			216 
25 | 		 
26 | 	 
27 | 	
28 | 		sheep 
29 | 		Right 
30 | 		0 
31 | 		0 
32 | 		
33 | 			14 
34 | 			265 
35 | 			175 
36 | 			347 
37 | 		 
38 | 	 
39 | 	
40 | 		sheep 
41 | 		Right 
42 | 		0 
43 | 		0 
44 | 		
45 | 			140 
46 | 			199 
47 | 			186 
48 | 			235 
49 | 		 
50 | 	 
51 | 	
52 | 		sheep 
53 | 		Frontal 
54 | 		0 
55 | 		0 
56 | 		
57 | 			220 
58 | 			210 
59 | 			248 
60 | 			229 
61 | 		 
62 | 	 
63 | 	
64 | 		sheep 
65 | 		Frontal 
66 | 		0 
67 | 		0 
68 | 		
69 | 			254 
70 | 			204 
71 | 			279 
72 | 			227 
73 | 		 
74 | 	 
75 | 	
76 | 		sheep 
77 | 		Unspecified 
78 | 		0 
79 | 		0 
80 | 		
81 | 			230 
82 | 			201 
83 | 			256 
84 | 			220 
85 | 		 
86 | 	 
87 |  
88 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003207.xml:
--------------------------------------------------------------------------------
  1 | 
  2 | 	VOC2012 
  3 | 	2007_003207.jpg 
  4 | 	
  5 | 		The VOC2007 Database 
  6 | 		PASCAL VOC2007 
  7 | 		flickr 
  8 | 	 
  9 | 	
 10 | 		500 
 11 | 		333 
 12 | 		3 
 13 | 	 
 14 | 	1 
 15 | 	
 16 | 		bottle 
 17 | 		Unspecified 
 18 | 		0 
 19 | 		0 
 20 | 		
 21 | 			124 
 22 | 			6 
 23 | 			162 
 24 | 			102 
 25 | 		 
 26 | 	 
 27 | 	
 28 | 		bottle 
 29 | 		Unspecified 
 30 | 		1 
 31 | 		0 
 32 | 		
 33 | 			94 
 34 | 			36 
 35 | 			132 
 36 | 			91 
 37 | 		 
 38 | 	 
 39 | 	
 40 | 		bottle 
 41 | 		Unspecified 
 42 | 		1 
 43 | 		0 
 44 | 		
 45 | 			71 
 46 | 			57 
 47 | 			108 
 48 | 			166 
 49 | 		 
 50 | 	 
 51 | 	
 52 | 		bottle 
 53 | 		Unspecified 
 54 | 		0 
 55 | 		0 
 56 | 		
 57 | 			100 
 58 | 			58 
 59 | 			150 
 60 | 			188 
 61 | 		 
 62 | 	 
 63 | 	
 64 | 		bottle 
 65 | 		Unspecified 
 66 | 		1 
 67 | 		0 
 68 | 		
 69 | 			13 
 70 | 			71 
 71 | 			61 
 72 | 			184 
 73 | 		 
 74 | 	 
 75 | 	
 76 | 		bottle 
 77 | 		Unspecified 
 78 | 		0 
 79 | 		0 
 80 | 		
 81 | 			152 
 82 | 			115 
 83 | 			203 
 84 | 			237 
 85 | 		 
 86 | 	 
 87 | 	
 88 | 		bottle 
 89 | 		Unspecified 
 90 | 		0 
 91 | 		0 
 92 | 		
 93 | 			216 
 94 | 			102 
 95 | 			261 
 96 | 			222 
 97 | 		 
 98 | 	 
 99 | 	
100 | 		bottle 
101 | 		Unspecified 
102 | 		0 
103 | 		0 
104 | 		
105 | 			259 
106 | 			134 
107 | 			307 
108 | 			260 
109 | 		 
110 | 	 
111 | 	
112 | 		bottle 
113 | 		Unspecified 
114 | 		0 
115 | 		0 
116 | 		
117 | 			308 
118 | 			126 
119 | 			356 
120 | 			251 
121 | 		 
122 | 	 
123 | 	
124 | 		bottle 
125 | 		Unspecified 
126 | 		0 
127 | 		0 
128 | 		
129 | 			350 
130 | 			139 
131 | 			408 
132 | 			264 
133 | 		 
134 | 	 
135 |  
136 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003565.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_003565.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		bird 
17 | 		Rear 
18 | 		0 
19 | 		0 
20 | 		
21 | 			280 
22 | 			218 
23 | 			500 
24 | 			317 
25 | 		 
26 | 	 
27 |  
28 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003778.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_003778.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		500 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		cat 
17 | 		Frontal 
18 | 		0 
19 | 		0 
20 | 		
21 | 			38 
22 | 			67 
23 | 			443 
24 | 			466 
25 | 		 
26 | 	 
27 | 	
28 | 		pottedplant 
29 | 		Unspecified 
30 | 		1 
31 | 		0 
32 | 		
33 | 			55 
34 | 			75 
35 | 			221 
36 | 			256 
37 | 		 
38 | 	 
39 | 	
40 | 		pottedplant 
41 | 		Unspecified 
42 | 		1 
43 | 		0 
44 | 		
45 | 			380 
46 | 			94 
47 | 			496 
48 | 			270 
49 | 		 
50 | 	 
51 | 	
52 | 		pottedplant 
53 | 		Unspecified 
54 | 		0 
55 | 		0 
56 | 		
57 | 			432 
58 | 			54 
59 | 			500 
60 | 			289 
61 | 		 
62 | 	 
63 |  
64 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003876.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_003876.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		382 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		aeroplane 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			93 
22 | 			98 
23 | 			174 
24 | 			144 
25 | 		 
26 | 	 
27 |  
28 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_004166.xml:
--------------------------------------------------------------------------------
  1 | 
  2 | 	VOC2012 
  3 | 	2007_004166.jpg 
  4 | 	
  5 | 		The VOC2007 Database 
  6 | 		PASCAL VOC2007 
  7 | 		flickr 
  8 | 	 
  9 | 	
 10 | 		500 
 11 | 		332 
 12 | 		3 
 13 | 	 
 14 | 	1 
 15 | 	
 16 | 		tvmonitor 
 17 | 		Unspecified 
 18 | 		0 
 19 | 		1 
 20 | 		
 21 | 			243 
 22 | 			129 
 23 | 			304 
 24 | 			181 
 25 | 		 
 26 | 	 
 27 | 	
 28 | 		chair 
 29 | 		Unspecified 
 30 | 		1 
 31 | 		1 
 32 | 		
 33 | 			485 
 34 | 			189 
 35 | 			500 
 36 | 			222 
 37 | 		 
 38 | 	 
 39 | 	
 40 | 		chair 
 41 | 		Unspecified 
 42 | 		1 
 43 | 		1 
 44 | 		
 45 | 			358 
 46 | 			195 
 47 | 			398 
 48 | 			239 
 49 | 		 
 50 | 	 
 51 | 	
 52 | 		chair 
 53 | 		Unspecified 
 54 | 		1 
 55 | 		1 
 56 | 		
 57 | 			300 
 58 | 			205 
 59 | 			355 
 60 | 			254 
 61 | 		 
 62 | 	 
 63 | 	
 64 | 		chair 
 65 | 		Unspecified 
 66 | 		1 
 67 | 		1 
 68 | 		
 69 | 			265 
 70 | 			278 
 71 | 			374 
 72 | 			332 
 73 | 		 
 74 | 	 
 75 | 	
 76 | 		sofa 
 77 | 		Unspecified 
 78 | 		1 
 79 | 		1 
 80 | 		
 81 | 			1 
 82 | 			199 
 83 | 			99 
 84 | 			332 
 85 | 		 
 86 | 	 
 87 | 	
 88 | 		diningtable 
 89 | 		Unspecified 
 90 | 		1 
 91 | 		0 
 92 | 		
 93 | 			234 
 94 | 			204 
 95 | 			500 
 96 | 			332 
 97 | 		 
 98 | 	 
 99 | 	
100 | 		dog 
101 | 		Unspecified 
102 | 		0 
103 | 		0 
104 | 		
105 | 			210 
106 | 			194 
107 | 			257 
108 | 			234 
109 | 		 
110 | 	 
111 |  
112 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_005273.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_005273.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		375 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		car 
17 | 		Right 
18 | 		1 
19 | 		0 
20 | 		
21 | 			108 
22 | 			55 
23 | 			285 
24 | 			156 
25 | 		 
26 | 	 
27 | 	
28 | 		car 
29 | 		Right 
30 | 		1 
31 | 		0 
32 | 		
33 | 			235 
34 | 			84 
35 | 			418 
36 | 			153 
37 | 		 
38 | 	 
39 | 	
40 | 		person 
41 | 		Left 
42 | 		0 
43 | 		0 
44 | 		
45 | 			336 
46 | 			12 
47 | 			415 
48 | 			160 
49 | 		 
50 | 	 
51 | 	
52 | 		person 
53 | 		Unspecified 
54 | 		1 
55 | 		0 
56 | 		
57 | 			397 
58 | 			50 
59 | 			439 
60 | 			154 
61 | 		 
62 | 	 
63 | 	
64 | 		bicycle 
65 | 		Unspecified 
66 | 		1 
67 | 		0 
68 | 		
69 | 			1 
70 | 			112 
71 | 			305 
72 | 			357 
73 | 		 
74 | 	 
75 |  
76 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_005702.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_005702.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		333 
11 | 		500 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		bicycle 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			38 
22 | 			227 
23 | 			292 
24 | 			500 
25 | 		 
26 | 	 
27 | 	
28 | 		person 
29 | 		Unspecified 
30 | 		0 
31 | 		0 
32 | 		
33 | 			76 
34 | 			63 
35 | 			327 
36 | 			465 
37 | 		 
38 | 	 
39 |  
40 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006303.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_006303.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		371 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		cat 
17 | 		Unspecified 
18 | 		0 
19 | 		0 
20 | 		
21 | 			181 
22 | 			136 
23 | 			302 
24 | 			320 
25 | 		 
26 | 	 
27 | 	
28 | 		pottedplant 
29 | 		Unspecified 
30 | 		1 
31 | 		0 
32 | 		
33 | 			354 
34 | 			3 
35 | 			500 
36 | 			371 
37 | 		 
38 | 	 
39 |  
40 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006400.xml:
--------------------------------------------------------------------------------
 1 | 
 2 | 	VOC2012 
 3 | 	2007_006400.jpg 
 4 | 	
 5 | 		The VOC2007 Database 
 6 | 		PASCAL VOC2007 
 7 | 		flickr 
 8 | 	 
 9 | 	
10 | 		500 
11 | 		335 
12 | 		3 
13 | 	 
14 | 	1 
15 | 	
16 | 		train 
17 | 		Frontal 
18 | 		1 
19 | 		0 
20 | 		
21 | 			433 
22 | 			80 
23 | 			500 
24 | 			256 
25 | 		 
26 | 	 
27 | 	
28 | 		train 
29 | 		Unspecified 
30 | 		1 
31 | 		0 
32 | 		
33 | 			189 
34 | 			101 
35 | 			440 
36 | 			235 
37 | 		 
38 | 	 
39 | 	
40 | 		train 
41 | 		Unspecified 
42 | 		1 
43 | 		0 
44 | 		
45 | 			40 
46 | 			125 
47 | 			199 
48 | 			203 
49 | 		 
50 | 	 
51 |  
52 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006673.xml:
--------------------------------------------------------------------------------
  1 | 
  2 | 	VOC2012 
  3 | 	2007_006673.jpg 
  4 | 	
  5 | 		The VOC2007 Database 
  6 | 		PASCAL VOC2007 
  7 | 		flickr 
  8 | 	 
  9 | 	
 10 | 		500 
 11 | 		333 
 12 | 		3 
 13 | 	 
 14 | 	1 
 15 | 	
 16 | 		boat 
 17 | 		Right 
 18 | 		0 
 19 | 		0 
 20 | 		
 21 | 			394 
 22 | 			42 
 23 | 			445 
 24 | 			234 
 25 | 		 
 26 | 	 
 27 | 	
 28 | 		boat 
 29 | 		Right 
 30 | 		0 
 31 | 		0 
 32 | 		
 33 | 			353 
 34 | 			81 
 35 | 			390 
 36 | 			207 
 37 | 		 
 38 | 	 
 39 | 	
 40 | 		boat 
 41 | 		Right 
 42 | 		0 
 43 | 		0 
 44 | 		
 45 | 			309 
 46 | 			52 
 47 | 			372 
 48 | 			246 
 49 | 		 
 50 | 	 
 51 | 	
 52 | 		boat 
 53 | 		Right 
 54 | 		0 
 55 | 		0 
 56 | 		
 57 | 			294 
 58 | 			110 
 59 | 			324 
 60 | 			196 
 61 | 		 
 62 | 	 
 63 | 	
 64 | 		boat 
 65 | 		Right 
 66 | 		0 
 67 | 		0 
 68 | 		
 69 | 			242 
 70 | 			70 
 71 | 			275 
 72 | 			212 
 73 | 		 
 74 | 	 
 75 | 	
 76 | 		boat 
 77 | 		Right 
 78 | 		0 
 79 | 		0 
 80 | 		
 81 | 			230 
 82 | 			117 
 83 | 			245 
 84 | 			189 
 85 | 		 
 86 | 	 
 87 | 	
 88 | 		boat 
 89 | 		Right 
 90 | 		0 
 91 | 		0 
 92 | 		
 93 | 			214 
 94 | 			112 
 95 | 			239 
 96 | 			201 
 97 | 		 
 98 | 	 
 99 | 	
100 | 		boat 
101 | 		Right 
102 | 		0 
103 | 		0 
104 | 		
105 | 			187 
106 | 			114 
107 | 			210 
108 | 			195 
109 | 		 
110 | 	 
111 | 	
112 | 		boat 
113 | 		Right 
114 | 		0 
115 | 		0 
116 | 		
117 | 			171 
118 | 			107 
119 | 			206 
120 | 			223 
121 | 		 
122 | 	 
123 | 	
124 | 		boat 
125 | 		Right 
126 | 		0 
127 | 		0 
128 | 		
129 | 			137 
130 | 			105 
131 | 			171 
132 | 			213 
133 | 		 
134 | 	 
135 | 	
136 | 		boat 
137 | 		Unspecified 
138 | 		0 
139 | 		1 
140 | 		
141 | 			43 
142 | 			224 
143 | 			71 
144 | 			234 
145 | 		 
146 | 	 
147 | 	
148 | 		boat 
149 | 		Unspecified 
150 | 		0 
151 | 		1 
152 | 		
153 | 			4 
154 | 			231 
155 | 			29 
156 | 			244 
157 | 		 
158 | 	 
159 |  
160 | 
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000039.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000039.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000063.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000063.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000648.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000648.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001420.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001420.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001709.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001709.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001901.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001901.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002216.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002216.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002668.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002668.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002669.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002669.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002845.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002845.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003207.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003207.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003565.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003565.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003778.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003778.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003876.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003876.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_004166.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_004166.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_005273.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005273.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_005702.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005702.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006303.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006303.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006400.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006400.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006673.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006673.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000039.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000039.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000063.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000063.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000648.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000648.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001420.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001420.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001709.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001709.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001901.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001901.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002216.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002216.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002668.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002668.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002669.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002669.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002845.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002845.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003207.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003207.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003565.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003565.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003778.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003778.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003876.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003876.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_004166.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_004166.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_005273.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005273.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_005702.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005702.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006303.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006400.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006673.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006673.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/589.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/589.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/590.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/590.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/591.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/591.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/593.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/593.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/598.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/598.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/clip_postprocessed.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "2007_003451": {
 3 |         "a real image of an empty living room filled with furniture and a large window": [
 4 |             "40.png"
 5 |         ]
 6 |     },
 7 |     "2007_002227": {
 8 |         "a real image of an empty wall in a living room": [
 9 |             "45.png"
10 |         ]
11 |     },
12 |     "2007_006136": {
13 |         "a real image of an empty grass near a forest": [
14 |             "33.png"
15 |         ]
16 |     },
17 |     "2007_002281": {
18 |         "a colored photo of an empty street next to a forest": [
19 |             "79.png"
20 |         ]
21 |     },
22 |     "2007_000504": {
23 |         "a colored photo of an empty pile of trash on the ground": [
24 |             "49.png"
25 |         ]
26 |     },
27 |     "2007_007585": {
28 |         "a real image of an empty grass field": [
29 |             "67.png"
30 |         ]
31 |     },
32 |     "2007_001764": {
33 |         "a real image of an empty grass covered field": [
34 |             "71.png"
35 |         ]
36 |     },
37 |     "2007_002967": {
38 |         "a real image of an empty doorstep": [
39 |             "58.png"
40 |         ]
41 |     },
42 |     "2007_001609": {
43 |         "a colored photo of an empty living room": [
44 |             "69.png"
45 |         ]
46 |     },
47 |     "2007_005124": {
48 |         "a real image of an empty grass field": [
49 |             "58.png"
50 |         ]
51 |     }
52 | }
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/a boat79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat79.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/a bus105.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus105.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/a bus410.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus410.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/a car97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/a car97.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat245.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat245.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat58.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat62.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat62.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/a chair402.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair402.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow223.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow56.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/a dog121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog121.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/a horse298.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse298.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man146.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man253.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man345.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man345.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background32.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/a boat/79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat/79.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/The picture of a bus on a pure background/460.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/The picture of a bus on a pure background/460.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/a bus/105.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/105.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/a bus/410.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/410.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/a car/97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/a car/97.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/245.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/245.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/58.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/62.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/62.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/a chair/402.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair/402.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow/223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/223.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow/56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/56.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/a dog/121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog/121.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/a horse/298.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse/298.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/146.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/253.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/345.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/345.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/11.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a table/a table/123.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/123.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a table/a table/300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/300.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png
--------------------------------------------------------------------------------
/data/test_data/labels.txt:
--------------------------------------------------------------------------------
 1 | 0	__background__
 2 | 1	aeroplane
 3 | 2	bicycle
 4 | 3	bird
 5 | 4	boat
 6 | 5	bottle
 7 | 6	bus
 8 | 7	car
 9 | 8	cat
10 | 9	chair
11 | 10	cow
12 | 11	diningtable
13 | 12	dog
14 | 13	horse
15 | 14	motorbike
16 | 15	person
17 | 16	pottedplant
18 | 17	sheep
19 | 18	sofa
20 | 19	train
21 | 20	tvmonitor
--------------------------------------------------------------------------------
/data/test_data/train_cls-1shot.txt:
--------------------------------------------------------------------------------
 1 | 2007_000039 19 
 2 | 2007_000063 8 11 
 3 | 2007_000648 0 5 6 14 
 4 | 2007_001420 12 14 15 
 5 | 2007_001709 13 14 
 6 | 2007_001901 8 10 17 
 7 | 2007_002216 5 
 8 | 2007_002668 10 14 15 
 9 | 2007_002669 9 14 
10 | 2007_002845 16 
11 | 2007_003207 4 
12 | 2007_003565 2 
13 | 2007_003778 7 15 
14 | 2007_003876 0 
15 | 2007_004166 8 10 11 17 19 
16 | 2007_005273 1 6 14 
17 | 2007_005702 1 14 
18 | 2007_006303 7 15 
19 | 2007_006400 18 
20 | 2007_006673 3 
21 | 
--------------------------------------------------------------------------------
/data/voc2012/1k_10_shot.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "0": [
  3 |         "2007_000480.jpg",
  4 |         "2007_002198.jpg",
  5 |         "2007_004009.jpg",
  6 |         "2007_000648.jpg",
  7 |         "2007_000738.jpg",
  8 |         "2007_003000.jpg",
  9 |         "2007_000032.jpg",
 10 |         "2007_002107.jpg",
 11 |         "2007_003876.jpg",
 12 |         "2007_002099.jpg",
 13 |         "2007_000256.jpg",
 14 |         "2007_000243.jpg"
 15 |     ],
 16 |     "1": [
 17 |         "2007_004769.jpg",
 18 |         "2007_002227.jpg",
 19 |         "2007_000584.jpg",
 20 |         "2007_006317.jpg",
 21 |         "2007_000793.jpg",
 22 |         "2007_005273.jpg",
 23 |         "2007_001027.jpg",
 24 |         "2007_005430.jpg",
 25 |         "2007_000515.jpg",
 26 |         "2007_005368.jpg",
 27 |         "2007_005064.jpg",
 28 |         "2007_005702.jpg"
 29 |     ],
 30 |     "2": [
 31 |         "2007_009607.jpg",
 32 |         "2007_003565.jpg",
 33 |         "2007_002212.jpg",
 34 |         "2007_009759.jpg",
 35 |         "2007_002403.jpg",
 36 |         "2007_003330.jpg",
 37 |         "2007_002896.jpg",
 38 |         "2007_003267.jpg",
 39 |         "2007_006490.jpg",
 40 |         "2007_003118.jpg",
 41 |         "2007_000645.jpg",
 42 |         "2007_000363.jpg",
 43 |         "2007_000068.jpg",
 44 |         "2007_002120.jpg"
 45 |     ],
 46 |     "3": [
 47 |         "2007_006281.jpg",
 48 |         "2007_000713.jpg",
 49 |         "2007_000241.jpg",
 50 |         "2007_006660.jpg",
 51 |         "2007_006673.jpg",
 52 |         "2007_003910.jpg",
 53 |         "2007_002234.jpg",
 54 |         "2007_001487.jpg",
 55 |         "2007_001698.jpg"
 56 |     ],
 57 |     "4": [
 58 |         "2007_006409.jpg",
 59 |         "2007_004476.jpg",
 60 |         "2007_003207.jpg",
 61 |         "2007_000170.jpg",
 62 |         "2007_006483.jpg",
 63 |         "2007_002545.jpg",
 64 |         "2007_003451.jpg",
 65 |         "2007_004291.jpg",
 66 |         "2007_001185.jpg",
 67 |         "2007_002953.jpg",
 68 |         "2007_003431.jpg",
 69 |         "2007_007250.jpg",
 70 |         "2007_001602.jpg",
 71 |         "2007_000250.jpg"
 72 |     ],
 73 |     "5": [
 74 |         "2007_004705.jpg",
 75 |         "2007_004065.jpg",
 76 |         "2007_001595.jpg",
 77 |         "2007_007003.jpg",
 78 |         "2007_000768.jpg",
 79 |         "2007_003715.jpg",
 80 |         "2007_002024.jpg",
 81 |         "2007_002216.jpg",
 82 |         "2007_005262.jpg"
 83 |     ],
 84 |     "6": [
 85 |         "2007_002281.jpg",
 86 |         "2007_004481.jpg",
 87 |         "2007_004810.jpg",
 88 |         "2007_005988.jpg",
 89 |         "2007_002370.jpg",
 90 |         "2011_001004.jpg",
 91 |         "2007_003815.jpg",
 92 |         "2007_002789.jpg",
 93 |         "2007_006151.jpg",
 94 |         "2007_004830.jpg",
 95 |         "2007_001857.jpg"
 96 |     ],
 97 |     "7": [
 98 |         "2007_005688.jpg",
 99 |         "2007_001825.jpg",
100 |         "2007_002760.jpg",
101 |         "2007_000549.jpg",
102 |         "2007_003778.jpg",
103 |         "2007_000528.jpg",
104 |         "2007_003788.jpg",
105 |         "2007_006303.jpg",
106 |         "2011_000999.jpg",
107 |         "2007_004998.jpg",
108 |         "2007_000876.jpg",
109 |         "2007_003525.jpg"
110 |     ],
111 |     "8": [
112 |         "2007_006004.jpg",
113 |         "2007_005212.jpg",
114 |         "2007_003541.jpg",
115 |         "2007_006477.jpg",
116 |         "2007_004166.jpg",
117 |         "2007_005647.jpg",
118 |         "2007_003251.jpg",
119 |         "2007_001609.jpg",
120 |         "2007_006530.jpg",
121 |         "2007_001901.jpg",
122 |         "2007_001340.jpg",
123 |         "2007_005266.jpg",
124 |         "2007_000063.jpg",
125 |         "2007_003205.jpg",
126 |         "2007_003889.jpg",
127 |         "2007_006066.jpg",
128 |         "2007_002368.jpg",
129 |         "2007_005086.jpg",
130 |         "2007_001439.jpg"
131 |     ],
132 |     "9": [
133 |         "2007_002669.jpg",
134 |         "2007_000904.jpg",
135 |         "2007_001764.jpg",
136 |         "2007_000504.jpg",
137 |         "2007_001917.jpg",
138 |         "2007_004537.jpg",
139 |         "2007_004081.jpg",
140 |         "2007_004500.jpg",
141 |         "2007_002088.jpg",
142 |         "2007_001073.jpg",
143 |         "2007_005797.jpg",
144 |         "2007_005124.jpg"
145 |     ],
146 |     "10": [
147 |         "2007_002914.jpg",
148 |         "2007_005790.jpg",
149 |         "2007_003529.jpg",
150 |         "2007_006699.jpg",
151 |         "2007_002668.jpg",
152 |         "2007_003668.jpg",
153 |         "2007_001834.jpg"
154 |     ],
155 |     "11": [
156 |         "2007_002611.jpg",
157 |         "2007_000720.jpg",
158 |         "2007_003604.jpg",
159 |         "2007_001397.jpg",
160 |         "2007_002055.jpg",
161 |         "2007_001225.jpg",
162 |         "2007_009605.jpg",
163 |         "2007_009327.jpg",
164 |         "2007_007585.jpg",
165 |         "2007_007930.jpg"
166 |     ],
167 |     "12": [
168 |         "2007_001724.jpg",
169 |         "2007_002273.jpg",
170 |         "2007_006445.jpg",
171 |         "2007_000392.jpg",
172 |         "2007_003189.jpg",
173 |         "2007_005248.jpg",
174 |         "2007_000836.jpg",
175 |         "2007_001960.jpg",
176 |         "2007_001420.jpg",
177 |         "2007_006134.jpg"
178 |     ],
179 |     "13": [
180 |         "2007_002488.jpg",
181 |         "2007_005989.jpg",
182 |         "2007_000822.jpg",
183 |         "2007_005951.jpg",
184 |         "2007_004003.jpg",
185 |         "2007_005314.jpg",
186 |         "2007_000733.jpg",
187 |         "2007_001709.jpg",
188 |         "2007_000364.jpg",
189 |         "2007_005878.jpg",
190 |         "2007_002105.jpg"
191 |     ],
192 |     "14": [
193 |         "2007_002895.jpg",
194 |         "2007_002639.jpg",
195 |         "2007_002361.jpg",
196 |         "2007_002954.jpg",
197 |         "2007_004289.jpg",
198 |         "2007_004707.jpg",
199 |         "2007_002293.jpg",
200 |         "2007_002142.jpg"
201 |     ],
202 |     "15": [
203 |         "2007_004948.jpg",
204 |         "2007_002967.jpg",
205 |         "2007_001149.jpg"
206 |     ],
207 |     "16": [
208 |         "2007_001872.jpg",
209 |         "2007_003190.jpg",
210 |         "2007_001416.jpg",
211 |         "2007_006136.jpg",
212 |         "2007_002845.jpg",
213 |         "2007_004423.jpg",
214 |         "2007_003593.jpg",
215 |         "2007_004768.jpg",
216 |         "2007_006832.jpg",
217 |         "2007_006899.jpg"
218 |     ],
219 |     "17": [
220 |         "2007_008203.jpg"
221 |     ],
222 |     "18": [
223 |         "2007_004627.jpg",
224 |         "2007_004663.jpg",
225 |         "2007_003286.jpg",
226 |         "2007_006254.jpg",
227 |         "2007_006400.jpg",
228 |         "2007_005360.jpg",
229 |         "2007_002462.jpg",
230 |         "2007_003178.jpg",
231 |         "2007_004951.jpg",
232 |         "2007_000333.jpg"
233 |     ],
234 |     "19": [
235 |         "2007_000121.jpg",
236 |         "2007_006704.jpg",
237 |         "2007_005210.jpg",
238 |         "2007_000039.jpg",
239 |         "2007_005902.jpg",
240 |         "2007_001704.jpg"
241 |     ]
242 | }
--------------------------------------------------------------------------------
/data/voc2012/1k_1_shot.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "0": [
 3 |         "2007_003876.jpg"
 4 |     ],
 5 |     "1": [
 6 |         "2007_005702.jpg"
 7 |     ],
 8 |     "2": [
 9 |         "2007_003565.jpg"
10 |     ],
11 |     "3": [
12 |         "2007_006673.jpg"
13 |     ],
14 |     "4": [
15 |         "2007_003207.jpg"
16 |     ],
17 |     "5": [
18 |         "2007_002216.jpg"
19 |     ],
20 |     "6": [
21 |         "2007_005273.jpg"
22 |     ],
23 |     "7": [
24 |         "2007_003778.jpg"
25 |     ],
26 |     "8": [
27 |         "2007_000063.jpg"
28 |     ],
29 |     "9": [
30 |         "2007_002669.jpg"
31 |     ],
32 |     "10": [
33 |         "2007_002668.jpg"
34 |     ],
35 |     "11": [
36 |         "2007_004166.jpg"
37 |     ],
38 |     "12": [
39 |         "2007_001420.jpg"
40 |     ],
41 |     "13": [
42 |         "2007_001709.jpg"
43 |     ],
44 |     "14": [
45 |         "2007_000648.jpg"
46 |     ],
47 |     "15": [
48 |         "2007_006303.jpg"
49 |     ],
50 |     "16": [
51 |         "2007_002845.jpg"
52 |     ],
53 |     "17": [
54 |         "2007_001901.jpg"
55 |     ],
56 |     "18": [
57 |         "2007_006400.jpg"
58 |     ],
59 |     "19": [
60 |         "2007_000039.jpg"
61 |     ]
62 | }
63 | 
--------------------------------------------------------------------------------
/data/voc2012/label2id.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "aeroplane": 1,
 3 |     "bicycle": 2,
 4 |     "bird": 3,
 5 |     "boat": 4,
 6 |     "bottle": 5,
 7 |     "bus": 6,
 8 |     "car": 7,
 9 |     "cat": 8,
10 |     "chair": 9,
11 |     "cow": 10,
12 |     "diningtable": 11,
13 |     "dog": 12,
14 |     "horse": 13,
15 |     "motorbike": 14,
16 |     "person": 15,
17 |     "pottedplant": 16,
18 |     "sheep": 17,
19 |     "sofa": 18,
20 |     "train": 19,
21 |     "tvmonitor": 20
22 | }
--------------------------------------------------------------------------------
/data/voc2012/labels.txt:
--------------------------------------------------------------------------------
 1 | 0	__background__
 2 | 1	aeroplane
 3 | 2	bicycle
 4 | 3	bird
 5 | 4	boat
 6 | 5	bottle
 7 | 6	bus
 8 | 7	car
 9 | 8	cat
10 | 9	chair
11 | 10	cow
12 | 11	diningtable
13 | 12	dog
14 | 13	horse
15 | 14	motorbike
16 | 15	person
17 | 16	pottedplant
18 | 17	sheep
19 | 18	sofa
20 | 19	train
21 | 20	tvmonitor
--------------------------------------------------------------------------------
/data/voc2012/train_cls-10shot.txt:
--------------------------------------------------------------------------------
  1 | 2007_000032 0 14 
  2 | 2007_000039 19 
  3 | 2007_000063 8 11 
  4 | 2007_000068 2 
  5 | 2007_000121 19 
  6 | 2007_000170 4 14 
  7 | 2007_000241 3 
  8 | 2007_000243 0 
  9 | 2007_000250 4 10 
 10 | 2007_000256 0 
 11 | 2007_000333 18 
 12 | 2007_000363 2 
 13 | 2007_000364 13 14 
 14 | 2007_000392 12 14 
 15 | 2007_000480 0 14 
 16 | 2007_000504 9 14 
 17 | 2007_000515 1 6 14 
 18 | 2007_000528 7 
 19 | 2007_000549 7 
 20 | 2007_000584 1 17 
 21 | 2007_000645 2 
 22 | 2007_000648 0 5 6 14 
 23 | 2007_000713 3 
 24 | 2007_000720 11 
 25 | 2007_000733 13 14 
 26 | 2007_000738 0 
 27 | 2007_000768 5 
 28 | 2007_000793 1 5 14 
 29 | 2007_000822 13 
 30 | 2007_000836 12 14 
 31 | 2007_000876 7 
 32 | 2007_000904 9 12 14 
 33 | 2007_001027 1 8 17 19 
 34 | 2007_001073 9 
 35 | 2007_001149 15 17 19 
 36 | 2007_001185 4 7 10 14 
 37 | 2007_001225 11 
 38 | 2007_001340 8 11 14 
 39 | 2007_001397 11 
 40 | 2007_001416 16 
 41 | 2007_001420 12 14 15 
 42 | 2007_001439 8 10 
 43 | 2007_001487 3 
 44 | 2007_001595 5 
 45 | 2007_001602 4 
 46 | 2007_001609 8 10 
 47 | 2007_001698 3 
 48 | 2007_001704 19 
 49 | 2007_001709 13 14 
 50 | 2007_001724 12 
 51 | 2007_001764 9 
 52 | 2007_001825 7 11 
 53 | 2007_001834 10 
 54 | 2007_001857 6 14 
 55 | 2007_001872 16 
 56 | 2007_001901 8 10 17 
 57 | 2007_001917 9 
 58 | 2007_001960 12 
 59 | 2007_002024 5 14 
 60 | 2007_002055 11 14 17 
 61 | 2007_002088 9 
 62 | 2007_002099 0 
 63 | 2007_002105 13 14 
 64 | 2007_002107 0 
 65 | 2007_002120 2 14 
 66 | 2007_002142 14 
 67 | 2007_002198 0 
 68 | 2007_002212 2 
 69 | 2007_002216 5 
 70 | 2007_002227 1 19 
 71 | 2007_002234 3 
 72 | 2007_002273 12 14 
 73 | 2007_002281 6 14 
 74 | 2007_002293 14 
 75 | 2007_002361 14 15 
 76 | 2007_002368 8 10 17 
 77 | 2007_002370 6 14 
 78 | 2007_002403 2 3 14 
 79 | 2007_002462 18 
 80 | 2007_002488 13 14 
 81 | 2007_002545 4 14 17 
 82 | 2007_002611 11 14 
 83 | 2007_002639 14 
 84 | 2007_002668 10 14 15 
 85 | 2007_002669 9 14 
 86 | 2007_002760 7 
 87 | 2007_002789 6 9 14 
 88 | 2007_002845 16 
 89 | 2007_002895 14 
 90 | 2007_002896 2 
 91 | 2007_002914 10 14 
 92 | 2007_002953 4 19 
 93 | 2007_002954 14 
 94 | 2007_002967 15 
 95 | 2007_003000 0 
 96 | 2007_003118 2 14 
 97 | 2007_003178 18 
 98 | 2007_003189 12 14 15 
 99 | 2007_003190 16 
100 | 2007_003205 8 14 
101 | 2007_003207 4 
102 | 2007_003251 8 10 
103 | 2007_003267 2 
104 | 2007_003286 18 
105 | 2007_003330 2 
106 | 2007_003431 4 14 
107 | 2007_003451 4 8 17 19 
108 | 2007_003525 7 
109 | 2007_003529 10 14 
110 | 2007_003541 8 14 
111 | 2007_003565 2 
112 | 2007_003593 16 
113 | 2007_003604 11 19 
114 | 2007_003668 10 
115 | 2007_003715 5 
116 | 2007_003778 7 15 
117 | 2007_003788 7 8 
118 | 2007_003815 6 14 
119 | 2007_003876 0 
120 | 2007_003889 8 12 14 
121 | 2007_003910 3 14 
122 | 2007_004003 13 
123 | 2007_004009 0 
124 | 2007_004065 5 14 
125 | 2007_004081 9 15 
126 | 2007_004166 8 10 11 17 19 
127 | 2007_004289 14 19 
128 | 2007_004291 4 14 
129 | 2007_004423 16 
130 | 2007_004476 4 14 
131 | 2007_004481 6 14 
132 | 2007_004500 9 
133 | 2007_004537 9 12 14 
134 | 2007_004627 18 
135 | 2007_004663 18 
136 | 2007_004705 5 6 
137 | 2007_004707 14 15 
138 | 2007_004768 16 
139 | 2007_004769 1 14 
140 | 2007_004810 6 14 
141 | 2007_004830 6 14 
142 | 2007_004948 15 
143 | 2007_004951 18 
144 | 2007_004998 7 
145 | 2007_005064 1 14 
146 | 2007_005086 8 10 14 
147 | 2007_005124 9 14 
148 | 2007_005210 19 
149 | 2007_005212 8 13 17 
150 | 2007_005248 12 14 
151 | 2007_005262 5 6 
152 | 2007_005266 8 15 17 
153 | 2007_005273 1 6 14 
154 | 2007_005314 13 
155 | 2007_005360 18 
156 | 2007_005368 1 14 
157 | 2007_005430 1 4 14 
158 | 2007_005647 8 10 17 
159 | 2007_005688 7 
160 | 2007_005702 1 14 
161 | 2007_005790 10 14 
162 | 2007_005797 9 14 
163 | 2007_005878 13 
164 | 2007_005902 19 
165 | 2007_005951 13 14 
166 | 2007_005988 6 11 14 
167 | 2007_005989 13 14 
168 | 2007_006004 8 14 
169 | 2007_006066 8 17 19 
170 | 2007_006134 12 
171 | 2007_006136 16 
172 | 2007_006151 6 12 14 
173 | 2007_006254 18 
174 | 2007_006281 3 
175 | 2007_006303 7 15 
176 | 2007_006317 1 5 14 
177 | 2007_006400 18 
178 | 2007_006409 4 10 14 
179 | 2007_006445 12 14 
180 | 2007_006477 8 14 
181 | 2007_006483 4 14 
182 | 2007_006490 2 3 14 
183 | 2007_006530 8 17 
184 | 2007_006660 3 6 
185 | 2007_006673 3 
186 | 2007_006699 10 14 
187 | 2007_006704 19 
188 | 2007_006832 16 
189 | 2007_006899 16 
190 | 2007_007003 5 6 
191 | 2007_007250 4 19 
192 | 2007_007585 11 
193 | 2007_007930 11 
194 | 2007_008203 17 
195 | 2007_009327 11 14 
196 | 2007_009605 11 
197 | 2007_009607 2 
198 | 2007_009759 2 
199 | 2011_000999 7 
200 | 2011_001004 6 
201 | 
--------------------------------------------------------------------------------
/data/voc2012/train_cls-1shot.txt:
--------------------------------------------------------------------------------
 1 | 2007_000039 19 
 2 | 2007_000063 8 11 
 3 | 2007_000648 0 5 6 14 
 4 | 2007_001420 12 14 15 
 5 | 2007_001709 13 14 
 6 | 2007_001901 8 10 17 
 7 | 2007_002216 5 
 8 | 2007_002668 10 14 15 
 9 | 2007_002669 9 14 
10 | 2007_002845 16 
11 | 2007_003207 4 
12 | 2007_003565 2 
13 | 2007_003778 7 15 
14 | 2007_003876 0 
15 | 2007_004166 8 10 11 17 19 
16 | 2007_005273 1 6 14 
17 | 2007_005702 1 14 
18 | 2007_006303 7 15 
19 | 2007_006400 18 
20 | 2007_006673 3 
21 | 
--------------------------------------------------------------------------------
/detection/train.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys, uuid
  4 | import tempfile
  5 | from detectron2 import model_zoo
  6 | from detectron2.config import get_cfg
  7 | from detectron2.engine import launch
  8 | from utils import setup_cfg, infer, Trainer
  9 | from pathlib import Path
 10 | import ujson as json
 11 | 
 12 | def parse_args():
 13 |     parser = argparse.ArgumentParser()
 14 |     parser.add_argument("--train_dataset", "-s", type=str, required=True, choices=["voc_train", "syn", "coco_train"])
 15 |     parser.add_argument("--train_dir", type=str, required=False, help="if unspecified, use default path")
 16 |     parser.add_argument("--train_coconame", type=str, required=False, help="if unspecified, use default name")
 17 |     parser.add_argument("--syn_dir", type=str, required=False, default="NOT_USED",
 18 |                     help="synthic training data folder, contains `images` for images and `COCO.json` for COCO format annotation and `label2id.json` for labels")
 19 |     parser.add_argument("--additional_dataset", nargs="+", help="when use multiple dataset other than -s, put more heavy dataset in here")
 20 | 
 21 |     parser.add_argument("--test_dataset", "-t", type=str, choices=["voc_val", "coco_val"])
 22 |     parser.add_argument("--test_dir", type=str, required=False, help="if unspecified, use default path")
 23 | 
 24 |     parser.add_argument("--lr", type=float, default=0.001)
 25 |     parser.add_argument("--wd", type=float, default=0.0005)
 26 |     parser.add_argument("--bsz", type=int, default=4)
 27 |     parser.add_argument("--freeze", default=False, action="store_true")
 28 |     parser.add_argument("--data_aug", default=False, action="store_true", help="data augmentation on synthetic data, RandomContrast etc not including crop, use crop only when --crop")
 29 |     parser.add_argument("--crop", default=False, action="store_true")
 30 |     parser.add_argument("--epoch", type=int, default=20)
 31 | 
 32 |     parser.add_argument("--seed", default=42, type=int)
 33 |     parser.add_argument("--debug", default=False, action="store_true", help="if true, don't log in wandb")
 34 | 
 35 |     parser.add_argument("--resnet", choices=[50, 101], default=50, type=int, help="whether use R101 or R50")
 36 | 
 37 |     parser.add_argument("--preview", default=False, action="store_true")
 38 |     
 39 |     parser.add_argument("--init_checkpoint", default=None, type=str)
 40 | 
 41 |     parser.add_argument("--eval_checkpoint", default=None, type=str)
 42 |     parser.add_argument("--eval_threshold", default=0.7, type=float)
 43 | 
 44 |     parser.add_argument("--num_gpus_per_machine", "-g", type=int, default=1, help="number of gpus *per machine*")
 45 |     parser.add_argument("--num_machines", type=int, default=1, help="total number of machines")
 46 | 
 47 |     args = parser.parse_args()
 48 |     return args
 49 | 
 50 | def filter(json_file, td):
 51 |     """
 52 |     filter out instance seg annotation but only object detection one
 53 |     """
 54 |     with open(json_file) as f:
 55 |         data = json.load(f)
 56 |     newanno = []
 57 |     for anno in data["annotations"]:
 58 |         if len(anno["segmentation"]) == 0:
 59 |             # already no seg
 60 |             return json_file
 61 |         anno['segmentation'] = []
 62 |         newanno.append(anno)
 63 |     data["annotations"] = newanno
 64 |     id = str(uuid.uuid4())
 65 |     os.makedirs(Path(td) / id)
 66 |     json_file = Path(td) / id / "COCO.json"
 67 |     with open(json_file, "w") as f:
 68 |         json.dump(data, f)
 69 |     return json_file
 70 | 
 71 | def fetch_cfg(args):
 72 |     cfg = get_cfg()
 73 | 
 74 |     cfg.merge_from_file(model_zoo.get_config_file(f"COCO-Detection/faster_rcnn_R_{args.resnet}_FPN_3x.yaml"))
 75 |     if args.init_checkpoint is not None:
 76 |         print("loading from ckpt:", args.init_checkpoint)
 77 |         if "PT_DATA_DIR" in os.environ:
 78 |             args.init_checkpoint = os.path.join(os.environ["PT_DATA_DIR"], args.init_checkpoint)
 79 |         cfg.MODEL.WEIGHTS = args.init_checkpoint
 80 |     else:
 81 |         cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl"
 82 |     return cfg
 83 | 
 84 | def main(args):
 85 |     cfg = fetch_cfg(args)
 86 |     cfg = setup_cfg(args, cfg, filter=filter)
 87 |     if args.eval_checkpoint is not None:
 88 |         infer(cfg)
 89 |         sys.exit(0)
 90 | 
 91 |     Trainer.data_aug = args.data_aug
 92 |     Trainer.debug = args.debug
 93 |     Trainer.project_name = "dalle-for-detection"
 94 |     trainer = Trainer(cfg)
 95 |     trainer.resume_or_load(resume=False)
 96 |     trainer.train()
 97 | 
if __name__ == "__main__":
    args = parse_args()

    # Scratch dir handed to workers via args.td (used by `filter` to write
    # detection-only COCO jsons); removed automatically on exit.
    with tempfile.TemporaryDirectory() as td:
        args.td = td
        # detectron2 launcher: spawns one process per GPU. machine_rank is
        # hard-coded to 0, so multi-machine runs treat this host as the main node.
        launch(
            main, num_gpus_per_machine=args.num_gpus_per_machine,
            num_machines=args.num_machines, machine_rank=0, dist_url="auto", args=(args, )
        )
--------------------------------------------------------------------------------
/detection/wandb_writer.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Union
 2 | 
 3 | from detectron2.config import CfgNode
 4 | from detectron2.utils.events import EventWriter, get_event_storage
 5 | 
 6 | 
 7 | class WandbWriter(EventWriter):
 8 |     """
 9 |     Write all scalars to a wandb tool.
10 |     """
11 | 
12 |     def __init__(
13 |         self,
14 |         project: str = "detectron2",
15 |         config: Union[Dict, CfgNode] = {},
16 |         window_size: int = 20,
17 |         **kwargs,
18 |     ):
19 |         """
20 |         Args:
21 |             project (str): W&B Project name
22 |             config Union[Dict, CfgNode]: the project level configuration object
23 |             window_size (int): the scalars will be median-smoothed by this window size
24 |             kwargs: other arguments passed to `wandb.init(...)`
25 |         """
26 |         import wandb
27 |         wandb.login(key='YOUR API KEY')
28 | 
29 |         self._window_size = window_size
30 |         self._run = (
31 |             wandb.init(project=project, config=config, **kwargs) if not wandb.run else wandb.run
32 |         )
33 |         self._run._label(repo="detectron2")
34 | 
35 |     def write(self):
36 |         storage = get_event_storage()
37 | 
38 |         log_dict = {}
39 |         for k, (v, _) in storage.latest_with_smoothing_hint(self._window_size).items():
40 |             log_dict[k] = v
41 | 
42 |         self._run.log(log_dict)
43 | 
44 |     def close(self):
45 |         self._run.finish()
--------------------------------------------------------------------------------
/instance_seg/run.sh:
--------------------------------------------------------------------------------
# Grid-search launcher for instance-segmentation training on synthetic data.
# Usage: run.sh <syn_dir> <resnets> <lrs>
#   syn_dir : synthetic dataset folder (must exist)
#   resnets : comma-separated ResNet depths, e.g. "50,101"
#   lrs     : comma-separated learning rates, e.g. "0.001,0.0005"
syn=$1
resnets=$2
lrs=$3
# Abort early if the synthetic data folder is missing.
if [[ -d $syn ]]; then
   echo "$syn exists"
else
    exit -1;
fi;

# One seg.py run per (resnet depth, learning rate) combination;
# commas are turned into newlines so the for-loop iterates each value.
for resnet in $(echo $resnets | tr "," "\n"); do
    for lr in $(echo $lrs | tr "," "\n"); do
        echo "$resnet with $lr"
        # NOTE(review): hard-coded conda python path — assumes this exact env exists.
        /lab/andy/anaconda3/envs/paste-segment/bin/python seg.py \
            -s syn -t voc_val \
            --blending gaussian \
            --lr $lr \
            --freeze --data_aug --crop \
            --epoch 20 \
            --resnet $resnet \
            --syn_dir $syn;
    done;
done;
--------------------------------------------------------------------------------
/instance_seg/seg.py:
--------------------------------------------------------------------------------
 1 | import os, cv2
 2 | import sys
 3 | sys.path.insert(1, os.path.join(sys.path[0], "../", 'detection'))
 4 | import tempfile
 5 | 
 6 | from detectron2 import model_zoo
 7 | from detectron2.config import get_cfg
 8 | from utils import setup_cfg, infer, Trainer
 9 | from train import parse_args
10 | 
def fetch_cfg(args):
    """Build the base Mask R-CNN config for instance segmentation.

    Uses the COCO-InstanceSegmentation R50/R101 FPN 3x model-zoo config and
    initializes from the ImageNet-pretrained backbone (not a COCO checkpoint).
    """
    cfg = get_cfg()
    zoo_yaml = f"COCO-InstanceSegmentation/mask_rcnn_R_{args.resnet}_FPN_3x.yaml"
    cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))
    cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl"
    return cfg
19 | 
if __name__ == "__main__":
    # Reuses the detection CLI (train.py's parse_args) so both tasks share flags.
    args = parse_args()

    # Scratch dir exposed to setup_cfg via args.td; removed on exit.
    with tempfile.TemporaryDirectory() as td:
        args.td = td
        cfg = fetch_cfg(args)
        cfg = setup_cfg(args, cfg)
        # Evaluation-only mode: run inference with the given checkpoint and exit.
        if args.eval_checkpoint is not None:
            infer(cfg)
            sys.exit(0)

        # Class-level switches read by the shared Trainer.
        Trainer.data_aug = args.data_aug
        Trainer.debug = args.debug
        Trainer.project_name = "paste-seg-instance"
        trainer = Trainer(cfg)
        trainer.resume_or_load(resume=False)
        trainer.train()
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # download gpu version if possible
 2 | # torch==1.10.1
 3 | # torchvision==0.11.2
 4 | # download from https://detectron2.readthedocs.io/en/latest/tutorials/install.html
 5 | # detectron2
 6 | # needed to transform on syn COCO segmentation RLE
 7 | shapely
 8 | 
 9 | numpy
10 | scikit-image==0.18.0
11 | scikit-learn
12 | Pillow
13 | tqdm
14 | opencv-python
15 | wandb
16 | pandas
17 | pycocotools
18 | hydra-core
19 | # weird issue with detectron2
20 | setuptools==59.5.0
21 | # for faster json speedup
22 | ujson
23 | # fix weird bug
24 | omegaconf==2.1
25 | transformers==4.22.2
26 | diffusers==0.9.0
27 | kornia
28 | timm
--------------------------------------------------------------------------------
/t2i_generate/background_captions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Create prompt to get pure backgrounds
 3 | """
 4 | import json
 5 | 
 6 | templates = [
 7 |     "A real photo of {obj}",
 8 | ]
 9 | classnames = [
10 |     # indoor objects
11 |     "empty living room", "empty kitchen",
12 |     # vehicle
13 |     "blue sky", "empty city street, color", "empty city road, color", "empty lake", "empty sea", "railway without train", "empty railway, color",
14 |     # animal
15 |     "trees", "forest", "empty street, colored", "farms", "nature", "empty farm", "stable"
16 | ]
17 | 
18 | to_save = {"background": {
19 |     class_: [] for class_ in classnames
20 | }}
21 | for class_ in classnames:
22 |     for temp in templates:
23 |         print(temp.format(obj=class_))
24 |     to_save["background"][class_] = [
25 |         temp.format(obj=class_)
26 |         for temp in templates
27 |     ]
28 | with open("background_templates.json", "w") as f:
29 |     json.dump(to_save, f)
--------------------------------------------------------------------------------
/t2i_generate/foreground_captions.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | templates = [
 4 |     "a photo of {obj}",
 5 |     "a realistic photo of {obj}",
 6 |     "a photo of {obj} in pure background",
 7 |     "{obj} in a white background",
 8 |     "{obj} without background",
 9 |     "{obj} isolated on white background",
10 | ]
11 | 
12 | classnames = [
13 |     # 'a truck', 'a traffic light', 'a fire hydrant', 'a stop sign', 'a parking meter', 'a bench',
14 |     # 'an elephant', 'a bear', 'a zebra', 'a giraffe', 'a backpack', 'an umbrella',
15 |     # 'a handbag', 'a tie', 'a suitcase', 'a frisbee', 'a ski', 'a snowboard', 'a sports ball', 'a kite', 'a baseball bat',
16 |     # 'a baseball glove', 'a skateboard', 'a surfboard', 'a tennis racket', 'a wine glass', 'a cup', 'a fork',
17 |     # 'a knife', 'a spoon', 'a bowl', 'a banana', 'an apple', 'a sandwich', 'an orange', 'a broccoli', 'a carrot', 'a hot dog',
18 |     # 'a pizza', 'a donut', 'a cake', 'a couch', 'a bed', 'a toilet',
19 |     # 'a laptop', 'a computer mouse', 'an electronic remote', 'a keyboard', 'a cell phone', 'a microwave', 'an oven', 'a toaster', 'a sink', 'a refrigerator',
20 |     # 'a book', 'a clock', 'a vase', 'a scissors', 'a teddy bear', 'a hair drier', 'a toothbrush',
21 |     # above are 60 classes, used in COCO
22 |     "a person", "a man", "a woman",
23 |     "a bird", "a cat", "a cow", "a dog", "a horse", "a sheep", 
24 |     "an airplane", 
25 |     "a TV", "a monitor", "an old monitor", "a dining table", "a table", 
26 |     "a bicycle", "a boat", "a bus", "a car", "a motorbike", "a train",
27 |     "a bottle", "a chair", "a dining table", "a potted plant", "a sofa", "a tv monitor"
28 | ]
29 | # ]
30 | 
31 | to_save = {"foreground": {
32 |     class_: [] for class_ in classnames
33 | }}
34 | for class_ in classnames:
35 |     for temp in templates:
36 |         print(temp.format(obj=class_))
37 |     to_save["foreground"][class_] = [
38 |         temp.format(obj=class_)
39 |         for temp in templates
40 |     ]
41 | with open("foreground_templates.json", "w") as f:
42 |     json.dump(to_save, f)
--------------------------------------------------------------------------------
/t2i_generate/stable_diffusion2.py:
--------------------------------------------------------------------------------
 1 | # make sure you're logged in with `huggingface-cli login`
 2 | import argparse
 3 | import json, os
 4 | from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
 5 | import torch
 6 | import numpy as np
 7 | 
 8 | def parse_args():
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("--idx", type=int)
11 |     parser.add_argument("--bsz", type=int, default=8)
12 |     parser.add_argument("--num_gen_images_per_caption", "-n", type=int, default=20)
13 |     parser.add_argument("--caption_json", default="./data", help="if not '', will only generate DallE images from this json, use `idx` and `scene` to select which to generate")
14 |     parser.add_argument("--num_clusters", default=100, type=int, help="when using RuDalle, split all captions into `num_clusters` chunk and let each machine handle one chunk only")
15 | 
16 |     parser.add_argument("--output_dir", default="")
17 |     args = parser.parse_args()
18 |     if 'PT_DATA_DIR' in os.environ:
19 |         args.output_dir = os.path.join(os.environ['PT_DATA_DIR'], args.output_dir)
20 |     return args
21 | 
def batchify(lst, n):
    """Yield successive n-sized chunks from lst."""
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
26 | 
if __name__ == "__main__":
    args = parse_args()

    with open(args.caption_json) as f:
        data = json.load(f)

    # The json has one top-level key (e.g. "foreground"/"background") that
    # maps class names to caption lists.
    key = next(iter(data))
    data = data[key]

    # Deterministically shard the caption keys so each job (--idx) handles
    # exactly one of --num_clusters chunks.
    all_keys = sorted(list(data.keys()))
    all_chunks = np.array_split(all_keys, args.num_clusters)
    chunks = all_chunks[args.idx]

    # BUG FIX: the keyword is `torch_dtype`; the original passed `torch_type`,
    # which is not a from_pretrained argument, so the model loaded in fp32.
    pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    # Generate a few images per caption each cycle until the target count
    # per caption is reached.
    n_already_generated = 0
    n_generate_per_cycle = 4
    while n_already_generated < args.num_gen_images_per_caption:
        for id in chunks:
            for cap in data[id]:
                prompts = [cap] * n_generate_per_cycle
                cap = cap[:50] # too long captions will cause path error
                cap = cap.replace('"', "") # server don't like ", will map to %2522
                os.makedirs(os.path.join(args.output_dir, id, cap), exist_ok=True)
                # Continue numbering after images from previous runs.
                # NOTE(review): first file written is count+2, not count+1 —
                # looks off-by-one, kept as-is to avoid renumbering existing data.
                cur_i = len(list(os.listdir(os.path.join(args.output_dir, id, cap)))) + 1
                for prompt_chunk in batchify(prompts, n=args.bsz):
                    x = pipe(prompt_chunk)
                    images = x.images
                    for img in images:
                        cur_i += 1
                        img.save(os.path.join(args.output_dir, id, cap, f"{cur_i}.png"))

        n_already_generated += n_generate_per_cycle
--------------------------------------------------------------------------------
/viz/viz.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os, sys
 3 | import random
 4 | from pathlib import Path
 5 | 
 6 | import cv2
 7 | import matplotlib.pyplot as plt
 8 | from detectron2.data import DatasetCatalog
 9 | from detectron2.data.datasets import register_coco_instances
10 | from detectron2.utils.logger import setup_logger
11 | from detectron2.utils.visualizer import Visualizer
12 | 
# Everything resolves relative to this script's directory.
pwd = Path(__file__).parent.resolve()
output = pwd / "out"

# VOC class-name -> id mapping produced by the data-prep step; used as the
# dataset metadata when registering below.
voc_dir = pwd.parent / "data/voc2012" / "VOC2012"
with open(voc_dir.parent / "label2id.json") as f:
    label2id = json.load(f)

# argv[1]: folder holding the synthetic images plus their COCO.json annotation.
artifact_dir = Path(sys.argv[1])
assert artifact_dir.exists()
print(artifact_dir)

# One output subfolder per artifact (named after the artifact dir).
output = output / artifact_dir.stem
os.makedirs(output, exist_ok=True)
coco_name = "COCO.json"
register_coco_instances("synthetic_train", metadata=label2id, json_file=str(artifact_dir / coco_name),
                        image_root=str(artifact_dir))
setup_logger()

ds = "synthetic_train"
# ds = "VOC_test"
data = DatasetCatalog.get(ds)
# Visualize 30 random samples: annotations (left) next to the raw image (right).
for i, d in enumerate(random.sample(data, 30)):
    img = cv2.imread(d["file_name"])
    # Visualizer expects RGB; cv2 loads BGR, hence the channel flip.
    visualizer = Visualizer(img[:, :, ::-1],
                            scale=0.5)
    # metadata=balloon_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    plt.figure(1, figsize=(10, 8))
    plt.subplot(1, 2, 1)
    plt.title(d['file_name'])
    plt.imshow(out.get_image())
    plt.subplot(1, 2, 2)
    plt.imshow(img[:, :, ::-1])
    plt.title("RGB")
    plt.tight_layout()
    plt.savefig(output / f"demo{i}.png")
    plt.show()
--------------------------------------------------------------------------------