├── .gitignore
├── README.md
├── assets
├── overview.png
└── results.png
├── cutpaste
├── __init__.py
├── anno.py
├── background.py
├── clip_postprocess.py
├── config
│ ├── bg
│ │ └── VOC
│ │ │ └── 1shot
│ │ │ ├── bg_template.yaml
│ │ │ ├── bg_template_plus_dalle.yaml
│ │ │ └── real.yaml
│ ├── config.yaml
│ ├── ds
│ │ └── VOC.yaml
│ ├── exp
│ │ ├── demo_cutpaste.yaml
│ │ ├── demo_syn+real.yaml
│ │ ├── demo_syn.yaml
│ │ └── demo_synFg.yaml
│ ├── fg
│ │ └── VOC
│ │ │ ├── 1shot
│ │ │ └── real.yaml
│ │ │ └── fg_template.yaml
│ └── paster
│ │ └── defaults.yaml
├── foreground.py
├── paste.py
├── paster.py
├── pb.py
├── pyblur3
│ ├── BoxBlur.py
│ ├── DefocusBlur.py
│ ├── GaussianBlur.py
│ ├── LineDictionary.py
│ ├── LinearMotionBlur.py
│ ├── PsfBlur.py
│ ├── RandomizedBlur.py
│ ├── __init__.py
│ └── psf.pkl
└── utils.py
├── data
├── test_data
│ ├── VOC2012
│ │ ├── Annotations
│ │ │ ├── 2007_000039.xml
│ │ │ ├── 2007_000063.xml
│ │ │ ├── 2007_000648.xml
│ │ │ ├── 2007_001420.xml
│ │ │ ├── 2007_001709.xml
│ │ │ ├── 2007_001901.xml
│ │ │ ├── 2007_002216.xml
│ │ │ ├── 2007_002668.xml
│ │ │ ├── 2007_002669.xml
│ │ │ ├── 2007_002845.xml
│ │ │ ├── 2007_003207.xml
│ │ │ ├── 2007_003565.xml
│ │ │ ├── 2007_003778.xml
│ │ │ ├── 2007_003876.xml
│ │ │ ├── 2007_004166.xml
│ │ │ ├── 2007_005273.xml
│ │ │ ├── 2007_005702.xml
│ │ │ ├── 2007_006303.xml
│ │ │ ├── 2007_006400.xml
│ │ │ └── 2007_006673.xml
│ │ ├── JPEGImages
│ │ │ ├── 2007_000039.jpg
│ │ │ ├── 2007_000063.jpg
│ │ │ ├── 2007_000648.jpg
│ │ │ ├── 2007_001420.jpg
│ │ │ ├── 2007_001709.jpg
│ │ │ ├── 2007_001901.jpg
│ │ │ ├── 2007_002216.jpg
│ │ │ ├── 2007_002668.jpg
│ │ │ ├── 2007_002669.jpg
│ │ │ ├── 2007_002845.jpg
│ │ │ ├── 2007_003207.jpg
│ │ │ ├── 2007_003565.jpg
│ │ │ ├── 2007_003778.jpg
│ │ │ ├── 2007_003876.jpg
│ │ │ ├── 2007_004166.jpg
│ │ │ ├── 2007_005273.jpg
│ │ │ ├── 2007_005702.jpg
│ │ │ ├── 2007_006303.jpg
│ │ │ ├── 2007_006400.jpg
│ │ │ └── 2007_006673.jpg
│ │ └── SegmentationObject
│ │ │ ├── 2007_000039.png
│ │ │ ├── 2007_000063.png
│ │ │ ├── 2007_000648.png
│ │ │ ├── 2007_001420.png
│ │ │ ├── 2007_001709.png
│ │ │ ├── 2007_001901.png
│ │ │ ├── 2007_002216.png
│ │ │ ├── 2007_002668.png
│ │ │ ├── 2007_002669.png
│ │ │ ├── 2007_002845.png
│ │ │ ├── 2007_003207.png
│ │ │ ├── 2007_003565.png
│ │ │ ├── 2007_003778.png
│ │ │ ├── 2007_003876.png
│ │ │ ├── 2007_004166.png
│ │ │ ├── 2007_005273.png
│ │ │ ├── 2007_005702.png
│ │ │ ├── 2007_006303.png
│ │ │ ├── 2007_006400.png
│ │ │ └── 2007_006673.png
│ ├── background
│ │ ├── bg_template
│ │ │ ├── forest
│ │ │ │ └── A real photo of forest
│ │ │ │ │ ├── 589.png
│ │ │ │ │ ├── 590.png
│ │ │ │ │ ├── 591.png
│ │ │ │ │ ├── 593.png
│ │ │ │ │ └── 598.png
│ │ │ └── railway without train
│ │ │ │ └── A real photo of railway without train
│ │ │ │ ├── 367.png
│ │ │ │ ├── 373.png
│ │ │ │ ├── 429.png
│ │ │ │ └── 475.png
│ │ └── context_augment
│ │ │ ├── 2007_000504.jpg
│ │ │ └── a colored photo of an empty pile of trash on the ground
│ │ │ │ └── 49.png
│ │ │ ├── 2007_001609.jpg
│ │ │ └── a colored photo of an empty living room
│ │ │ │ └── 69.png
│ │ │ ├── 2007_001764.jpg
│ │ │ └── a real image of an empty grass covered field
│ │ │ │ └── 71.png
│ │ │ ├── 2007_002227.jpg
│ │ │ └── a real image of an empty wall in a living room
│ │ │ │ └── 45.png
│ │ │ ├── 2007_002281.jpg
│ │ │ └── a colored photo of an empty street next to a forest
│ │ │ │ └── 79.png
│ │ │ ├── 2007_002967.jpg
│ │ │ └── a real image of an empty doorstep
│ │ │ │ └── 58.png
│ │ │ ├── 2007_003451.jpg
│ │ │ └── a real image of an empty living room filled with furniture and a large window
│ │ │ │ └── 40.png
│ │ │ ├── 2007_005124.jpg
│ │ │ └── a real image of an empty grass field
│ │ │ │ └── 58.png
│ │ │ ├── 2007_006136.jpg
│ │ │ └── a real image of an empty grass near a forest
│ │ │ │ └── 33.png
│ │ │ ├── 2007_007585.jpg
│ │ │ └── a real image of an empty grass field
│ │ │ │ └── 67.png
│ │ │ └── clip_postprocessed.json
│ ├── foreground
│ │ ├── foreground_mask
│ │ │ ├── aeroplane_mask
│ │ │ │ ├── The picture of an airplane on a pure background422.png
│ │ │ │ ├── an airplane in a pure background449.png
│ │ │ │ └── an airplane106.png
│ │ │ ├── bicycle_mask
│ │ │ │ ├── a bicycle in a pure background122.png
│ │ │ │ ├── a bicycle in a pure background128.png
│ │ │ │ └── a bicycle in a pure background225.png
│ │ │ ├── bird_mask
│ │ │ │ ├── a bird in a pure background162.png
│ │ │ │ ├── a bird in a pure background293.png
│ │ │ │ └── a bird in a pure background374.png
│ │ │ ├── boat_mask
│ │ │ │ ├── The picture of a boat on a pure background39.png
│ │ │ │ ├── a boat in a pure background371.png
│ │ │ │ └── a boat79.png
│ │ │ ├── bottle_mask
│ │ │ │ ├── The picture of a bottle on a pure background407.png
│ │ │ │ ├── a bottle in a pure background108.png
│ │ │ │ └── a bottle89.png
│ │ │ ├── bus_mask
│ │ │ │ ├── The picture of a bus on a pure background460.png
│ │ │ │ ├── a bus105.png
│ │ │ │ └── a bus410.png
│ │ │ ├── car_mask
│ │ │ │ ├── The picture of a car on a pure background100.png
│ │ │ │ ├── The picture of a car on a pure background286.png
│ │ │ │ └── a car97.png
│ │ │ ├── cat_mask
│ │ │ │ ├── a cat245.png
│ │ │ │ ├── a cat58.png
│ │ │ │ └── a cat62.png
│ │ │ ├── chair_mask
│ │ │ │ ├── The picture of a chair on a pure background423.png
│ │ │ │ ├── a chair in a pure background301.png
│ │ │ │ └── a chair402.png
│ │ │ ├── cow_mask
│ │ │ │ ├── a cow in a pure background9.png
│ │ │ │ ├── a cow223.png
│ │ │ │ └── a cow56.png
│ │ │ ├── diningtable_mask
│ │ │ │ ├── a dining table in a pure background63.png
│ │ │ │ ├── a table123.png
│ │ │ │ └── a table300.png
│ │ │ ├── dog_mask
│ │ │ │ ├── The picture of a dog on a pure background236.png
│ │ │ │ ├── a dog in a pure background487.png
│ │ │ │ └── a dog121.png
│ │ │ ├── horse_mask
│ │ │ │ ├── The picture of a horse on a pure background469.png
│ │ │ │ ├── a horse in a pure background293.png
│ │ │ │ └── a horse298.png
│ │ │ ├── motorbike_mask
│ │ │ │ ├── a motorbike in a pure background367.png
│ │ │ │ ├── a motorbike in a pure background421.png
│ │ │ │ └── a motorbike315.png
│ │ │ ├── person_mask
│ │ │ │ ├── a man146.png
│ │ │ │ ├── a man253.png
│ │ │ │ └── a man345.png
│ │ │ ├── pottedplant_mask
│ │ │ │ ├── a potted plant11.png
│ │ │ │ ├── a potted plant374.png
│ │ │ │ └── a potted plant434.png
│ │ │ ├── sheep_mask
│ │ │ │ ├── The picture of a sheep on a pure background212.png
│ │ │ │ ├── a sheep in a pure background219.png
│ │ │ │ └── a sheep351.png
│ │ │ ├── sofa_mask
│ │ │ │ ├── a sofa in a pure background122.png
│ │ │ │ ├── a sofa in a pure background353.png
│ │ │ │ └── a sofa in a pure background395.png
│ │ │ ├── train_mask
│ │ │ │ ├── The picture of a train on a pure background303.png
│ │ │ │ ├── The picture of a train on a pure background32.png
│ │ │ │ └── a train in a pure background133.png
│ │ │ └── tvmonitor_mask
│ │ │ │ ├── a tv monitor251.png
│ │ │ │ ├── an old monitor107.png
│ │ │ │ └── an old monitor426.png
│ │ └── foreground_rgb
│ │ │ ├── a bicycle
│ │ │ └── a bicycle in a pure background
│ │ │ │ ├── 122.png
│ │ │ │ ├── 128.png
│ │ │ │ └── 225.png
│ │ │ ├── a bird
│ │ │ └── a bird in a pure background
│ │ │ │ ├── 162.png
│ │ │ │ ├── 293.png
│ │ │ │ └── 374.png
│ │ │ ├── a boat
│ │ │ ├── The picture of a boat on a pure background
│ │ │ │ └── 39.png
│ │ │ ├── a boat in a pure background
│ │ │ │ └── 371.png
│ │ │ └── a boat
│ │ │ │ └── 79.png
│ │ │ ├── a bottle
│ │ │ ├── The picture of a bottle on a pure background
│ │ │ │ └── 407.png
│ │ │ ├── a bottle in a pure background
│ │ │ │ └── 108.png
│ │ │ └── a bottle
│ │ │ │ └── 89.png
│ │ │ ├── a bus
│ │ │ ├── The picture of a bus on a pure background
│ │ │ │ └── 460.png
│ │ │ └── a bus
│ │ │ │ ├── 105.png
│ │ │ │ └── 410.png
│ │ │ ├── a car
│ │ │ ├── The picture of a car on a pure background
│ │ │ │ ├── 100.png
│ │ │ │ └── 286.png
│ │ │ └── a car
│ │ │ │ └── 97.png
│ │ │ ├── a cat
│ │ │ └── a cat
│ │ │ │ ├── 245.png
│ │ │ │ ├── 58.png
│ │ │ │ └── 62.png
│ │ │ ├── a chair
│ │ │ ├── The picture of a chair on a pure background
│ │ │ │ └── 423.png
│ │ │ ├── a chair in a pure background
│ │ │ │ └── 301.png
│ │ │ └── a chair
│ │ │ │ └── 402.png
│ │ │ ├── a cow
│ │ │ ├── a cow in a pure background
│ │ │ │ └── 9.png
│ │ │ └── a cow
│ │ │ │ ├── 223.png
│ │ │ │ └── 56.png
│ │ │ ├── a dining table
│ │ │ └── a dining table in a pure background
│ │ │ │ └── 63.png
│ │ │ ├── a dog
│ │ │ ├── The picture of a dog on a pure background
│ │ │ │ └── 236.png
│ │ │ ├── a dog in a pure background
│ │ │ │ └── 487.png
│ │ │ └── a dog
│ │ │ │ └── 121.png
│ │ │ ├── a horse
│ │ │ ├── The picture of a horse on a pure background
│ │ │ │ └── 469.png
│ │ │ ├── a horse in a pure background
│ │ │ │ └── 293.png
│ │ │ └── a horse
│ │ │ │ └── 298.png
│ │ │ ├── a man
│ │ │ └── a man
│ │ │ │ ├── 146.png
│ │ │ │ ├── 253.png
│ │ │ │ └── 345.png
│ │ │ ├── a motorbike
│ │ │ ├── a motorbike in a pure background
│ │ │ │ ├── 367.png
│ │ │ │ └── 421.png
│ │ │ └── a motorbike
│ │ │ │ └── 315.png
│ │ │ ├── a potted plant
│ │ │ └── a potted plant
│ │ │ │ ├── 11.png
│ │ │ │ ├── 374.png
│ │ │ │ └── 434.png
│ │ │ ├── a sheep
│ │ │ ├── The picture of a sheep on a pure background
│ │ │ │ └── 212.png
│ │ │ ├── a sheep in a pure background
│ │ │ │ └── 219.png
│ │ │ └── a sheep
│ │ │ │ └── 351.png
│ │ │ ├── a sofa
│ │ │ └── a sofa in a pure background
│ │ │ │ ├── 122.png
│ │ │ │ ├── 353.png
│ │ │ │ └── 395.png
│ │ │ ├── a table
│ │ │ └── a table
│ │ │ │ ├── 123.png
│ │ │ │ └── 300.png
│ │ │ ├── a train
│ │ │ ├── The picture of a train on a pure background
│ │ │ │ ├── 303.png
│ │ │ │ └── 32.png
│ │ │ └── a train in a pure background
│ │ │ │ └── 133.png
│ │ │ ├── a tv monitor
│ │ │ └── a tv monitor
│ │ │ │ └── 251.png
│ │ │ ├── an airplane
│ │ │ ├── The picture of an airplane on a pure background
│ │ │ │ └── 422.png
│ │ │ ├── an airplane in a pure background
│ │ │ │ └── 449.png
│ │ │ └── an airplane
│ │ │ │ └── 106.png
│ │ │ └── an old monitor
│ │ │ └── an old monitor
│ │ │ ├── 107.png
│ │ │ └── 426.png
│ ├── labels.txt
│ └── train_cls-1shot.txt
└── voc2012
│ ├── 1k_10_shot.json
│ ├── 1k_1_shot.json
│ ├── label2id.json
│ ├── labels.txt
│ ├── prompt_replace_original_200_selected.json
│ ├── test.txt
│ ├── train_1k_cls.txt
│ ├── train_aug.txt
│ ├── train_cls-10shot.txt
│ ├── train_cls-1shot.txt
│ ├── train_cls.txt
│ └── val_cls.txt
├── detection
├── train.py
├── utils.py
└── wandb_writer.py
├── instance_seg
├── run.sh
├── seg.py
└── seg_lazy.py
├── requirements.txt
├── t2i_generate
├── background_captions.py
├── foreground_captions.py
└── stable_diffusion2.py
└── viz
└── viz.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | cutpaste/outputs
3 | artifact/
4 | viz/out
5 | **/__pycache__/
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text2Image-for-Detection
2 |
3 | Official Implementation for ["DALL-E for Detection: Language-driven Compositional Image Synthesis for Object Detection"](https://arxiv.org/pdf/2309.05956.pdf)
4 |
5 | Extended version: ["Beyond Generation: Harnessing Text to Image Models for Object Detection and Segmentation"](https://arxiv.org/pdf/2309.05956.pdf)
6 |
7 |
15 |
16 | Contact: [yunhaoge@usc.edu](mailto:yunhaoge@usc.edu); [jxu1@g.harvard.edu](mailto:jxu1@g.harvard.edu)
17 |
18 | ## Install
19 |
20 | This project is developed using Python 3.10 and PyTorch 1.10.1 under CUDA 11.3. We recommend you to use the same version of Python and PyTorch.
21 |
22 | ```bash
23 | pip install -r requirements.txt
24 | ```
25 |
26 | ## Our method
27 |
28 |
29 |
30 |
31 |
32 | We propose a novel approach for generating diverse and large-scale pseudo-labeled training datasets, tailored specifically to enhance downstream
33 | object detection and segmentation models.
34 | We leverage text-to-image models (e.g. your favourite diffusion model) to independently generate foregrounds and backgrounds.
35 | Then we composite foregrounds onto the backgrounds, a process where we obtain the bounding boxes or segmentation masks of the foregrounds, to be used in the downstream models.
36 |
37 | Specifically,
38 | - Foregrounds are generated using text-to-image models with fixed templates (such as "A photo of ", Table 2 of the paper). Note this is purely zero-shot, i.e. no training data whatsoever is required.
39 | - Backgrounds are slightly more complicated.
40 | - For zero-shot, we have another set of fixed templates (such as "A real photo of forest", Table 3 of the paper).
41 | - For few-shot where we have a limited set of training images, we first caption each image,
42 | then extract the background contextual words from the caption. Those contextual words are words such as "grass field," indicating the location and context of the training images.
43 | Lastly we augment the original caption by inputting the contextual words into the templates.
44 | This step is necessary as otherwise the caption would possibly contain the foreground objects (eg a dog) as well,
45 | and using this original caption into the text-to-image models will likely generate a dog that we do not have a segmentation or bounding box label, and thus confusing the model.
46 | - Both foregrounds and backgrounds are fed into CLIP to control the quality of the generated images.
47 |
48 | ## Usage
49 |
50 | ### Data
51 | In this project we use Pascal VOC in a low-resource regime.
52 |
53 | You should download original dataset, e.g. [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/).
54 | Note that for Pascal we use train & Val set from the [nsrom repo](https://github.com/NUST-Machine-Intelligence-Laboratory/nsrom).
55 | The data structure will be
56 | ```
57 | data
58 | ├── COCO2017
59 | └── voc2012
60 | ├── labels.txt
61 | ├── train_aug.txt
62 | ├── ...
63 | └── VOC2012
64 | ├── Annotations
65 | ├── ImageSets
66 | ...
67 | ```
68 | We have k-shot selections on `data/voc2012`:
69 | [1 shot](data/voc2012/train_cls-1shot.txt) and [10 shot](data/voc2012/train_cls-10shot.txt).
70 |
71 | ### Diffusion Generation
72 | The code to generate foregrounds and backgrounds is in the `t2i_generate/` folder.
73 | First you need to generate captions for [foreground](t2i_generate/foreground_captions.py) and [background](t2i_generate/background_captions.py).
74 | Then you can use stable diffusion 2 to generate images via `python stable_diffusion2.py`.
75 |
76 | ### Cut Paste
77 | The code to paste foregrounds onto backgrounds is in the `cutpaste/` folder.
78 | We use [hydra+torch lightning](https://github.com/ashleve/lightning-hydra-template) to run different variants.
79 | Example config files are in `configs/` folder, and we include a test dataset in `data/test_data/` folder.
80 | For example, you can use `python paste.py exp=` to launch the script, where `` is
81 |
82 | - [`demo_cutpaste`](cutpaste/config/exp/demo_cutpaste.yaml): paste real foregrounds onto real backgrounds. This is **Pure Real + cut paste** in [Results](#results).
83 | - [`demo_synFg`](cutpaste/config/exp/demo_synFg.yaml): paste synthetic foregrounds onto real backgrounds. This is **Syn Fg** in [Results](#results).
84 | - [`demo_syn`](cutpaste/config/exp/demo_syn.yaml): paste synthetic foregrounds onto synthetic backgrounds. This is **Pure Syn** in [Results](#results).
85 | - [`demo_syn+real`](cutpaste/config/exp/demo_syn+real.yaml): paste synthetic and real foregrounds onto synthetic and real backgrounds. This is **Syn + real** in [Results](#results).
86 | We recommend you to always use this setting as this offers the benefit of both synthetic and real data.
87 |
88 | You can also use `viz/` to visualize the generated datasets. Simply do
89 | ```shell
90 | python viz/viz.py
91 | ```
92 | This will generate 30 randomly sampled annotated images in `viz/` folder.
93 |
94 | Readers are welcome to check the config files for more parameters to control the process. Some notable mentions:
95 | - by default the foreground is pasted with Space Maximize Paste algorithm described in [EM-paste](https://arxiv.org/pdf/2212.07629.pdf): i.e. each
96 | foreground is re-scaled and pasted within the max inscribing circle of the background contour.
97 | - `repeat_background` is the number of times the algorithm repeats the pasting process. By default it is 2, i.e. each background is used twice, but each time with different foregrounds pasted onto it.
98 | - `num_workers` is the number of workers to use for multiprocessing. We recommend you to use at least 4.
99 | - `use_random_paste` is for random paste (i.e. paste foreground on a random location of background image) while randomly scaling the pasted foreground by 0.3-0.7. This is adapted from [object cut and paste](https://arxiv.org/abs/1708.01642).
100 | - by default `num_cut_images=4` foregrounds are pasted in each background. You can also set `num_cut_lowerbound` to paste Unif(num_cut_lowerbound, num_cut_images) foregrounds.
101 | - The codebase supports 5 total blendings, namely Gaussian Blur, Poisson Blur, Box Blur, Motion Blur and None (do not apply any blendings). We empirically find Gaussian alone achieves the best results.
102 | - Space Maximize Paste will scale the foreground object to match the diameter of the max inscribing circle of the background contour, but sometimes producing small foregrounds. `use_random_scaling=True`
103 | will apply the random scaling factor (0.3-0.7) as [object cut and paste](https://arxiv.org/abs/1708.01642) paper.
104 | - `scale_factor=factor` additionally apply another scaling to the pasted foreground after Space Maximize Paste.
105 | - `center_translation_factor=factor` shift the location to paste the foreground by ± height x factor or ± width x factor, instead of pasting on the center of the max inscribing circle.
106 |
107 |
108 | ### Model Training
109 | Once the dataset is created, you can train object detection model using `detection/` and instance segmentation model using
110 | `instance_seg/`. Both are based on the battle-tested [detectron2](https://github.com/facebookresearch/detectron2).
111 |
112 | For example, on VOC 2012 with 2 GPUs, you can run
113 | ```shell
114 | # object detection
115 | python detection/train.py -s syn \ # use synthetic data
116 | --syn_dir \
117 | -t voc_val \ # test on VOC val
118 | --test_dir \ # data/voc2012/VOC2012, we need to find val set in this folder
119 | -g 4 \ # use 4 GPUs on 1 machine
120 | --freeze --data_aug --bsz 32 --epoch 200 --resnet 50 --lr 0.01 # hyperparameters
121 | ```
122 | For instance segmentation, use `instance_seg/seg.py` instead of `detection/train.py`. The flags are the same.
123 |
124 | For inference, simply apply the additional flag `--eval_checkpoint `.
125 |
126 |
127 | ## Results
128 | Our method results in significant improvement over the baseline on Pascal VOC and MS COCO, especially in the low-resource regime.
129 | We refer details [in the paper](https://arxiv.org/pdf/2309.05956.pdf).
130 |
131 |
132 |
133 |
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/overview.png
--------------------------------------------------------------------------------
/assets/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/assets/results.png
--------------------------------------------------------------------------------
/cutpaste/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/__init__.py
--------------------------------------------------------------------------------
/cutpaste/anno.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Optional, Tuple
3 |
4 | import numpy as np
5 | import ujson as json
6 | from PIL import Image
7 | from pycocotools import mask as cocomask
8 |
9 |
class Anno:
    """
    Abstract base for annotation readers.

    Subclasses expose the objects present in one image and can render
    both a semantic mask and an instance mask for them.
    """
    # NOTE(review): label2id / id2label are only declared here; they appear to
    # be injected on the class from elsewhere before use — confirm at call site.
    label2id: dict
    id2label: dict

    @abstractmethod
    def objects(self):
        """Return the annotated objects (representation depends on subclass)."""
        raise NotImplementedError

    @abstractmethod
    def create_mask(self, for_object: Optional[int] = None) -> Image.Image:
        """Return a semantic mask: 0 for background, category label id for objects."""
        raise NotImplementedError

    @abstractmethod
    def create_instance_mask(self) -> Tuple[Image.Image, dict]:
        """Return (instance mask image, {instance id -> category label id})."""
        raise NotImplementedError

    @staticmethod
    def factory(anno_path, seg_img_path):
        # Dispatch on which inputs exist:
        #   mask only      -> EntityAnno (binary entity mask)
        #   anno json only -> COCOAnno
        #   xml + mask     -> VOCAnno
        if anno_path is None:
            return EntityAnno(seg_img_path)
        elif seg_img_path is None:
            return COCOAnno(anno_path)
        return VOCAnno(anno_path, seg_img_path)
33 |
34 |
class VOCAnno(Anno):
    """
    Pascal-VOC style annotation: an XML file plus a segmentation PNG in
    which pixel value i marks the i-th annotated object, 0 is background
    and 255 is the white object outline.
    """

    def __init__(self, anno_path, seg_img_path):
        import xml.etree.ElementTree as ET
        self.anno_path = anno_path
        self.anno = ET.parse(anno_path).getroot()
        self.seg_img_path = seg_img_path

    def size(self):
        """Return (height, width) as declared in the XML <size> element."""
        size = self.anno.find("size")
        height, width = size.find("./height").text, size.find("./width").text
        return int(height), int(width)

    def filename(self) -> str:
        """Image filename recorded in the XML annotation."""
        return self.anno.find("filename").text

    def objects(self):
        """Return all <object> elements of the XML annotation."""
        objects = self.anno.findall("object")
        # hardcode, remove wrong seg annotation
        if "2009_005069" in self.anno_path:
            objects = objects[:-1]
        return objects

    def create_mask(self, for_object: Optional[int] = None):
        """
        create boolean mask with same shape as .size()
        gt (is object) is positive, dummy is 0
        if for_object = None, OR all mask
        else, mask for this specific object (0 if dummy, positive for this category)
        """
        # consists of: objects (object number in anno), 0 (dummy bg), 255 (white mask outline)
        seg_mask = np.array(Image.open(self.seg_img_path))
        objects = self.objects()
        if for_object is None:
            ids = list(range(1, len(objects) + 1))
            categories = [
                object.find("./name").text
                for object in objects
            ]
            # object number (pixel value) -> category label id
            id2categoryid = {
                i: self.label2id[c]
                for i, c in zip(ids, categories)
            }
            # plus mapping to get dummy 255
            id2categoryid[0] = 0
            id2categoryid[255] = 0
            if len(np.unique(seg_mask)) != len(id2categoryid):
                # when seg is wrong, there are mismatch
                seg_mask = np.where(np.isin(seg_mask, list(id2categoryid)), seg_mask, 0)

            # rn if seg_mask == i, it's ith object, make it ith object's category
            mask = np.vectorize(id2categoryid.get)(seg_mask).astype('uint8')
            return Image.fromarray(mask)

        assert type(for_object) is int
        assert 1 <= for_object <= len(objects)
        id = for_object
        category = objects[id - 1].find("./name").text

        # keep only pixels of this object, valued by its category label id
        mask = np.where(seg_mask == id, self.label2id[category], 0).astype("uint8")
        return Image.fromarray(mask)

    def create_instance_mask(self):
        """
        instance mask where each non-dummy object is positive with id (starts from 1, NOT label id)
        0 if background dummy
        """
        seg_mask = np.array(Image.open(self.seg_img_path))
        # zero out background (0) and outline (255); remaining pixels already hold instance ids
        instance_mask = np.where(np.isin(seg_mask, [0, 255]), 0, seg_mask).astype("uint8")
        objects = self.objects()
        ids = list(range(1, len(objects) + 1))
        categories = [
            object.find("./name").text
            for object in objects
        ]
        instance_mask_id2category = {
            i: self.label2id[c]
            for i, c in zip(ids, categories)
        }
        return Image.fromarray(instance_mask), instance_mask_id2category
114 |
115 |
class EntityAnno(Anno):
    """
    Annotation backed by a single binary (0/255) entity-mask PNG.

    The category is recovered from the parent directory name,
    e.g. ".../bottle_mask/2009_000562.png" -> label id of "bottle".
    """

    def __init__(self, seg_img_path):
        # eg data/voc2012/entity_mask/bottle_mask/2009_000562.png
        self.seg_img_path = seg_img_path
        # second-to-last path component encodes the category, e.g. "bottle_mask"
        _, label_dir, _filename = seg_img_path.rsplit("/", 2)
        self.label = self.label2id[label_dir.replace("_mask", "")]

    def objects(self):
        # exactly one object per entity mask
        return [self.label]

    def create_mask(self, for_object: Optional[int] = None):
        """Semantic mask: label id where the (255-valued) entity is, 0 elsewhere."""
        raw = np.array(Image.open(self.seg_img_path))
        semantic = np.where(raw == 255, self.label, 0).astype("uint8")
        return Image.fromarray(semantic)

    def create_instance_mask(self):
        """Instance mask: the single entity gets instance id 1, background 0."""
        raw = np.array(Image.open(self.seg_img_path))
        instance = np.where(raw == 255, 1, 0).astype("uint8")
        return Image.fromarray(instance), {1: self.label}
139 |
class COCOAnno(Anno):
    """
    COCO-style annotation loaded from a JSON file.

    NOTE(review): self.anno["images"] is indexed like a dict with
    "height"/"width" keys, i.e. this expects a per-image JSON record —
    standard COCO files store a *list* under "images"; confirm against
    the files actually passed in.
    """

    def __init__(self, anno_path):
        with open(anno_path) as f:
            self.anno = json.load(f)

        # category id -> list of its annotation dicts
        self.id2annos = {
            id: []
            for id in self.objects()
        }
        for anno in self.anno["annotations"]:
            self.id2annos[anno["category_id"]].append(anno)

    def size(self):
        """Return (height, width) of the image."""
        return int(self.anno['images']['height']), int(self.anno['images']['width'])

    def objects(self):
        """Sorted unique category ids present in the annotations."""
        return sorted(set([
            anno['category_id']
            for anno in self.anno["annotations"]
        ]))

    def create_mask(self, for_object: Optional[int] = None):
        """
        Semantic mask: pixels hold the category id, 0 is background.
        for_object is 1-based and selects the i-th category from .objects().
        """
        if for_object: # i-th (1 based)
            category = self.objects()[for_object-1]
            annos = self.id2annos[category]
            mask = np.zeros(self.size(), dtype=int)
            for anno in annos:
                # decode polygon/RLE segmentation into binary pixel masks
                objs = cocomask.frPyObjects(anno["segmentation"], *self.size())
                binary_mask = cocomask.decode(objs) # (h, w, n) binary {0 (dummy), 1 (obj)} where n is \# disjoint anno
                if binary_mask.ndim == 2:
                    binary_mask = binary_mask[:, :, np.newaxis]
                for n in range(binary_mask.shape[-1]): #
                    mask[binary_mask[:, :, n] == 1] = category
                # binary_mask = np.where(binary_mask == 1, category, 0)
                # mask = np.ma.mask_or(mask, binary_mask)
            return Image.fromarray(mask.astype(np.uint8))

        # no specific object: OR together the per-category masks
        mask = np.zeros(self.size(), dtype=int)
        for i, category in enumerate(self.objects(), 1):
            mask2 = self.create_mask(for_object = i)
            mask[np.array(mask2) == category] = category
        return Image.fromarray(mask.astype(np.uint8))

    def create_instance_mask(self):
        """Instance mask (ids start at 1) plus {instance id -> category id}."""
        instance_mask = np.zeros(self.size(), dtype=int)
        instance_mask_id2category = {}
        for anno in self.anno["annotations"]:
            objs = cocomask.frPyObjects(anno["segmentation"], *self.size())
            binary_mask = cocomask.decode(objs) # (h, w) binary {0 (dummy), 1 (obj)}
            if binary_mask.ndim == 2:
                binary_mask = binary_mask[:, :, np.newaxis]
            # one instance id per annotation, even if its RLE decodes to
            # several disjoint parts
            next_id = len(instance_mask_id2category) + 1
            for n in range(binary_mask.shape[-1]): #
                instance_mask[binary_mask[:, :, n] == 1] = next_id
            instance_mask_id2category[next_id] = anno['category_id']

        return Image.fromarray(instance_mask.astype(np.uint8)), instance_mask_id2category
--------------------------------------------------------------------------------
/cutpaste/background.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | from pathlib import Path
4 | from typing import Optional, Iterator
5 |
6 | import cv2
7 | import numpy as np
8 | from PIL import Image
9 | from pycocotools import mask as cocomask
10 |
11 | from anno import Anno
12 | from foreground import CutObject, CutObjects
13 | from pb import create_mask, poisson_blend
14 | from pyblur3 import LinearMotionBlur
15 |
16 |
def binarize_mask(mask, set_boundary=False) -> np.ndarray:
    """
    Collapse a (possibly multi-category) mask to binary occupancy.

    Any strictly positive pixel becomes 255, everything else 0,
    regardless of which category the pixel belonged to.
    If ``set_boundary`` is True, the outermost rows and columns are
    forced to 255 as well (treat the image border as occupied).
    """
    arr = np.asarray(mask)
    binary = ((arr > 0) * 255).astype("uint8")
    if set_boundary:
        binary[[0, -1], :] = 255
        binary[:, [0, -1]] = 255
    return binary
29 |
30 |
31 | class PastedBackground:
32 | """
33 | background image to be pasted on
34 | """
35 |
    def __init__(self, imagepath: str, anno: Optional[Anno] = None):
        """
        Load a background image and optionally its annotation.

        If anno is None, no need to find annotation in the given image,
        i.e. we ignore potential foregrounds and start with empty masks.
        """
        self.image: Image.Image = Image.open(imagepath)
        self.imagepath = imagepath
        if anno:
            self.ignore_foreground = False
            # semantic mask
            # 0 if dummy, positive int (label id for each of the potential object/foreground) is object mask
            self.mask = anno.create_mask(for_object=None)
            # Image Mask (each instance a unique id) & instance id to actual category (starts from 1)
            self.instance_mask, self.instance_mask_id2category = anno.create_instance_mask()
        else:
            self.ignore_foreground = True
            w, h = self.size
            # all-zero masks: nothing in this background counts as occupied yet
            self.mask = Image.fromarray(np.zeros((h, w), dtype="uint8"))
            self.instance_mask, self.instance_mask_id2category = self.mask.copy(), {}
54 |
    @property
    def size(self):
        """ w and h """
        # PIL convention: Image.size is (width, height)
        return self.image.size
59 |
60 | def resize(self, out_size: int):
61 | self.image = self.image.resize((out_size, out_size), Image.LANCZOS)
62 | self.mask = self.mask.resize((out_size, out_size), Image.NEAREST)
63 | self.instance_mask = self.instance_mask.resize((out_size, out_size), Image.NEAREST)
64 |
    def find_paste_location(self,
                            foregrounds: CutObjects, max_degree, random_paste=False,
                            scale_factor=0, center_translation_factor=0, use_random_scaling=False):
        """
        modify self.mask in place (paste with foreground)
        return new foregrounds (after scaling and rotation)
        if random_paste: select random paste location (for ablation) and random scale 0.3-0.7
        """
        # 1. loop through all objects
        foregrounds.shuffle()
        foregrounds_to_paste = []
        locations = []
        for foreground_object in foregrounds:
            w, h = self.size
            if random_paste:
                # NOTE(review): x is drawn from [0, h] and y from [0, w], which
                # looks swapped relative to size = (w, h) — confirm intended axes.
                (x, y) = random.randint(0, h), random.randint(0, w)
                radius2, center2 = foreground_object.min_enclosing_circle()
                scaling = random.uniform(0.3, 0.7)
                radius = scaling * radius2
            else:
                # 2. find max inscribing circle in the background non-occupied area
                radius, (x, y) = self.max_inscribing_circle()
                if center_translation_factor != 0: # translate center
                    sgn = 1 if random.random() < 0.5 else -1
                    x += sgn * h * (center_translation_factor / 100)
                    y += sgn * w * (center_translation_factor / 100)

                # 3. compute per-object min enclosing circle
                radius2, center2 = foreground_object.min_enclosing_circle()

                # 4. after scale, make foreground_object align with max inscribing circle
                if use_random_scaling:
                    scaling = random.uniform(0.3, 0.7)
                else:
                    scaling = radius / radius2
                if scale_factor != 0: # scale by @scale_factor %
                    scaling *= (1 + scale_factor / 100)
            try:
                foreground_object.scale(scaling)
                o_w, o_h = foreground_object.img.size
                # scaled foreground must be non-empty and fit inside the background
                assert w - o_w >= 0 and h - o_h >= 0 and o_w > 0 and o_h > 0
            except:
                # foreground cannot fit after scaling: skip it
                continue

            foreground_object.rotate(size=self.size, max_degree=max_degree)

            foregrounds_to_paste.append(foreground_object)
            # top-left corner so the object is centered on the circle center
            locations.append((
                int(x - radius), int(y - radius)
            ))
            # paste the foreground's semantic mask onto ours, using the
            # binarized foreground mask as the paste alpha
            self.mask.paste(foreground_object.mask, locations[-1],
                            Image.fromarray(binarize_mask(foreground_object.mask)))
            # register a fresh instance id and paste it into the instance mask
            new_instance_id = len(self.instance_mask_id2category) + 1
            self.instance_mask_id2category[new_instance_id] = foreground_object.category
            self.instance_mask.paste(
                Image.fromarray(
                    np.where(np.array(foreground_object.mask) == foreground_object.category, new_instance_id, 0).astype(
                        "uint8")),
                locations[-1], Image.fromarray(binarize_mask(foreground_object.mask))
            )
            if self.ignore_foreground:
                # even if ignore foreground first, now it will not since one foreground was pasted
                self.ignore_foreground = False

        return locations, CutObjects(foregrounds_to_paste)
130 |
131 | def save(self, name, path=None):
132 | """
133 | before save, mask is label 1 - 20, and 0 if dummy
134 | change it to 0-19 label (corresponding to labels.txt but minus 1), 255 if dummy
135 | """
136 | if path is None:
137 | img_path = mask_path = Path(".")
138 | else:
139 | img_path = path / "Images"
140 | mask_path = path / "Masks"
141 | mask = self.mask
142 | self.image.save(img_path / f"{name}.png")
143 | mask.save(mask_path / f"{name}.png")
144 |
    def max_inscribing_circle(self):
        """
        Find the largest circle that fits into the part of this background
        not yet occupied by any object.

        Returns:
            (radius, (x, y)): radius and center of the circle.
        """
        if self.ignore_foreground:
            # background treated as empty: mask must contain only the dummy label
            assert list(np.unique(self.mask)) == [0], "should be only 0 i.e. dummy"
            w, h = self.size
            # pick a random center; radius is bounded by some image distances
            x, y = random.randint(0, h), random.randint(0, w)
            # NOTE(review): abs(x) == x here (x >= 0), and `x - h // 2` is the
            # distance to the image CENTER, not to the far border — this looks
            # like it was meant to be min(x, h - x); also x is sampled from
            # [0, h] while paired with h (and y with w) — confirm the intended
            # width/height convention before changing.
            dist_x = min(abs(x), abs(x - h // 2))
            dist_y = min(abs(y), abs(y - w // 2))
            return min(dist_x, dist_y), (x, y)

        # presumably occupied pixels (and the image boundary) become 255 and
        # free space stays 0 — semantics of binarize_mask defined elsewhere
        background_mask = binarize_mask(self.mask, set_boundary=True)
        # distance of every free pixel to the nearest occupied/boundary pixel
        dist_map = cv2.distanceTransform(255 - background_mask, cv2.DIST_L2, cv2.DIST_MASK_PRECISE)
        # global maximum of the distance map = center/radius of the largest
        # circle inscribed in the free area
        _, radius, _, center = cv2.minMaxLoc(dist_map)
        return radius, center
161 |
    def paste(self, blending: str, paste_location: tuple, foreground_object: CutObject):
        """
        Paste @foreground_object onto this background at @paste_location,
        blending the seam with the requested method, and replace self.image
        with the result.

        Args:
            blending: one of 'none', 'motion', 'poisson', 'gaussian', 'box'
            paste_location: (x, y) top-left corner for the paste
            foreground_object: the cut-out object (RGB crop + mask) to paste

        Raises:
            NotImplementedError: for any other blending name.
        """
        def LinearMotionBlur3C(img):
            """Performs motion blur on an image with 3 channels. Used to simulate
            blurring caused due to motion of camera.
            Args:
                img(NumPy Array): Input image with 3 channels
            Returns:
                Image: Blurred image by applying a motion blur with random parameters
            """

            def randomAngle(kerneldim):
                """Returns a random angle used to produce motion blurring
                Args:
                    kerneldim (int): size of the kernel used in motion blurring
                Returns:
                    int: Random angle
                """
                kernelCenter = int(math.floor(kerneldim / 2))
                numDistinctLines = kernelCenter * 4
                validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
                angleIdx = np.random.randint(0, len(validLineAngles))
                return int(validLineAngles[angleIdx])

            # pick random kernel length / line type / angle for the blur
            lineLengths = [3, 5, 7, 9]
            lineTypes = ["right", "left", "full"]
            lineLengthIdx = np.random.randint(0, len(lineLengths))
            lineTypeIdx = np.random.randint(0, len(lineTypes))
            lineLength = lineLengths[lineLengthIdx]
            lineType = lineTypes[lineTypeIdx]
            lineAngle = randomAngle(lineLength)
            blurred_img = img
            # blur each RGB channel independently with the same kernel
            for i in range(3):
                blurred_img[:, :, i] = np.asarray(LinearMotionBlur(img[:, :, i], lineLength, lineAngle, lineType))
            blurred_img = Image.fromarray(blurred_img, 'RGB')
            return blurred_img

        x, y = paste_location
        foreground = foreground_object.img
        # binary 0/255 paste mask: any non-dummy pixel belongs to the object
        foreground_mask = Image.fromarray(np.where(np.array(foreground_object.mask) != 0, 255, 0).astype('uint8'))
        background = self.image.copy()
        if blending == 'none':
            # hard paste, no seam treatment
            background.paste(foreground, (x, y), foreground_mask)
        elif blending == 'motion':
            # hard paste, then motion-blur the WHOLE composite image
            background.paste(foreground, (x, y), foreground_mask)
            background = LinearMotionBlur3C(np.asarray(background))

        elif blending == 'poisson':
            # gradient-domain blending via the pb module (row, col) offset order
            offset = (y, x)
            img_mask = np.asarray(foreground_mask)
            img_src = np.asarray(foreground).astype(np.float64)
            img_target = np.asarray(background)
            img_mask, img_src, offset_adj \
                = create_mask(img_mask.astype(np.float64),
                              img_target, img_src, offset=offset)
            background_array = poisson_blend(img_mask, img_src, img_target,
                                             method='normal', offset_adj=offset_adj)
            background = Image.fromarray(background_array, 'RGB')
        elif blending == 'gaussian':
            # soften the paste mask edges with a 5x5 Gaussian (sigma=2)
            background.paste(foreground, (x, y), Image.fromarray(
                cv2.GaussianBlur(np.asarray(foreground_mask), (5, 5), 2)))
        elif blending == 'box':
            # soften the paste mask edges with a 3x3 box filter
            background.paste(foreground, (x, y), Image.fromarray(
                cv2.blur(np.asarray(foreground_mask), (3, 3))))
        else:
            raise NotImplementedError
        self.image = background
228 |
229 | def to_COCO_ann(self) -> Iterator[tuple]:
230 | """ polygons, bbox, area """
231 | for id, category in self.instance_mask_id2category.items():
232 | mask = np.array(self.instance_mask)
233 | mask = np.where(mask == id, mask, 0)
234 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
235 | polygons = []
236 | for contour in contours:
237 | if contour.size >= 6:
238 | polygons.append(contour.flatten().tolist())
239 | if len(polygons) == 0:
240 | continue
241 | RLEs = cocomask.frPyObjects(polygons, mask.shape[0], mask.shape[0])
242 | RLE = cocomask.merge(RLEs)
243 | area = cocomask.area(RLE)
244 | [x, y, w, h] = cv2.boundingRect(mask)
245 | yield polygons, [x, y, w, h], float(area), category
--------------------------------------------------------------------------------
/cutpaste/clip_postprocess.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from concurrent import futures
3 | from pathlib import Path
4 |
5 | import json
6 | import torch
7 | from PIL import Image
8 | from tqdm.auto import tqdm
9 | from transformers import CLIPProcessor, CLIPModel
10 |
11 | voc_texts = [
12 | f"a photo of {obj}"
13 | for obj in [
14 | "person",
15 | "bird", 'cat', 'cow', 'dog', 'horse', 'sheep',
16 | 'aeroplane', 'airplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
17 | 'bottle', 'chair', 'dining table', 'potted plant', 'sofa', "tv/ monitor"
18 | ]
19 | ]
20 |
def batchify(lst, n):
    """Yield successive n-sized chunks from lst (last chunk may be shorter)."""
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
25 |
@torch.no_grad()
def get_CLIP_score(caption: str, images: list):
    """
    Score every image against @caption plus the fixed VOC prompts.

    Images are processed in batches of 400 to bound GPU memory; the
    per-batch logits are concatenated back into one tensor.

    Returns:
        tensor of shape (#images, 1 + len(voc_texts)) of image-text
        similarity logits; column 0 corresponds to @caption.
    """
    per_batch_logits = []
    for batch in batchify(images, 400):
        inputs = processor(text=[caption] + voc_texts, images=batch, return_tensors="pt", padding=True).to("cuda")
        outputs = model(**inputs)
        # image-text similarity scores for this batch
        per_batch_logits.append(outputs.logits_per_image)
    return torch.cat(per_batch_logits, dim=0)
36 |
def scores_for_one_caption(caption: Path):
    """
    Score every generated image under @caption's directory with CLIP and
    select the best ones in two stages:
      1. keep the 2*keep_files images LEAST similar to any VOC class prompt
         (filters out generations that accidentally contain VOC objects);
      2. of those candidates, keep the keep_files images MOST similar to
         the caption itself.

    Returns:
        (caption text, list of selected image filenames)
    """
    keep_files = 30
    images = []
    for image in caption.iterdir():  # eg 1.png
        try:
            images.append(Image.open(image))
        except Exception:
            pass  # weird generation error: skip unreadable files
    # NOTE: assumes at least one readable image exists under @caption
    scores = get_CLIP_score(caption.stem, images)  # (#images, 1 + len(voc_texts))

    # 1. candidates = images with the LOWEST similarity to any VOC label
    consistent_with_voc_labels = scores[:, 1:].max(1).values
    double_keep_files = min(keep_files * 2, scores.size(0))
    _, candidate_indices = torch.topk(-consistent_with_voc_labels.squeeze(), double_keep_files)
    # 2. among candidates, keep the HIGHEST similarity to the caption;
    #    clamp k so topk cannot fail when fewer candidates exist
    consistent_with_caption = scores[candidate_indices, 0]
    _, top_positions = torch.topk(consistent_with_caption, min(keep_files, consistent_with_caption.size(0)))
    # top_positions index the candidate SUBSET, not @images — map them back
    # (previously the subset positions were used to index @images directly,
    # selecting the wrong files)
    selected_indices = candidate_indices[top_positions]
    selected_images = [
        images[i].filename.split("/")[-1]
        for i in selected_indices.detach().cpu().numpy().tolist()
    ]
    return caption.stem, selected_images
59 |
def sort_images(images):
    """Sort filenames like '10.png' numerically by the integer before '.png'."""
    def numeric_key(filename):
        return int(filename.split(".png")[0])

    return sorted(images, key=numeric_key)
62 |
if __name__ == "__main__":
    # CLIP-based post-filtering of diffusion-generated background images:
    # for every object directory under @root, score each caption's generations
    # and keep only the best files, recording the selection in a JSON file.
    pwd = Path(__file__).parent.resolve()
    # previously used roots, kept for reference:
    # root = pwd / "artifact" / "syn" / "voc_1k_bg" / "diffusion_wordnet_v1-10shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "critical_distractor_v1-10shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "diffusion_v1_600each"
    # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-1shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "critical_wordnet_diffusion_v2-10shot_refined"
    root = pwd.parent / "data" / "voc2012" / "background" / "critical_context_only-10shot"
    # root = pwd.parent / "data" / "voc2012" / "background" / "context_augment"

    # module-level globals used by get_CLIP_score / scores_for_one_caption
    model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to("cuda").eval()
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

    to_save = defaultdict(lambda: {})  # object name -> {caption -> [filenames]}

    # for object in ['empty living room', 'railway without train', 'stable']:
    # NOTE: root.iterdir() is consumed twice (once for total=); fine for small dirs
    for object in tqdm(root.iterdir(), total=len(list(root.iterdir()))):  # eg a bicycle
        object = root / object
        # NOTE(review): entries are expected to be DIRECTORIES whose names end
        # in ".jpg" (object.iterdir() is called below) — confirm layout
        if not object.name.endswith(".jpg"):
            continue
        captions = list(object.iterdir())  # eg a bicycle in a black background
        # threads are fine here: the heavy work is GPU-bound CLIP scoring
        with futures.ThreadPoolExecutor(80) as executor:
            res = executor.map(scores_for_one_caption, captions)
            for caption, images in res:
                to_save[object.stem][caption] = sort_images(images)
    # with open(root / "clip_postprocessed.json", "w") as f:
    #     json.dump(to_save, f, indent=4)
    # written to the CURRENT working directory, not @root
    with open("clip_postprocessed.json", "w") as f:
        json.dump(to_save, f, indent=4)
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/bg_template.yaml:
--------------------------------------------------------------------------------
1 | - _target_: utils.read_background_template
2 | data_dir: ${dataset_dir}/background/bg_template
3 |
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/bg_template_plus_dalle.yaml:
--------------------------------------------------------------------------------
1 | - _target_: utils.read_dalle_backgrounds
2 | data_dir: ${dataset_dir}/background/context_augment
3 | - _target_: utils.read_background_template
4 | data_dir: ${dataset_dir}/background/bg_template
--------------------------------------------------------------------------------
/cutpaste/config/bg/VOC/1shot/real.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_real_VOC_backgrounds
2 |
3 | # assume dataset_dir: data/voc2012/
4 | data_list: ${dataset_dir}/train_cls-1shot.txt
5 | data_dir: ${dataset_dir}/VOC2012/JPEGImages
6 | anno_dir: ${dataset_dir}/VOC2012/Annotations
7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject
--------------------------------------------------------------------------------
/cutpaste/config/config.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | # specify here default training configuration
4 | defaults:
5 | - _self_
6 | - ds: null
7 | - fg@fg_real: null
8 | - fg@fg_syn: null
9 | - bg@bg_syn: null
10 | - bg@bg_real: null
11 | - paster: defaults
12 | - exp: null
13 |
14 | # enable color logging
15 | - override hydra/hydra_logging: colorlog
16 | - override hydra/job_logging: colorlog
17 |
18 | # path to original working directory
19 | # hydra hijacks working directory by changing it to the new log directory
20 | # so it's useful to have this path as a special variable
21 | # https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
22 | work_dir: ${hydra:runtime.cwd}
23 |
24 | dataset: ${ds.name}
25 |
26 | name: ??? # name of exp
27 |
28 | output_dir: ${work_dir}/../artifact/${dataset}/${name}
29 |
30 | # seed for random number generators in pytorch, numpy and python.random
31 | seed: 42
32 |
33 | hydra:
34 | job:
35 | chdir: True
--------------------------------------------------------------------------------
/cutpaste/config/ds/VOC.yaml:
--------------------------------------------------------------------------------
1 | name: VOC
2 | label2id:
3 | aeroplane: 1
4 | bicycle: 2
5 | bird: 3
6 | boat: 4
7 | bottle: 5
8 | bus: 6
9 | car: 7
10 | cat: 8
11 | chair: 9
12 | cow: 10
13 | diningtable: 11
14 | dog: 12
15 | horse: 13
16 | motorbike: 14
17 | person: 15
18 | pottedplant: 16
19 | sheep: 17
20 | sofa: 18
21 | train: 19
22 | tvmonitor: 20
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_cutpaste.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | defaults:
4 | - override /ds: VOC
5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
6 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
7 |
8 | name: VOC-demo-1shot-cutpaste
9 | dataset_dir: ${work_dir}/../data/test_data
10 | paster:
11 | debug: True
12 |
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_syn+real.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | defaults:
4 | - override /ds: VOC
5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
6 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
7 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
8 | - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions
9 |
10 | name: VOC-demo-1shot-syn+real
11 | dataset_dir: ${work_dir}/../data/test_data
12 | paster:
13 | debug: True
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_syn.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | defaults:
4 | - override /ds: VOC
5 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
6 | - override /bg@bg_syn: VOC/1shot/bg_template_plus_dalle # zero-shot bg template + 1 shot context augmented captions
7 |
8 | name: VOC-demo-1shot-syn
9 | dataset_dir: ${work_dir}/../data/test_data
10 | paster:
11 | debug: True
12 |
--------------------------------------------------------------------------------
/cutpaste/config/exp/demo_synFg.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 |
3 | defaults:
4 | - override /ds: VOC
5 | - override /fg@fg_real: VOC/1shot/real # 1 shot real foregrounds
6 | - override /fg@fg_syn: VOC/fg_template # zero-shot fg template
7 | - override /bg@bg_real: VOC/1shot/real # 1 shot real backgrounds
8 |
9 | name: VOC-demo-1shot-synFg
10 | dataset_dir: ${work_dir}/../data/test_data
11 | paster:
12 | debug: True
13 |
--------------------------------------------------------------------------------
/cutpaste/config/fg/VOC/1shot/real.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_real_VOC_foregrounds
2 |
3 | # assume dataset_dir: data/voc2012/
4 | data_list: ${dataset_dir}/train_cls-1shot.txt
5 | data_dir: ${dataset_dir}/VOC2012/JPEGImages
6 | anno_dir: ${dataset_dir}/VOC2012/Annotations
7 | seg_dir: ${dataset_dir}/VOC2012/SegmentationObject
--------------------------------------------------------------------------------
/cutpaste/config/fg/VOC/fg_template.yaml:
--------------------------------------------------------------------------------
1 | _target_: utils.read_entity_foregrounds
2 |
3 | dataset: ${dataset}
4 | rgb_dir: ${dataset_dir}/foreground/foreground_rgb
5 | mask_dir: ${dataset_dir}/foreground/foreground_mask
6 |
--------------------------------------------------------------------------------
/cutpaste/config/paster/defaults.yaml:
--------------------------------------------------------------------------------
1 | debug: False # debug mode, if True, only paste 40 images for debugging purpose
2 |
3 | r: 2 # number of repeat
4 | repeat_background: ${paster.r} # alias
5 | c: 4 # number of foregrounds pasted onto each background
6 | num_cut_images: ${paster.c} # alias
7 | j: 1 # number of workers, 1 means no multiprocessing, to accelerate, set to something like 50
8 | max_workers: ${paster.j} # alias
9 |
10 | # random scale in percentage
11 | scale_factor: 0
12 | # random center translation in percentage
13 | center_translation_factor: 0
14 | # if true, after find circle still random scale instead of matching radius
15 | use_random_scaling: False
16 | # if set, each time sample num_cut from [lowerbound, num_cut_images]
17 | num_cut_lowerbound: null
18 | # if true, ablation study of random paste images in anywhere
19 | use_random_paste: False
20 |
21 | select_prob: uniform # how to sample backgrounds
22 | out_size: 512
23 | max_degree: 30
24 |
25 | # if set, only paste [start:to] instead of entire
26 | start: null
27 | to: null
28 |
--------------------------------------------------------------------------------
/cutpaste/foreground.py:
--------------------------------------------------------------------------------
1 | import random
2 | from typing import List
3 |
4 | import cv2
5 | import numpy as np
6 | from PIL import Image
7 |
8 | from anno import Anno
9 |
10 |
def get_box(mask):
    """
    Return (xmin, xmax, ymin, ymax) bounding the nonzero region of @mask.
    Coordinates are inclusive; asserts that @mask has at least one nonzero pixel.
    """
    row_hits = np.where(np.any(mask, axis=1))[0]
    col_hits = np.where(np.any(mask, axis=0))[0]
    assert len(row_hits) > 0
    ymin, ymax = row_hits[0], row_hits[-1]
    xmin, xmax = col_hits[0], col_hits[-1]
    assert ymax >= ymin and xmax >= xmin
    return int(xmin), int(xmax), int(ymin), int(ymax)
19 |
20 |
def get_area(xmin, xmax, ymin, ymax):
    """Area of the axis-aligned box spanned by the given coordinates."""
    width = xmax - xmin
    height = ymax - ymin
    return width * height
23 |
24 |
class CutObject:
    """
    A single foreground object cut out of a source image.

    The mask uses 0 for dummy (background) pixels and a positive category id
    for pixels belonging to the object (1-20 for VOC).
    """

    def __init__(self, img_path: str, img: Image.Image, mask: Image.Image):
        """
        Args:
            img_path: path of the source image this object was cut from
            img: RGB crop containing the object
            mask: crop of the single-object mask, 0 = dummy
        """
        self.img_path = img_path
        self.img = img
        self.mask = mask
        uniques = set(np.unique(self.mask))
        # a crop-tight ("perfect") mask may contain no 0 pixels at all
        assert len(uniques) in [2, 1]
        # whatever remains after removing 0 is the object's category id
        uniques: set = uniques.difference({0})
        self.category, = uniques
        self.category_name = Anno.id2label[self.category]

    def min_enclosing_circle(self):
        """Return (radius, center) of the min circle enclosing all mask contours."""
        contours, _ = cv2.findContours(np.array(self.mask), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        center, radius = cv2.minEnclosingCircle(np.concatenate(contours, 0))
        return radius, center

    def scale(self, scaling_factor: float):
        """Resize image and mask by @scaling_factor (NEAREST for the mask so labels stay intact)."""
        orig_w, orig_h = self.img.size
        o_w, o_h = int(scaling_factor * orig_w), int(scaling_factor * orig_h)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # (it has been an alias of ANTIALIAS since Pillow 2.7)
        self.img = self.img.resize((o_w, o_h), Image.LANCZOS)
        self.mask = self.mask.resize((o_w, o_h), Image.NEAREST)

    def rotate(self, size, max_degree=60):
        """
        Rotate image and mask by a random angle in [-max_degree, max_degree],
        re-sampling until the rotated (expanded) crop fits strictly inside @size.
        NOTE(review): loops forever if the object can never fit inside @size.
        """
        w, h = size
        while True:
            rot_degrees = random.randint(-max_degree, max_degree)
            foreground_tmp = self.img.rotate(rot_degrees, expand=True)
            foreground_mask_tmp = self.mask.rotate(rot_degrees, expand=True)
            o_w, o_h = foreground_tmp.size
            if w - o_w > 0 and h - o_h > 0:
                break
        self.img = foreground_tmp
        self.mask = foreground_mask_tmp

    def save(self, name):
        """Dump the crop and its mask as PNGs for debugging/inspection."""
        self.img.save(f"{name}-fg-image.png")
        self.mask.save(f"{name}-fg-mask.png")
68 |
class CutObjects(list):
    """
    List of foreground objects (CutObject) to cut, later pasted onto a
    PastedImage. May hold several foregrounds from the same source image.
    """

    def __init__(self, *args):
        super().__init__(*args)
        self: List[CutObject]

    def add_image(self, img_path, foreground_anno: Anno, area_threshold=700):
        """
        Append every per-object mask of the given image whose bounding-box
        area reaches @area_threshold; smaller objects are skipped.
        Returns self for chaining.
        """
        source_img = Image.open(img_path)
        for obj_idx, _obj in enumerate(foreground_anno.objects(), 1):
            # per-object mask: 0 is dummy, positive int (label id) marks the object
            object_mask = foreground_anno.create_mask(for_object=obj_idx)
            xmin, xmax, ymin, ymax = get_box(object_mask)
            if get_area(xmin, xmax, ymin, ymax) < area_threshold:
                continue
            box = (xmin, ymin, xmax, ymax)
            self.append(CutObject(img_path, source_img.crop(box), object_mask.crop(box)))
        return self

    def shuffle(self):
        random.shuffle(self)
--------------------------------------------------------------------------------
/cutpaste/paste.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import shutil
4 | from pathlib import Path
5 |
6 | import hydra
7 | from omegaconf import DictConfig, open_dict, ListConfig
8 | import numpy as np
9 |
10 | from paster import Paster
11 |
12 | from logging import Logger, getLogger
13 |
14 | logger = getLogger(__file__)
@hydra.main(config_path="config", config_name="config", version_base="1.2")
def paste(cfg: DictConfig):
    """
    Hydra entry point: build a Paster from the configured foreground and
    background sources (real and/or synthetic) and run cut-and-paste
    synthesis into cfg.output_dir.
    """
    assert cfg.get("dataset") and cfg.get("name")
    assert cfg['paster'].get("select_prob") in ["uniform", "balanced"]
    with open_dict(cfg):
        # resolve dataset_dir / output_dir into concrete Path objects
        if not cfg.get("dataset_dir"):
            if cfg.dataset == "VOC":
                cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "voc2012"
            else:
                cfg.dataset_dir = Path(cfg.work_dir).parent / "data" / "COCO2017"
        else:
            cfg.dataset_dir = Path(cfg.dataset_dir)
        cfg.output_dir = Path(cfg.output_dir)
        if cfg.get("debug") and cfg.output_dir.exists():
            shutil.rmtree(cfg.output_dir)
        os.makedirs(cfg.output_dir, exist_ok=True)

    # seed numpy / python RNGs for reproducible pasting
    np.random.seed(cfg.seed)
    random.seed(cfg.seed)
    os.environ["PYTHONHASHSEED"] = str(cfg.seed)
    paster = Paster(
        label2id=cfg.ds.label2id,
        out_size=cfg.paster.out_size,
        repeat_background=cfg.paster.repeat_background,
        select_prob=cfg.paster.select_prob,
        random_paste=cfg.paster.use_random_paste,
    )
    if (cfg.output_dir / "paster.pt").exists():  # if paster is saved, load it instead of creating a new one
        paster = paster.from_save(cfg.output_dir)
    else:
        #### foreground
        if cfg.get("fg_real"):
            # from utils import read_real_VOC
            paster.foregrounds.extend(hydra.utils.call(cfg.fg_real))
        if cfg.get("fg_syn"):
            # from utils import read_entity_foregrounds
            paster.foregrounds.extend(hydra.utils.call(cfg.fg_syn))
        #### background
        if cfg.get("bg_real"):
            # from utils import read_real_VOC
            paster.backgrounds.extend(hydra.utils.call(cfg.bg_real))
        if cfg.get("bg_syn"):
            # from utils import read_dalle_backgrounds
            # bg_syn is a LIST of source configs (see bg_template_plus_dalle.yaml)
            assert isinstance(cfg.bg_syn, ListConfig)
            for syn_cfg in cfg.bg_syn:
                paster.backgrounds.extend(hydra.utils.call(syn_cfg))

        paster.save(cfg.output_dir)

    paster.validate()
    if cfg.paster.debug:
        # debug: keep only 40 shuffled backgrounds, single worker, fresh dir
        random.shuffle(paster.backgrounds)
        random.shuffle(paster.foregrounds)
        paster.truncate(slice(0, 40))
        cfg.paster.max_workers = 1
        cfg.output_dir /= "debug"
        if cfg.output_dir.exists():
            shutil.rmtree(cfg.output_dir)
    else:
        # NOTE(review): this branch reads cfg.use_random_paste, cfg.select_prob,
        # cfg.num_cut_images, cfg.num_cut_lowerbound, cfg.repeat_each_image,
        # cfg.max_degree, cfg.out_size, cfg.foreground, cfg.background at the
        # TOP level, but these keys live under cfg.paster (or do not exist in
        # the visible configs) — likely crashes outside debug mode; confirm.
        random_str = "-random" if cfg.use_random_paste else ""
        prob_str = '' if cfg.select_prob == 'uniform' else "-balanced"
        num_cut = cfg.num_cut_images if cfg.num_cut_lowerbound is None else f"{cfg.num_cut_lowerbound}~{cfg.num_cut_images}"
        cfg.output_dir = cfg.output_dir \
            / f"[{cfg.foreground},{cfg.background}]{random_str}{prob_str}[{cfg.repeat_each_image},{num_cut},{cfg.max_degree},{cfg.out_size}]"

    os.makedirs(cfg.output_dir, exist_ok=True)

    # optionally paste only a [start:to] slice of the backgrounds
    if cfg.paster.get("start") and cfg.paster.get("to"):
        slice_idx = slice(cfg.paster.start, cfg.paster.to)
        paster.truncate(slice_idx)

    logger.info(f"size of background {len(paster)}; size of foreground {len(paster.foregrounds)}")
    logger.info(f"saving to {cfg.output_dir}")

    # TODO, move json
    # snapshot the resolved Hydra config next to the outputs
    shutil.copy(Path(os.getcwd()) / ".hydra" / "config.yaml", cfg.output_dir / "config.yaml")
    shutil.copy(Path(os.getcwd()) / ".hydra" / "overrides.yaml", cfg.output_dir / "overrides.yaml")
    paster.cut_and_paste(
        out_dir=cfg.output_dir,
        max_workers=cfg.paster.max_workers,
        num_cut_images=cfg.paster.num_cut_images, max_degree=cfg.paster.max_degree,
        num_cut_lowerbound=cfg.paster.num_cut_lowerbound,
        scale_factor=cfg.paster.scale_factor, center_translation_factor=cfg.paster.center_translation_factor,
        use_random_scaling=cfg.paster.use_random_scaling
    )
100 |
101 |
# run as a Hydra CLI app, e.g. `python paste.py exp=demo_syn`
if __name__ == "__main__":
    paste()
--------------------------------------------------------------------------------
/cutpaste/paster.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import random
4 | from concurrent import futures
5 | from copy import deepcopy
6 | from functools import partial
7 | from pathlib import Path
8 | from typing import Union, Optional, List
9 |
10 | import numpy as np
11 | import ujson as json
12 | # from detectron2.utils.env import seed_all_rng
13 | from tqdm import tqdm
14 |
15 | from anno import Anno
16 | from background import PastedBackground
17 | from foreground import CutObjects
18 | from utils import convert_to_COCO
19 |
20 |
class Paster:
    """
    Paste @self.foregrounds (CutObject list) into @self.backgrounds
    (PastedBackground list) and emit pasted images, semantic masks and
    COCO-format instance annotations.
    """

    def __init__(self, label2id: dict,
                 out_size: int = 512,
                 repeat_background: int = 1, select_prob: str = "uniform",
                 random_paste=False):
        """
        Args:
            label2id: mapping from label text to integer id
            out_size: side length backgrounds are resized to before pasting
            repeat_background: how many times each background image is repeated
            select_prob: how foregrounds are sampled ("uniform" or "balanced";
                the supercategory options are accepted but see
                foreground_sample_prob_by_supercategory)
            random_paste: if True paste anywhere at random, otherwise use the
                space-maximization (inscribed-circle) placement
        """
        # blending_list = ['gaussian', 'poisson', 'none', 'box', 'motion']
        assert select_prob in ["uniform", "balanced", "supercategory_CDI", "supercategory"]
        # seed_all_rng(seed)
        # TODO
        self.select_prob = select_prob
        self.random_paste = random_paste
        self.out_size = out_size
        # only gaussian blending is active; the assert documents the full menu
        self.blending_list: List[str] = ['gaussian', ]
        assert all(b in ['gaussian', 'poisson', 'none', 'box', 'motion'] for b in self.blending_list)
        self.repeat_background = repeat_background  # repeat background only
        self.backgrounds = []
        self.foregrounds = []

        self.id2label = {v: k for k, v in label2id.items()}
        self.label2id = dict(label2id)
        # Anno holds the mapping as CLASS attributes shared by all annotations
        Anno.label2id = label2id
        Anno.id2label = self.id2label

    def aggregate_json(self,
                       input_dir: Path, max_workers=1, json_name="COCO"):
        """
        Merge the per-image JSON files under input_dir/tmp into one
        COCO-format annotation file (input_dir/<json_name>.json), assigning
        globally unique integer annotation ids.
        NOTE: `json` here is ujson (see module imports), hence the fast dumps.
        """
        output_json_dict = {
            "images": [],
            "type": "instances",
            "annotations": [],
            "categories": [
                {'supercategory': 'none', 'id': label_id, 'name': label}
                for label, label_id in self.label2id.items()
            ]
        }

        def read_json(path):
            # path is a bare filename inside input_dir/tmp
            with open(input_dir / "tmp" / path) as f:
                return json.load(f)

        files = list(os.listdir(input_dir / "tmp"))
        with tqdm(total=len(files), desc="COCO agg") as pbar, \
                futures.ThreadPoolExecutor(max_workers) as executor:
            todos = []
            bnd_id = 0  # coco need integer bnd ids
            for file in files:
                todos.append(executor.submit(read_json, file))
            for future in futures.as_completed(todos):
                data = future.result()
                output_json_dict["images"].extend(data["images"])
                anno = data["annotations"]
                # renumber annotation ids so they are unique across all files
                for bbox in anno:
                    bbox["id"] = bnd_id
                    bnd_id += 1
                output_json_dict["annotations"].extend(anno)
                pbar.update(1)
        print(f"saving to {input_dir}")
        with open(input_dir / f"{json_name}.json", "w") as f:
            json.dump(output_json_dict, f)
        with open(input_dir / "label2id.json", "w") as f:
            json.dump(dict(self.label2id), f, indent=4)
        # can rm tmp folder if you want
        # shutil.rmtree(input_dir / "tmp")

    def cut_and_paste_one_image(self, i: int, out_dir: Path, out_size: int,
                                num_cut_images: int, blending_list: List[str], probs: List[float],
                                num_cut_lowerbound: Optional[int],
                                max_degree: int, random_paste: bool, scale_factor: int, center_translation_factor: int,
                                use_random_scaling: bool):
        """
        Paste sampled foregrounds onto background @i and save one pasted
        image per blending mode plus its COCO json; skips work already done
        (resumable via the tmp/<name>.json marker file).
        """
        # deepcopy so repeated/parallel runs never mutate the stored background
        background: PastedBackground = deepcopy(self.backgrounds[i])
        path_to_save = f"{Path(background.imagepath).stem}_{i}"
        if (out_dir / "tmp" / f"{path_to_save}.json").exists():
            return

        background.resize(out_size)
        # number of foregrounds: fixed, or sampled from [lowerbound, num_cut_images]
        if num_cut_lowerbound:
            k = random.randint(num_cut_lowerbound, num_cut_images)
        else:
            k = num_cut_images
        # sample WITH replacement, weighted by @probs
        cut_images = random.choices(self.foregrounds, k=k, weights=probs)
        foregrounds = CutObjects(cut_images)
        # find_paste_location also records the masks on @background and may
        # drop foregrounds that do not fit
        locations, foregrounds = background.find_paste_location(foregrounds, max_degree=max_degree,
                                                                random_paste=random_paste, scale_factor=scale_factor,
                                                                center_translation_factor=center_translation_factor,
                                                                use_random_scaling=use_random_scaling)

        # one output image per blending mode, all from the same placement
        for blending in blending_list:
            pasted_background = deepcopy(background)
            for paste_location, foreground_object in zip(locations, foregrounds):
                pasted_background.paste(
                    foreground_object=foreground_object, paste_location=paste_location, blending=blending)
            pasted_background.save(path=out_dir, name=f"{path_to_save}_{blending}")

        convert_to_COCO(out_dir, path_to_save, background, blending_list)

    def foreground_sample_prob_by_supercategory(self, bg_filename, probs):
        """
        Either sample based on the supercategory of @bg_filename or fall back
        to @probs.
        NOTE(review): both branches currently return @probs unchanged — the
        supercategory-specific weighting appears unimplemented.
        """
        if "background" in bg_filename and self.select_prob == "supercategory_CDI":  # bgtemplate
            return probs  # random select fg on bg template images
        return probs

    def save(self, output_dir: Path):
        """Persist a human-readable manifest (paster.json) and a full pickle (paster.pt)."""
        import torch
        with open(output_dir / "paster.json", "w") as f:
            json.dump({
                "counts": [len(self.foregrounds), len(self)],
                "foreground": [str(fg.img_path) for fg in self.foregrounds],
                "background": [str(bg.imagepath) for bg in self.backgrounds],
            }, f)
        torch.save(self, output_dir / "paster.pt")
        # with open(output_dir / "paster.pt", "w") as f:

    @staticmethod
    def from_save(input_dir: Path):
        """Load a Paster previously persisted with save()."""
        import torch
        return torch.load(input_dir / "paster.pt")

    def get_select_prob(self, select_prob) -> np.ndarray:
        """
        Return the sampling probability for each element of @self.foregrounds.
        "uniform": equal weight per sample; otherwise: weights inversely
        proportional to class frequency so each label is drawn equally often.
        """
        if select_prob == "uniform":  # uniform over provided data, thus can be balanced and reflect distribution of bg
            probs = np.ones(len(self.foregrounds)) / len(self.foregrounds)
        else:  # balanced based on label st each label shows up equal likely
            # NOTE(review): this unpacks 4-tuples from self.foregrounds, but
            # elsewhere foregrounds are CutObject instances — confirm which
            # element layout the balanced path actually receives.
            labels = np.array([label for _, label, _, _ in self.foregrounds])  # (N, )
            probs = np.zeros_like(labels).astype(float)
            for class_i in np.unique(labels):
                class_indices = labels == class_i  # boolean (N, )
                num_samples_class_i = class_indices.sum()
                assert num_samples_class_i > 0
                probs[class_indices] = 1.0 / num_samples_class_i
            probs /= probs.sum()  # st sum(probs) == 1
        """
        # check if probs work
        labels = []
        for _ in range(10000):
            cut_images = random.choices(self.backgrounds, k=4, weights=probs)
            for _, l, _, _ in cut_images:
                labels.append(l)
        from collections import Counter
        ct = Counter(labels)
        print(ct) # should be almost same number for each class
        """
        assert len(probs) == len(self.foregrounds)
        return probs

    def __len__(self):  # len def as background
        return len(self.backgrounds)

    def truncate(self, slice):
        # keep only a slice of the backgrounds (debug / sharded runs)
        self.backgrounds = self.backgrounds[slice]

    def validate(self):
        # both pools must be non-empty before pasting can start
        assert len(self.foregrounds) > 0 and len(self.backgrounds) > 0

    def cut_and_paste(
            self, out_dir: Path, num_cut_images: int = 2, max_workers=1,
            # rotate
            max_degree: int = 30,
            # variant
            scale_factor=0, center_translation_factor=0, use_random_scaling=False,
            num_cut_lowerbound: Optional[int] = None
    ):
        """
        will create the following in @out_dir:
            Images folder: pasted RGB images
            Masks folder: semantic level segmentation mask
            COCO.json: instance level COCO segmentation annotation
        NOTE(review): pasting itself runs sequentially (the threaded version
        is commented out below); @max_workers only parallelizes aggregate_json.
        """
        self.validate()
        self.backgrounds = self.backgrounds * self.repeat_background
        self.save(output_dir=out_dir)  # with updated backgrounds
        os.makedirs(out_dir / "Images", exist_ok=True)
        os.makedirs(out_dir / "Masks", exist_ok=True)

        probs = self.get_select_prob(self.select_prob)

        # bind every shared argument once; only the background index varies
        cut_and_paste_one_image = partial(
            self.cut_and_paste_one_image, out_dir=out_dir, out_size=self.out_size, probs=probs,
            num_cut_images=num_cut_images, max_degree=max_degree, blending_list=self.blending_list,
            num_cut_lowerbound=num_cut_lowerbound,
            random_paste=self.random_paste, scale_factor=scale_factor, center_translation_factor=center_translation_factor,
            use_random_scaling=use_random_scaling)
        for i in list(range(len(self))):
            cut_and_paste_one_image(i)

        # todos = []
        # with tqdm(total=len(self), desc="cutpaste") as pbar, \
        #         futures.ThreadPoolExecutor(max_workers) as executor:
        #     for i in list(range(len(self))):
        #         todos.append(executor.submit(
        #             cut_and_paste_one_image, i))
        #     for future in futures.as_completed(todos):
        #         future.result()
        #         pbar.update(1)
        print("converting to COCO format")
        self.aggregate_json(out_dir, max_workers, json_name="COCO")
--------------------------------------------------------------------------------
/cutpaste/pb.py:
--------------------------------------------------------------------------------
1 | """
2 | pb: Poisson Image Blending implemented by Python
3 | """
4 |
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse
import scipy.sparse.linalg  # spsolve used in poisson_blend; ensure submodule is loaded
from skimage import io
9 |
10 |
def create_mask(img_mask, img_target, img_src, offset=(0, 0)):
    '''
    Crop @img_mask / @img_src so the masked region fits inside @img_target
    when shifted by @offset, binarize the mask, and zero its border.

    Takes the np.array from the grayscale image.
    Returns (mask, src, offset_adj) where offset_adj is @offset clipped to
    non-negative values.
    '''
    mask_h, mask_w = img_mask.shape
    target_h, target_w, _ = img_target.shape

    # crop bounds: the part of mask/src that lands inside the target
    row0 = max(0, -offset[0])
    col0 = max(0, -offset[1])
    row1 = mask_h - max(mask_h + offset[0] - target_h, 0)
    col1 = mask_w - max(mask_w + offset[1] - target_w, 0)

    # binarize: every positive mask pixel becomes 1, everything else 0
    mask = np.where(img_mask > 0, 1.0, 0.0)

    mask = mask[row0:row1, col0:col1]
    src = img_src[row0:row1, col0:col1]

    # fix offset: clip so it indexes into the target
    offset_adj = (max(offset[0], 0), max(offset[1], 0))

    # zero the outermost ring so neighbor lookups never need an edge check
    mask[0, :] = 0
    mask[-1, :] = 0
    mask[:, 0] = 0
    mask[:, -1] = 0

    return mask, src, offset_adj
44 |
45 |
def get_gradient_sum(img, i, j, h, w):
    """
    Return the discrete Laplacian of the source image at pixel (i, j):
    4 * img[i, j] minus its four axis-aligned neighbors.
    * 3D array for RGB: one value per channel.

    h, w are unused but kept for signature compatibility with
    get_mixed_gradient_sum. (Removed: a dead `v_sum = np.array([0,0,0])`
    that was immediately overwritten.)
    """
    # callers guarantee (i, j) is never on the image border
    return (img[i, j] * 4
            - img[i + 1, j] - img[i - 1, j] - img[i, j + 1] - img[i, j - 1])
57 |
58 |
def get_mixed_gradient_sum(img_src, img_target, i, j, h, w, ofs,
                           c=1.0):
    """
    Mixed-gradient sum at pixel (i, j): for each of the four axis-aligned
    neighbors, keep (per channel) whichever gradient dominates — the source
    gradient fp (scaled by c for the comparison only) or the target
    gradient gp.
    * 3D array for RGB

    c (>= 0): NOTE(review): the original docstring said larger c makes the
    target gradient more important, but the comparison below keeps fp when
    |fp * c| > |gp|, i.e. larger c favors the *source* gradient — confirm
    intended semantics.
    """
    total = np.array([0.0, 0.0, 0.0])
    for di, dj in ((1, 0), (-1, 0), (0, 1), (0, -1)):
        fp = img_src[i, j] - img_src[i + di, j + dj]
        gp = (img_target[i + ofs[0], j + ofs[1]]
              - img_target[i + di + ofs[0], j + dj + ofs[1]])
        # per-channel pick of the dominant gradient
        total += np.array([fp[ch] if abs(fp[ch] * c) > abs(gp[ch]) else gp[ch]
                           for ch in range(3)])
    return total
85 |
86 |
def poisson_blend(img_mask, img_src, img_target, method='mix', c=1.0,
                  offset_adj=(0, 0)):
    """
    Blend @img_src into @img_target over the pixels where @img_mask == 1.

    method:
        'target' / 'src': plain insertion (copy target or source pixels).
        'mix': Poisson blending with mixed source/target gradients.
        anything else (e.g. 'normal'): Poisson blending with source gradients.
    c: mixing weight forwarded to get_mixed_gradient_sum.
    offset_adj: non-negative (row, col) offset of the mask region inside the
        target, as produced by create_mask.

    Solves the sparse system A x = F per RGB channel, where A is the discrete
    Laplacian inside the mask and the identity outside it.
    """
    hm, wm = img_mask.shape
    region_size = hm * wm

    # one right-hand side per channel; A starts as identity so pixels outside
    # the mask keep their (target) value
    F = np.zeros((region_size, 3))
    A = scipy.sparse.identity(region_size, format='lil')

    # column-major linearization of (i, j); matches the order='F' reshape below
    get_k = lambda i, j: i + j * hm

    # plane insertion
    if method in ['target', 'src']:
        for i in range(hm):
            for j in range(wm):
                k = get_k(i, j)

                # ignore the edge case (# of neighboor is always 4 — the mask
                # border was zeroed by create_mask)
                if img_mask[i, j] == 1:

                    if method == 'target':
                        F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]
                    elif method == 'src':
                        F[k] = img_src[i, j]
                else:
                    F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]

    # poisson blending
    else:
        if method == 'mix':
            grad_func = lambda ii, jj: get_mixed_gradient_sum(
                img_src, img_target, ii, jj, hm, wm, offset_adj, c=c)
        else:
            grad_func = lambda ii, jj: get_gradient_sum(
                img_src, ii, jj, hm, wm)

        for i in range(hm):
            for j in range(wm):
                k = get_k(i, j)

                # ignore the edge case (# of neighboor is always 4)
                if img_mask[i, j] == 1:
                    # f_star accumulates known boundary values (Dirichlet
                    # condition) from neighbors outside the mask
                    f_star = np.array([0.0, 0.0, 0.0])

                    if img_mask[i - 1, j] == 1:
                        A[k, k - 1] = -1
                    else:
                        f_star += img_target[i - 1 +
                                             offset_adj[0], j + offset_adj[1]]

                    if img_mask[i + 1, j] == 1:
                        A[k, k + 1] = -1
                    else:
                        f_star += img_target[i + 1 +
                                             offset_adj[0], j + offset_adj[1]]

                    if img_mask[i, j - 1] == 1:
                        A[k, k - hm] = -1
                    else:
                        f_star += img_target[i +
                                             offset_adj[0], j - 1 + offset_adj[1]]

                    if img_mask[i, j + 1] == 1:
                        A[k, k + hm] = -1
                    else:
                        f_star += img_target[i +
                                             offset_adj[0], j + 1 + offset_adj[1]]

                    A[k, k] = 4
                    F[k] = grad_func(i, j) + f_star

                else:
                    F[k] = img_target[i + offset_adj[0], j + offset_adj[1]]

    # CSR is efficient for the direct solver below
    A = A.tocsr()

    # start from the (uint8) target; only the masked window is overwritten
    img_pro = np.empty_like(img_target.astype(np.uint8))
    img_pro[:] = img_target.astype(np.uint8)

    for l in range(3):
        # x = pyamg.solve(A, F[:, l], verb=True, tol=1e-15, maxiter=100)
        # NOTE(review): relies on scipy.sparse.linalg being reachable from the
        # bare `import scipy.sparse` at the top of this file — confirm for the
        # pinned scipy version.
        x = scipy.sparse.linalg.spsolve(A, F[:, l])
        # clamp to the valid 8-bit range before the dtype cast
        x[x > 255] = 255
        x[x < 0] = 0
        x = np.array(x, img_pro.dtype)

        img_pro[offset_adj[0]:offset_adj[0] + hm,
                offset_adj[1]:offset_adj[1] + wm, l] \
            = x.reshape(hm, wm, order='F')

    return img_pro
177 |
178 |
if __name__ == "__main__":
    # Demo: blend an image into itself at a fixed offset.
    offset = (40, -30)
    # NOTE(review): hard-coded developer path; also `as_grey` was renamed to
    # `as_gray` and later removed from scikit-image — confirm the pinned
    # skimage version before running this demo.
    img_mask = io.imread('/Users/ysakamoto/Projects/sccomp/mask.png', as_grey=True)
    img_src = io.imread('./testimages/0.png').astype(np.float64)
    img_target = io.imread('./testimages/0.png')

    # img_src = io.imread('./testimages/test1_src.png').astype(np.float64)
    # img_target = io.imread('./testimages/test1_target.png')
    # img_mask = io.imread('./testimages/test1_mask.png', as_grey=True)

    # resize src and mask images
    # import skimage.transform
    # from skimage import color
    # fac = 3
    # img_src = skimage.transform.resize(img_src, (np.array(img_src.shape)//fac)[:2])
    # img_mask = io.imread('/Users/ysakamoto/Desktop/mask.png', as_grey=True)
    # img_mask = skimage.transform.resize(img_mask, (np.array(img_mask.shape)//fac)[:2])
    # img_mask = color.rgb2grey(img_mask)

    # crop mask/src to the target and clip the offset to non-negative
    img_mask, img_src, offset_adj \
        = create_mask(img_mask.astype(np.float64),
                      img_target, img_src, offset=offset)

    # 'normal' falls through to source-gradient poisson blending
    img_pro = poisson_blend(img_mask, img_src, img_target,
                            method='normal', offset_adj=offset_adj)
    plt.imshow(img_pro)
    plt.show()
    io.imsave('./testimages/poisson_normal.png', img_pro)

    # pdb.set_trace()
    # i=14
    # for c in np.linspace(10.0, 50.0, 5):
    #     i+=1
    #     img_pro = poisson_blend(img_mask, img_src, img_target, method='mix', c=c)
    #     plt.imshow(img_pro)
    #     plt.show()
    #     io.imsave('./testimages/poisson_mix_%d.png' %i, img_pro)

    # img_pro = poisson_blend(img_mask, img_src, img_target, method='src')
    # io.imsave('./testimages/poisson_src.png', img_pro)

    # img_pro = poisson_blend(img_mask, img_src, img_target, method='target')
    # io.imsave('./testimages/poisson_target.png', img_pro)

    # def plot_coo_matrix(m):
    #     if not isinstance(m, coo_matrix):
    #         m = coo_matrix(m)
    #     fig = plt.figure()
    #     ax = fig.add_subplot(111, axisbg='white')
    #     ax.plot(m.col, m.row, 's', color='black', ms=1)
    #     ax.set_xlim(0, m.shape[1])
    #     ax.set_ylim(0, m.shape[0])
    #     ax.set_aspect('equal')
    #     for spine in ax.spines.values():
    #         spine.set_visible(False)
    #     ax.invert_yaxis()
    #     ax.set_aspect('equal')
    #     ax.set_xticks([])
    #     ax.set_yticks([])
    #     return ax

    # B = A.tocoo()
    # plot_coo_matrix(B)
    # plt.show()
243 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/BoxBlur.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | from scipy.signal import convolve2d
4 |
5 | boxKernelDims = [3, 5, 7, 9]
6 |
7 |
def BoxBlur_random(img):
    """Apply a box blur with a kernel size drawn uniformly from boxKernelDims."""
    kerneldim = boxKernelDims[np.random.randint(0, len(boxKernelDims))]
    return BoxBlur(img, kerneldim)
12 |
13 |
def BoxBlur(img, dim):
    """
    Blur @img with a dim x dim normalized box kernel.
    Operates on a single-channel (grayscale) PIL image — convolution is 2-D.
    """
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, BoxKernel(dim), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
20 |
21 |
def BoxKernel(dim):
    """Return a dim x dim float32 box kernel whose entries sum to 1."""
    kernel = np.ones((dim, dim), dtype=np.float32)
    # every entry is nonzero, so the normalizer is simply the element count
    return kernel / kernel.size
28 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/DefocusBlur.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import numpy as np
3 | from PIL import Image
4 | from scipy.signal import convolve2d
5 | from skimage.draw import circle
6 |
7 | defocusKernelDims = [3, 5, 7, 9]
8 |
9 |
def DefocusBlur_random(img):
    """Apply a defocus blur with a kernel size drawn uniformly from defocusKernelDims."""
    kerneldim = defocusKernelDims[np.random.randint(0, len(defocusKernelDims))]
    return DefocusBlur(img, kerneldim)
14 |
15 |
def DefocusBlur(img, dim):
    """Convolve @img (grayscale PIL image) with a dim-sized disk kernel."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, DiskKernel(dim), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
22 |
23 |
def DiskKernel(dim):
    """
    Return a dim x dim float32 disk-shaped kernel normalized to sum to 1.

    NOTE(review): `skimage.draw.circle` was removed in scikit-image >= 0.19
    (replaced by `skimage.draw.disk`) — this only runs on older versions.
    Also, with radius = dim/2 + 1 and no `shape` argument, the generated
    coordinates may fall outside the kernel (negative indices wrap around) —
    long-standing behavior, confirm before changing.
    """
    kernelwidth = dim
    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
    # dim is odd in practice, so the center is fractional (e.g. 1.5 for dim=3)
    circleCenterCoord = dim / 2
    circleRadius = circleCenterCoord + 1

    rr, cc = circle(circleCenterCoord, circleCenterCoord, circleRadius)
    kernel[rr, cc] = 1

    # small kernels: clear the corners so the support is closer to a disk
    if (dim == 3 or dim == 5):
        kernel = Adjust(kernel, dim)

    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel
39 |
40 |
def Adjust(kernel, kernelwidth):
    """Zero the four corner entries of @kernel in place and return it."""
    last = kernelwidth - 1
    for row, col in ((0, 0), (0, last), (last, 0), (last, last)):
        kernel[row, col] = 0
    return kernel
47 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/GaussianBlur.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import ImageFilter
3 |
4 | gaussianbandwidths = [0.5, 1, 1.5, 2, 2.5, 3, 3.5]
5 |
6 |
def GaussianBlur_random(img):
    """Apply a Gaussian blur with a bandwidth drawn uniformly from gaussianbandwidths."""
    bandwidth = gaussianbandwidths[np.random.randint(0, len(gaussianbandwidths))]
    return GaussianBlur(img, bandwidth)
11 |
12 |
def GaussianBlur(img, bandwidth):
    """Return @img blurred with PIL's Gaussian filter of the given radius."""
    return img.filter(ImageFilter.GaussianBlur(bandwidth))
16 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/LineDictionary.py:
--------------------------------------------------------------------------------
class LineDictionary:
    """
    Lookup table of line-segment endpoints used to build motion-blur kernels.

    self.lines maps kernel size (3, 5, 7, 9) to a dict from angle (degrees)
    to [row0, col0, row1, col1] endpoints within that kernel.
    """

    def __init__(self):
        self.lines = {}
        self.Create3x3Lines()
        self.Create5x5Lines()
        self.Create7x7Lines()
        self.Create9x9Lines()

    def Create3x3Lines(self):
        self.lines[3] = {
            0: [1, 0, 1, 2],
            45: [2, 0, 0, 2],
            90: [0, 1, 2, 1],
            135: [0, 0, 2, 2],
        }

    def Create5x5Lines(self):
        self.lines[5] = {
            0: [2, 0, 2, 4],
            22.5: [3, 0, 1, 4],
            45: [0, 4, 4, 0],
            67.5: [0, 3, 4, 1],
            90: [0, 2, 4, 2],
            112.5: [0, 1, 4, 3],
            135: [0, 0, 4, 4],
            157.5: [1, 0, 3, 4],
        }

    def Create7x7Lines(self):
        self.lines[7] = {
            0: [3, 0, 3, 6],
            15: [4, 0, 2, 6],
            30: [5, 0, 1, 6],
            45: [6, 0, 0, 6],
            60: [6, 1, 0, 5],
            75: [6, 2, 0, 4],
            90: [0, 3, 6, 3],
            105: [0, 2, 6, 4],
            120: [0, 1, 6, 5],
            135: [0, 0, 6, 6],
            150: [1, 0, 5, 6],
            165: [2, 0, 4, 6],
        }

    def Create9x9Lines(self):
        self.lines[9] = {
            0: [4, 0, 4, 8],
            11.25: [5, 0, 3, 8],
            22.5: [6, 0, 2, 8],
            33.75: [7, 0, 1, 8],
            45: [8, 0, 0, 8],
            56.25: [8, 1, 0, 7],
            67.5: [8, 2, 0, 6],
            78.75: [8, 3, 0, 5],
            90: [8, 4, 0, 4],
            101.25: [0, 3, 8, 5],
            112.5: [0, 2, 8, 6],
            123.75: [0, 1, 8, 7],
            135: [0, 0, 8, 8],
            146.25: [1, 0, 7, 8],
            157.5: [2, 0, 6, 8],
            168.75: [3, 0, 5, 8],
        }
69 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/LinearMotionBlur.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import math
3 |
4 | import numpy as np
5 | from PIL import Image
6 | from scipy.signal import convolve2d
7 | from skimage.draw import line
8 |
9 | from .LineDictionary import LineDictionary
10 |
11 | lineLengths = [3, 5, 7, 9]
12 | lineTypes = ["full", "right", "left"]
13 |
14 | lineDict = LineDictionary()
15 |
16 |
def LinearMotionBlur_random(img):
    """Apply a linear motion blur with random length, angle and line type."""
    # draw length, then type, then angle — same RNG order as before
    lineLength = lineLengths[np.random.randint(0, len(lineLengths))]
    lineType = lineTypes[np.random.randint(0, len(lineTypes))]
    lineAngle = randomAngle(lineLength)
    return LinearMotionBlur(img, lineLength, lineAngle, lineType)
24 |
25 |
def LinearMotionBlur(img, dim, angle, linetype):
    """Convolve @img (grayscale PIL image) with a dim-sized line kernel."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, LineKernel(dim, angle, linetype), mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
32 |
33 |
def LineKernel(dim, angle, linetype):
    """
    Return a dim x dim float32 kernel containing a line of ones at @angle,
    normalized to sum to 1.

    linetype:
        'full'  -- the whole line
        'right' -- only from the kernel center to the line's second endpoint
        'left'  -- only from the first endpoint to the kernel center
    """
    kernelwidth = dim
    kernelCenter = int(math.floor(dim / 2))
    angle = SanitizeAngleValue(kernelCenter, angle)
    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
    # BUG FIX: copy the endpoint list — the original code mutated the shared
    # lineDict table in place for 'right'/'left', permanently corrupting every
    # later lookup of the same (dim, angle) entry.
    lineAnchors = list(lineDict.lines[dim][angle])
    if (linetype == 'right'):
        lineAnchors[0] = kernelCenter
        lineAnchors[1] = kernelCenter
    if (linetype == 'left'):
        lineAnchors[2] = kernelCenter
        lineAnchors[3] = kernelCenter
    rr, cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3])
    kernel[rr, cc] = 1
    normalizationFactor = np.count_nonzero(kernel)
    kernel = kernel / normalizationFactor
    return kernel
51 |
52 |
def SanitizeAngleValue(kernelCenter, angle):
    """Snap @angle (degrees) to the nearest angle that has a line-table entry."""
    numDistinctLines = kernelCenter * 4
    angle = math.fmod(angle, 180.0)
    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
    # nearest valid angle (inlines the former nearestValue helper)
    return validLineAngles[int(np.abs(validLineAngles - angle).argmin())]
59 |
60 |
def nearestValue(theta, validAngles):
    """Return the element of @validAngles closest to @theta."""
    return validAngles[np.argmin(np.abs(validAngles - theta))]
64 |
65 |
def randomAngle(kerneldim):
    """Return a random valid line angle (int degrees) for a kernel of size @kerneldim."""
    center = int(math.floor(kerneldim / 2))
    candidates = np.linspace(0, 180, center * 4, endpoint=False)
    return int(candidates[np.random.randint(0, len(candidates))])
72 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/PsfBlur.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os.path
3 | import pickle
4 |
5 | import numpy as np
6 | from PIL import Image
7 | from scipy.signal import convolve2d
8 |
# Pre-computed point-spread-function kernels shipped alongside this module.
pickledPsfFilename = os.path.join(os.path.dirname(__file__), "psf.pkl")

# NOTE(review): pickle.load executes arbitrary code from the file; psf.pkl is
# bundled with the package, so this is only a risk if the file is replaced.
with open(pickledPsfFilename, 'rb') as pklfile:
    psfDictionary = pickle.load(pklfile, encoding='latin1')
13 |
14 |
def PsfBlur(img, psfid):
    """Convolve @img (grayscale PIL image) with the point-spread function @psfid."""
    pixels = np.array(img, dtype="float32")
    blurred = convolve2d(pixels, psfDictionary[psfid], mode='same', fillvalue=255.0)
    return Image.fromarray(blurred.astype("uint8"))
21 |
22 |
def PsfBlur_random(img):
    """Apply a randomly chosen point-spread-function blur."""
    return PsfBlur(img, np.random.randint(0, len(psfDictionary)))
26 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/RandomizedBlur.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .BoxBlur import BoxBlur_random
4 | from .DefocusBlur import DefocusBlur_random
5 | from .GaussianBlur import GaussianBlur_random
6 | from .LinearMotionBlur import LinearMotionBlur_random
7 | from .PsfBlur import PsfBlur_random
8 |
# String-keyed dispatch table: a random integer (stringified) selects one of
# the five blur families.
blurFunctions = {"0": BoxBlur_random, "1": DefocusBlur_random, "2": GaussianBlur_random, "3": LinearMotionBlur_random,
                 "4": PsfBlur_random}
11 |
12 |
def RandomizedBlur(img):
    """Apply one of the registered blur functions, chosen uniformly at random."""
    key = str(np.random.randint(0, len(blurFunctions)))
    return blurFunctions[key](img)
16 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/__init__.py:
--------------------------------------------------------------------------------
1 | from .BoxBlur import BoxBlur, BoxBlur_random
2 | from .DefocusBlur import DefocusBlur, DefocusBlur_random
3 | from .GaussianBlur import GaussianBlur, GaussianBlur_random
4 | from .LinearMotionBlur import LinearMotionBlur, LinearMotionBlur_random
5 | from .PsfBlur import PsfBlur, PsfBlur_random
6 | from .RandomizedBlur import RandomizedBlur
7 |
8 | __all__ = ["BoxBlur", "BoxBlur_random",
9 | "DefocusBlur", "DefocusBlur_random",
10 | "GaussianBlur", "GaussianBlur_random",
11 | "LinearMotionBlur", "LinearMotionBlur_random",
12 | "PsfBlur", "PsfBlur_random",
13 | "RandomizedBlur"]
14 |
--------------------------------------------------------------------------------
/cutpaste/pyblur3/psf.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/cutpaste/pyblur3/psf.pkl
--------------------------------------------------------------------------------
/cutpaste/utils.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import json
3 | import os
4 | import re
5 | from concurrent import futures
6 | from pathlib import Path
7 | from typing import List
8 |
9 | from PIL import Image
10 | from tqdm import tqdm
11 |
12 | from anno import VOCAnno, Anno, EntityAnno
13 | from cutpaste.background import PastedBackground
14 | from foreground import CutObjects, CutObject
15 |
16 |
17 | ####################################################################################################
18 | # foregrounds
def read_real_VOC_foregrounds(data_dir, anno_dir, seg_dir, data_list) -> List[CutObjects]:
    """
    Read real foregrounds from the VOC dataset.
    Each entry listed in @data_list must have a jpg image, an xml annotation
    and a png segmentation; each resulting foreground carries a VOCAnno.
    """
    foregrounds = []
    with open(data_list, 'r') as f:
        lines = f.readlines()
    for line in tqdm(lines, total=len(lines), desc="reading real VOC fg"):
        stem = line.strip().split()[0]
        img_path = os.path.join(data_dir, stem + '.jpg')
        anno_path = os.path.join(anno_dir, stem + '.xml')
        seg_img_path = os.path.join(seg_dir, stem + '.png')
        assert os.path.exists(anno_path) and os.path.exists(img_path) and os.path.exists(seg_img_path)
        anno: VOCAnno = Anno.factory(anno_path, seg_img_path)
        foregrounds.extend(CutObjects().add_image(img_path, anno))
    return foregrounds
36 |
37 |
def read_entity_foregrounds(dataset, rgb_dir, mask_dir) -> List[CutObjects]:
    """
    Read syn foregrounds (processed by entity segmentation, then selected by
    GradCAM). Each has EntityAnno.

    Masks live under @mask_dir / "<label>_mask" / "<caption><num>.png" and are
    matched back to their RGB image under @rgb_dir / <class>/<caption>/<num>.png.
    NOTE(review): `dataset` appears unused by the VOC path below — confirm.
    """
    rgb_dir, mask_dir = map(Path, [rgb_dir, mask_dir])

    foregrounds = []
    # VOC
    def get_voc_image(mask_file):
        # eg voc2012/foreground/foreground_mask_old/car_mask/a car in a white background30.png
        _, label, filename = mask_file.rsplit("/", 2)
        label = label.replace("_mask", "")
        # infer rgb img_path
        # eg (a car in a white background, 30, _)
        target_caption, target_num, _ = re.split(r'(\d+)', filename)
        img_path = None
        # linear search over every class dir for the caption folder;
        # stop once a match has been found
        for class_dir in rgb_dir.iterdir():
            for caption in os.listdir(class_dir):
                if caption == target_caption:
                    img_path = class_dir / caption / f"{target_num}.png"
            if img_path is not None:
                break
        assert img_path is not None, f"{str(mask_file)} Not found!"
        assert os.path.exists(img_path)
        anno: EntityAnno = Anno.factory(None, mask_file)
        return CutObjects().add_image(img_path, anno)

    todos = []
    all_mask_files = list(glob.glob(str(mask_dir / "*_mask" / "*.png")))
    # I/O bound: fan out path matching and annotation loading across threads
    with tqdm(total=len(all_mask_files), desc="collecting real fg") as pbar, \
            futures.ThreadPoolExecutor(100) as executor:
        for mask_file in all_mask_files:
            todos.append(executor.submit(get_voc_image, mask_file))
        for future in futures.as_completed(todos):
            res = future.result()
            foregrounds.extend(res)
            pbar.update(1)
    return foregrounds
77 |
78 | ####################################################################################################
79 | # backgrounds
def read_real_VOC_backgrounds(data_dir, anno_dir, seg_dir, data_list) -> List[PastedBackground]:
    """
    Build a PastedBackground for every entry listed in @data_list.

    Each entry needs a jpg image in @data_dir, an xml annotation in @anno_dir
    and a png segmentation in @seg_dir; the VOCAnno is attached so existing
    objects in the background are known when pasting.
    """
    backgrounds = []
    with open(data_list, 'r') as f:
        lines = f.readlines()
    for line in tqdm(lines, total=len(lines), desc="reading real VOC bg"):
        fields = line.strip().split()
        img_path = os.path.join(data_dir, fields[0] + '.jpg')
        anno_path = os.path.join(anno_dir, fields[0] + '.xml')
        seg_img_path = os.path.join(seg_dir, fields[0] + '.png')
        # fail fast on missing files rather than erroring mid-paste
        assert os.path.exists(anno_path)
        assert os.path.exists(img_path)
        assert os.path.exists(seg_img_path)
        backgrounds.append(PastedBackground(
            imagepath=img_path, anno=VOCAnno(anno_path, seg_img_path)
        ))
    return backgrounds
def read_background_template(data_dir: str) -> List[PastedBackground]:
    """Collect every png under @data_dir (recursively) as an annotation-free
    background, skipping download artifacts ("azDownload") and "group_0"."""
    backgrounds = []
    for rgb_file in glob.iglob(f"{data_dir}/**/*.png",
                               recursive=True):
        path_str = str(rgb_file)
        if "azDownload" in path_str or "group_0" in path_str:
            continue
        backgrounds.append(PastedBackground(rgb_file))
    return backgrounds
108 |
def read_dalle_backgrounds(data_dir: str, clip_strategy="use") -> List[PastedBackground]:
    """
    Use syn images for background, and ignore possible foreground in syn images.

    data_dir: path to dalle-generated syn images, laid out as
        <imageid>.jpg/<caption>/<id> plus clip_postprocessed.json.
    clip_strategy:
        "use": keep only images selected by CLIP post-processing.
        "reverse": keep only images NOT present in the CLIP results.
        anything else: keep every entry directly under @data_dir.
    """
    data_dir = Path(data_dir)
    backgrounds = []
    if clip_strategy in ["use", "reverse"]:
        with open(data_dir / "clip_postprocessed.json") as f:
            data = json.load(f)  # {imageid: {caption: [selected ids]}}
        if clip_strategy == "use":
            # keep only in CLIP processed
            for imageid, captions in data.items():
                for caption, selected_ids in captions.items():
                    for id in selected_ids:
                        img = data_dir / f"{imageid}.jpg" / caption / id
                        assert img.exists()
                        backgrounds.append(PastedBackground(str(img)))
        else:  # keep only NOT in CLIP processed
            for imgid in data_dir.iterdir():
                if not imgid.name.endswith(".jpg"):
                    continue
                # BUG FIX: `data` is keyed by the bare image id (no ".jpg"),
                # while imgid is a Path — the original `imgid not in data`
                # was always True, so "reverse" kept every image.
                if imgid.stem not in data:
                    for caption in imgid.iterdir():
                        for img in caption.iterdir():
                            backgrounds.append(PastedBackground(str(img)))
    else:  # do not use clip but raw
        for img in data_dir.iterdir():
            backgrounds.append(PastedBackground(str(img)))
    return backgrounds
139 |
def convert_to_COCO(input_dir,
                    image_id_src, background, blending_list,
                    image_folder="Images", image_suffix="png", output_dir=None):
    """
    Write a per-image COCO json fragment covering every blending variant.

    image in input_dir / image_folder / image_id_src
    save in output_dir / tmp (output_dir defaults to input_dir)
    COCO image path is stored relative (image_folder / image_id) so that in
    detectron we can provide input_dir (eg use in remote server).
    """
    if output_dir is None:
        output_dir = input_dir
    output_json_dict = {
        "images": [],
        "annotations": []
    }
    # Raise PIL's PNG text-chunk limit once (loop-invariant; previously this
    # import and assignment were re-executed on every loop iteration).
    from PIL import PngImagePlugin
    LARGE_ENOUGH_NUMBER = 100
    PngImagePlugin.MAX_TEXT_CHUNK = LARGE_ENOUGH_NUMBER * (1024 ** 2)

    bnd_id = 1
    for blending in blending_list:
        # image_id eg 2007_000515_16, suffixed with the blending mode
        if blending != "":
            image_id = f"{image_id_src}_{blending}"
        else:
            image_id = image_id_src
        file_name = os.path.join(image_folder, f"{image_id}.{image_suffix}")
        img = Image.open(input_dir / file_name)
        width, height = img.size
        output_json_dict["images"].append({
            "file_name": file_name,
            "height": height,
            "width": width,
            "id": image_id
        })
        # the same instance annotations apply to every blending variant
        for polygons, bbox, area, category in background.to_COCO_ann():
            output_json_dict["annotations"].append({
                "segmentation": polygons,
                "area": area,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": bbox,
                "category_id": int(category),
                "id": f"{image_id}_{bnd_id}"
            })
            bnd_id += 1
    tmpdir = output_dir / "tmp"
    os.makedirs(tmpdir, exist_ok=True)
    tmp_json_path = tmpdir / f"{image_id_src}.json"
    with open(tmp_json_path, "w") as f:
        json.dump(output_json_dict, f)
189 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000039.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_000039.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | tvmonitor
17 | Frontal
18 | 0
19 | 0
20 |
21 | 156
22 | 89
23 | 344
24 | 279
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000063.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_000063.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | dog
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 123
22 | 115
23 | 379
24 | 275
25 |
26 |
27 |
28 | chair
29 | Frontal
30 | 1
31 | 0
32 |
33 | 75
34 | 1
35 | 428
36 | 375
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_000648.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_000648.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 333
12 | 3
13 |
14 | 1
15 |
16 | person
17 | Unspecified
18 | 0
19 | 1
20 |
21 | 394
22 | 199
23 | 404
24 | 223
25 |
26 |
27 |
28 | person
29 | Unspecified
30 | 0
31 | 1
32 |
33 | 424
34 | 199
35 | 436
36 | 220
37 |
38 |
39 |
40 | person
41 | Unspecified
42 | 0
43 | 1
44 |
45 | 434
46 | 196
47 | 444
48 | 220
49 |
50 |
51 |
52 | person
53 | Unspecified
54 | 0
55 | 1
56 |
57 | 443
58 | 195
59 | 452
60 | 220
61 |
62 |
63 |
64 | bus
65 | Right
66 | 0
67 | 0
68 |
69 | 29
70 | 113
71 | 353
72 | 266
73 |
74 |
75 |
76 | aeroplane
77 | Unspecified
78 | 1
79 | 1
80 |
81 | 328
82 | 86
83 | 474
84 | 192
85 |
86 |
87 |
88 | car
89 | Unspecified
90 | 1
91 | 1
92 |
93 | 2
94 | 213
95 | 28
96 | 235
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001420.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_001420.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 332
12 | 3
13 |
14 | 1
15 |
16 | horse
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 192
22 | 109
23 | 340
24 | 270
25 |
26 |
27 |
28 | person
29 | Unspecified
30 | 0
31 | 0
32 |
33 | 281
34 | 80
35 | 364
36 | 268
37 |
38 |
39 |
40 | pottedplant
41 | Unspecified
42 | 0
43 | 0
44 |
45 | 436
46 | 148
47 | 500
48 | 306
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001709.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_001709.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 333
12 | 3
13 |
14 | 1
15 |
16 | person
17 | Frontal
18 | 1
19 | 0
20 |
21 | 367
22 | 132
23 | 426
24 | 287
25 |
26 |
27 |
28 | person
29 | Unspecified
30 | 1
31 | 1
32 |
33 | 1
34 | 1
35 | 202
36 | 333
37 |
38 |
39 |
40 | motorbike
41 | Left
42 | 1
43 | 0
44 |
45 | 1
46 | 45
47 | 412
48 | 333
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_001901.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_001901.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 375
11 | 500
12 | 3
13 |
14 | 1
15 |
16 | chair
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 192
22 | 261
23 | 247
24 | 390
25 |
26 |
27 |
28 | chair
29 | Unspecified
30 | 1
31 | 1
32 |
33 | 292
34 | 255
35 | 312
36 | 415
37 |
38 |
39 |
40 | chair
41 | Right
42 | 0
43 | 0
44 |
45 | 233
46 | 258
47 | 308
48 | 400
49 |
50 |
51 |
52 | diningtable
53 | Unspecified
54 | 1
55 | 0
56 |
57 | 238
58 | 266
59 | 312
60 | 413
61 |
62 |
63 |
64 | sofa
65 | Unspecified
66 | 1
67 | 1
68 |
69 | 4
70 | 284
71 | 173
72 | 497
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002216.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_002216.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 340
12 | 3
13 |
14 | 1
15 |
16 | bus
17 | Unspecified
18 | 1
19 | 0
20 |
21 | 360
22 | 110
23 | 500
24 | 313
25 |
26 |
27 |
28 | bus
29 | Unspecified
30 | 0
31 | 0
32 |
33 | 65
34 | 104
35 | 396
36 | 319
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002668.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_002668.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | diningtable
17 | Unspecified
18 | 1
19 | 1
20 |
21 | 86
22 | 312
23 | 499
24 | 374
25 |
26 |
27 |
28 | person
29 | Unspecified
30 | 1
31 | 1
32 |
33 | 320
34 | 102
35 | 500
36 | 319
37 |
38 |
39 |
40 | person
41 | Unspecified
42 | 1
43 | 0
44 |
45 | 426
46 | 105
47 | 462
48 | 134
49 |
50 |
51 |
52 | person
53 | Frontal
54 | 1
55 | 0
56 |
57 | 95
58 | 83
59 | 309
60 | 319
61 |
62 |
63 |
64 | person
65 | Frontal
66 | 1
67 | 0
68 |
69 | 89
70 | 169
71 | 112
72 | 190
73 |
74 |
75 |
76 | pottedplant
77 | Unspecified
78 | 1
79 | 1
80 |
81 | 19
82 | 124
83 | 97
84 | 187
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002669.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_002669.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | cow
17 | Unspecified
18 | 1
19 | 1
20 |
21 | 292
22 | 137
23 | 372
24 | 174
25 |
26 |
27 |
28 | cow
29 | Left
30 | 0
31 | 0
32 |
33 | 116
34 | 191
35 | 245
36 | 287
37 |
38 |
39 |
40 | person
41 | Frontal
42 | 0
43 | 0
44 |
45 | 442
46 | 241
47 | 478
48 | 294
49 |
50 |
51 |
52 | cow
53 | Right
54 | 0
55 | 0
56 |
57 | 163
58 | 184
59 | 273
60 | 269
61 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_002845.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_002845.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 375
11 | 500
12 | 3
13 |
14 | 1
15 |
16 | sheep
17 | Right
18 | 0
19 | 0
20 |
21 | 284
22 | 196
23 | 326
24 | 216
25 |
26 |
27 |
28 | sheep
29 | Right
30 | 0
31 | 0
32 |
33 | 14
34 | 265
35 | 175
36 | 347
37 |
38 |
39 |
40 | sheep
41 | Right
42 | 0
43 | 0
44 |
45 | 140
46 | 199
47 | 186
48 | 235
49 |
50 |
51 |
52 | sheep
53 | Frontal
54 | 0
55 | 0
56 |
57 | 220
58 | 210
59 | 248
60 | 229
61 |
62 |
63 |
64 | sheep
65 | Frontal
66 | 0
67 | 0
68 |
69 | 254
70 | 204
71 | 279
72 | 227
73 |
74 |
75 |
76 | sheep
77 | Unspecified
78 | 0
79 | 0
80 |
81 | 230
82 | 201
83 | 256
84 | 220
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003207.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_003207.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 333
12 | 3
13 |
14 | 1
15 |
16 | bottle
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 124
22 | 6
23 | 162
24 | 102
25 |
26 |
27 |
28 | bottle
29 | Unspecified
30 | 1
31 | 0
32 |
33 | 94
34 | 36
35 | 132
36 | 91
37 |
38 |
39 |
40 | bottle
41 | Unspecified
42 | 1
43 | 0
44 |
45 | 71
46 | 57
47 | 108
48 | 166
49 |
50 |
51 |
52 | bottle
53 | Unspecified
54 | 0
55 | 0
56 |
57 | 100
58 | 58
59 | 150
60 | 188
61 |
62 |
63 |
64 | bottle
65 | Unspecified
66 | 1
67 | 0
68 |
69 | 13
70 | 71
71 | 61
72 | 184
73 |
74 |
75 |
76 | bottle
77 | Unspecified
78 | 0
79 | 0
80 |
81 | 152
82 | 115
83 | 203
84 | 237
85 |
86 |
87 |
88 | bottle
89 | Unspecified
90 | 0
91 | 0
92 |
93 | 216
94 | 102
95 | 261
96 | 222
97 |
98 |
99 |
100 | bottle
101 | Unspecified
102 | 0
103 | 0
104 |
105 | 259
106 | 134
107 | 307
108 | 260
109 |
110 |
111 |
112 | bottle
113 | Unspecified
114 | 0
115 | 0
116 |
117 | 308
118 | 126
119 | 356
120 | 251
121 |
122 |
123 |
124 | bottle
125 | Unspecified
126 | 0
127 | 0
128 |
129 | 350
130 | 139
131 | 408
132 | 264
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003565.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_003565.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | bird
17 | Rear
18 | 0
19 | 0
20 |
21 | 280
22 | 218
23 | 500
24 | 317
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003778.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_003778.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 500
12 | 3
13 |
14 | 1
15 |
16 | cat
17 | Frontal
18 | 0
19 | 0
20 |
21 | 38
22 | 67
23 | 443
24 | 466
25 |
26 |
27 |
28 | pottedplant
29 | Unspecified
30 | 1
31 | 0
32 |
33 | 55
34 | 75
35 | 221
36 | 256
37 |
38 |
39 |
40 | pottedplant
41 | Unspecified
42 | 1
43 | 0
44 |
45 | 380
46 | 94
47 | 496
48 | 270
49 |
50 |
51 |
52 | pottedplant
53 | Unspecified
54 | 0
55 | 0
56 |
57 | 432
58 | 54
59 | 500
60 | 289
61 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_003876.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_003876.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 382
12 | 3
13 |
14 | 1
15 |
16 | aeroplane
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 93
22 | 98
23 | 174
24 | 144
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_004166.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_004166.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 332
12 | 3
13 |
14 | 1
15 |
16 | tvmonitor
17 | Unspecified
18 | 0
19 | 1
20 |
21 | 243
22 | 129
23 | 304
24 | 181
25 |
26 |
27 |
28 | chair
29 | Unspecified
30 | 1
31 | 1
32 |
33 | 485
34 | 189
35 | 500
36 | 222
37 |
38 |
39 |
40 | chair
41 | Unspecified
42 | 1
43 | 1
44 |
45 | 358
46 | 195
47 | 398
48 | 239
49 |
50 |
51 |
52 | chair
53 | Unspecified
54 | 1
55 | 1
56 |
57 | 300
58 | 205
59 | 355
60 | 254
61 |
62 |
63 |
64 | chair
65 | Unspecified
66 | 1
67 | 1
68 |
69 | 265
70 | 278
71 | 374
72 | 332
73 |
74 |
75 |
76 | sofa
77 | Unspecified
78 | 1
79 | 1
80 |
81 | 1
82 | 199
83 | 99
84 | 332
85 |
86 |
87 |
88 | diningtable
89 | Unspecified
90 | 1
91 | 0
92 |
93 | 234
94 | 204
95 | 500
96 | 332
97 |
98 |
99 |
100 | dog
101 | Unspecified
102 | 0
103 | 0
104 |
105 | 210
106 | 194
107 | 257
108 | 234
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_005273.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_005273.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 375
12 | 3
13 |
14 | 1
15 |
16 | car
17 | Right
18 | 1
19 | 0
20 |
21 | 108
22 | 55
23 | 285
24 | 156
25 |
26 |
27 |
28 | car
29 | Right
30 | 1
31 | 0
32 |
33 | 235
34 | 84
35 | 418
36 | 153
37 |
38 |
39 |
40 | person
41 | Left
42 | 0
43 | 0
44 |
45 | 336
46 | 12
47 | 415
48 | 160
49 |
50 |
51 |
52 | person
53 | Unspecified
54 | 1
55 | 0
56 |
57 | 397
58 | 50
59 | 439
60 | 154
61 |
62 |
63 |
64 | bicycle
65 | Unspecified
66 | 1
67 | 0
68 |
69 | 1
70 | 112
71 | 305
72 | 357
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_005702.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_005702.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 333
11 | 500
12 | 3
13 |
14 | 1
15 |
16 | bicycle
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 38
22 | 227
23 | 292
24 | 500
25 |
26 |
27 |
28 | person
29 | Unspecified
30 | 0
31 | 0
32 |
33 | 76
34 | 63
35 | 327
36 | 465
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006303.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_006303.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 371
12 | 3
13 |
14 | 1
15 |
16 | cat
17 | Unspecified
18 | 0
19 | 0
20 |
21 | 181
22 | 136
23 | 302
24 | 320
25 |
26 |
27 |
28 | pottedplant
29 | Unspecified
30 | 1
31 | 0
32 |
33 | 354
34 | 3
35 | 500
36 | 371
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006400.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_006400.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 335
12 | 3
13 |
14 | 1
15 |
16 | train
17 | Frontal
18 | 1
19 | 0
20 |
21 | 433
22 | 80
23 | 500
24 | 256
25 |
26 |
27 |
28 | train
29 | Unspecified
30 | 1
31 | 0
32 |
33 | 189
34 | 101
35 | 440
36 | 235
37 |
38 |
39 |
40 | train
41 | Unspecified
42 | 1
43 | 0
44 |
45 | 40
46 | 125
47 | 199
48 | 203
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/Annotations/2007_006673.xml:
--------------------------------------------------------------------------------
1 |
2 | VOC2012
3 | 2007_006673.jpg
4 |
5 | The VOC2007 Database
6 | PASCAL VOC2007
7 | flickr
8 |
9 |
10 | 500
11 | 333
12 | 3
13 |
14 | 1
15 |
16 | boat
17 | Right
18 | 0
19 | 0
20 |
21 | 394
22 | 42
23 | 445
24 | 234
25 |
26 |
27 |
28 | boat
29 | Right
30 | 0
31 | 0
32 |
33 | 353
34 | 81
35 | 390
36 | 207
37 |
38 |
39 |
40 | boat
41 | Right
42 | 0
43 | 0
44 |
45 | 309
46 | 52
47 | 372
48 | 246
49 |
50 |
51 |
52 | boat
53 | Right
54 | 0
55 | 0
56 |
57 | 294
58 | 110
59 | 324
60 | 196
61 |
62 |
63 |
64 | boat
65 | Right
66 | 0
67 | 0
68 |
69 | 242
70 | 70
71 | 275
72 | 212
73 |
74 |
75 |
76 | boat
77 | Right
78 | 0
79 | 0
80 |
81 | 230
82 | 117
83 | 245
84 | 189
85 |
86 |
87 |
88 | boat
89 | Right
90 | 0
91 | 0
92 |
93 | 214
94 | 112
95 | 239
96 | 201
97 |
98 |
99 |
100 | boat
101 | Right
102 | 0
103 | 0
104 |
105 | 187
106 | 114
107 | 210
108 | 195
109 |
110 |
111 |
112 | boat
113 | Right
114 | 0
115 | 0
116 |
117 | 171
118 | 107
119 | 206
120 | 223
121 |
122 |
123 |
124 | boat
125 | Right
126 | 0
127 | 0
128 |
129 | 137
130 | 105
131 | 171
132 | 213
133 |
134 |
135 |
136 | boat
137 | Unspecified
138 | 0
139 | 1
140 |
141 | 43
142 | 224
143 | 71
144 | 234
145 |
146 |
147 |
148 | boat
149 | Unspecified
150 | 0
151 | 1
152 |
153 | 4
154 | 231
155 | 29
156 | 244
157 |
158 |
159 |
160 |
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000039.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000039.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000063.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000063.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_000648.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_000648.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001420.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001420.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001709.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001709.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_001901.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_001901.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002216.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002216.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002668.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002668.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002669.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002669.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_002845.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_002845.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003207.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003207.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003565.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003565.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003778.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003778.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_003876.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_003876.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_004166.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_004166.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_005273.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005273.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_005702.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_005702.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006303.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006303.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006400.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006400.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/JPEGImages/2007_006673.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/JPEGImages/2007_006673.jpg
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000039.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000039.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000063.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000063.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_000648.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_000648.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001420.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001420.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001709.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001709.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_001901.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_001901.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002216.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002216.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002668.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002668.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002669.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002669.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_002845.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_002845.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003207.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003207.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003565.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003565.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003778.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003778.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_003876.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_003876.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_004166.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_004166.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_005273.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005273.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_005702.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_005702.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006303.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006400.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006400.png
--------------------------------------------------------------------------------
/data/test_data/VOC2012/SegmentationObject/2007_006673.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/VOC2012/SegmentationObject/2007_006673.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/589.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/589.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/590.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/590.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/591.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/591.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/593.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/593.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/forest/A real photo of forest/598.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/forest/A real photo of forest/598.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/367.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/373.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/429.png
--------------------------------------------------------------------------------
/data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/bg_template/railway without train/A real photo of railway without train/475.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_000504.jpg/a colored photo of an empty pile of trash on the ground/49.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001609.jpg/a colored photo of an empty living room/69.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_001764.jpg/a real image of an empty grass covered field/71.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002227.jpg/a real image of an empty wall in a living room/45.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002281.jpg/a colored photo of an empty street next to a forest/79.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_002967.jpg/a real image of an empty doorstep/58.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_003451.jpg/a real image of an empty living room filled with furniture and a large window/40.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_005124.jpg/a real image of an empty grass field/58.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_006136.jpg/a real image of an empty grass near a forest/33.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/background/context_augment/2007_007585.jpg/a real image of an empty grass field/67.png
--------------------------------------------------------------------------------
/data/test_data/background/context_augment/clip_postprocessed.json:
--------------------------------------------------------------------------------
1 | {
2 | "2007_003451": {
3 | "a real image of an empty living room filled with furniture and a large window": [
4 | "40.png"
5 | ]
6 | },
7 | "2007_002227": {
8 | "a real image of an empty wall in a living room": [
9 | "45.png"
10 | ]
11 | },
12 | "2007_006136": {
13 | "a real image of an empty grass near a forest": [
14 | "33.png"
15 | ]
16 | },
17 | "2007_002281": {
18 | "a colored photo of an empty street next to a forest": [
19 | "79.png"
20 | ]
21 | },
22 | "2007_000504": {
23 | "a colored photo of an empty pile of trash on the ground": [
24 | "49.png"
25 | ]
26 | },
27 | "2007_007585": {
28 | "a real image of an empty grass field": [
29 | "67.png"
30 | ]
31 | },
32 | "2007_001764": {
33 | "a real image of an empty grass covered field": [
34 | "71.png"
35 | ]
36 | },
37 | "2007_002967": {
38 | "a real image of an empty doorstep": [
39 | "58.png"
40 | ]
41 | },
42 | "2007_001609": {
43 | "a colored photo of an empty living room": [
44 | "69.png"
45 | ]
46 | },
47 | "2007_005124": {
48 | "a real image of an empty grass field": [
49 | "58.png"
50 | ]
51 | }
52 | }
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/The picture of an airplane on a pure background422.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane in a pure background449.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/aeroplane_mask/an airplane106.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background128.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bicycle_mask/a bicycle in a pure background225.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background162.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bird_mask/a bird in a pure background374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/The picture of a boat on a pure background39.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat in a pure background371.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/boat_mask/a boat79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/boat_mask/a boat79.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/The picture of a bottle on a pure background407.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle in a pure background108.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bottle_mask/a bottle89.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/The picture of a bus on a pure background460.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/a bus105.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus105.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/bus_mask/a bus410.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/bus_mask/a bus410.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background100.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/The picture of a car on a pure background286.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/car_mask/a car97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/car_mask/a car97.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat245.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat245.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat58.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cat_mask/a cat62.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cat_mask/a cat62.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/The picture of a chair on a pure background423.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair in a pure background301.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/chair_mask/a chair402.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/chair_mask/a chair402.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow in a pure background9.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow223.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/cow_mask/a cow56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/cow_mask/a cow56.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a dining table in a pure background63.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table123.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/diningtable_mask/a table300.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/The picture of a dog on a pure background236.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog in a pure background487.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/dog_mask/a dog121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/dog_mask/a dog121.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/The picture of a horse on a pure background469.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse in a pure background293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/horse_mask/a horse298.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/horse_mask/a horse298.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background367.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike in a pure background421.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/motorbike_mask/a motorbike315.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man146.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man253.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/person_mask/a man345.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/person_mask/a man345.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant11.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/pottedplant_mask/a potted plant434.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/The picture of a sheep on a pure background212.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep in a pure background219.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sheep_mask/a sheep351.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background353.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/sofa_mask/a sofa in a pure background395.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background303.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/The picture of a train on a pure background32.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/train_mask/a train in a pure background133.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/a tv monitor251.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor107.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_mask/tvmonitor_mask/an old monitor426.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/128.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bicycle/a bicycle in a pure background/225.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/162.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bird/a bird in a pure background/374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/The picture of a boat on a pure background/39.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat in a pure background/371.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a boat/a boat/79.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a boat/a boat/79.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/The picture of a bottle on a pure background/407.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle in a pure background/108.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bottle/a bottle/89.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/The picture of a bus on a pure background/460.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/The picture of a bus on a pure background/460.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/a bus/105.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/105.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a bus/a bus/410.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a bus/a bus/410.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/100.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/The picture of a car on a pure background/286.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a car/a car/97.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a car/a car/97.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/245.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/245.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/58.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/58.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cat/a cat/62.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cat/a cat/62.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/The picture of a chair on a pure background/423.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair in a pure background/301.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a chair/a chair/402.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a chair/a chair/402.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow in a pure background/9.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow/223.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/223.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a cow/a cow/56.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a cow/a cow/56.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dining table/a dining table in a pure background/63.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/The picture of a dog on a pure background/236.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog in a pure background/487.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a dog/a dog/121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a dog/a dog/121.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/The picture of a horse on a pure background/469.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse in a pure background/293.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a horse/a horse/298.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a horse/a horse/298.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/146.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/253.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a man/a man/345.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a man/a man/345.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/367.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike in a pure background/421.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a motorbike/a motorbike/315.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/11.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/374.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a potted plant/a potted plant/434.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/The picture of a sheep on a pure background/212.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep in a pure background/219.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sheep/a sheep/351.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/122.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/353.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a sofa/a sofa in a pure background/395.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a table/a table/123.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/123.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a table/a table/300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a table/a table/300.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/303.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/The picture of a train on a pure background/32.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a train/a train in a pure background/133.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/a tv monitor/a tv monitor/251.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/The picture of an airplane on a pure background/422.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane in a pure background/449.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an airplane/an airplane/106.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/107.png
--------------------------------------------------------------------------------
/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyhandy/Text2Image-for-Detection/233cc3668a19efc4e1464756b706721c04c2e2f3/data/test_data/foreground/foreground_rgb/an old monitor/an old monitor/426.png
--------------------------------------------------------------------------------
/data/test_data/labels.txt:
--------------------------------------------------------------------------------
1 | 0 __background__
2 | 1 aeroplane
3 | 2 bicycle
4 | 3 bird
5 | 4 boat
6 | 5 bottle
7 | 6 bus
8 | 7 car
9 | 8 cat
10 | 9 chair
11 | 10 cow
12 | 11 diningtable
13 | 12 dog
14 | 13 horse
15 | 14 motorbike
16 | 15 person
17 | 16 pottedplant
18 | 17 sheep
19 | 18 sofa
20 | 19 train
21 | 20 tvmonitor
--------------------------------------------------------------------------------
/data/test_data/train_cls-1shot.txt:
--------------------------------------------------------------------------------
1 | 2007_000039 19
2 | 2007_000063 8 11
3 | 2007_000648 0 5 6 14
4 | 2007_001420 12 14 15
5 | 2007_001709 13 14
6 | 2007_001901 8 10 17
7 | 2007_002216 5
8 | 2007_002668 10 14 15
9 | 2007_002669 9 14
10 | 2007_002845 16
11 | 2007_003207 4
12 | 2007_003565 2
13 | 2007_003778 7 15
14 | 2007_003876 0
15 | 2007_004166 8 10 11 17 19
16 | 2007_005273 1 6 14
17 | 2007_005702 1 14
18 | 2007_006303 7 15
19 | 2007_006400 18
20 | 2007_006673 3
21 |
--------------------------------------------------------------------------------
/data/voc2012/1k_10_shot.json:
--------------------------------------------------------------------------------
1 | {
2 | "0": [
3 | "2007_000480.jpg",
4 | "2007_002198.jpg",
5 | "2007_004009.jpg",
6 | "2007_000648.jpg",
7 | "2007_000738.jpg",
8 | "2007_003000.jpg",
9 | "2007_000032.jpg",
10 | "2007_002107.jpg",
11 | "2007_003876.jpg",
12 | "2007_002099.jpg",
13 | "2007_000256.jpg",
14 | "2007_000243.jpg"
15 | ],
16 | "1": [
17 | "2007_004769.jpg",
18 | "2007_002227.jpg",
19 | "2007_000584.jpg",
20 | "2007_006317.jpg",
21 | "2007_000793.jpg",
22 | "2007_005273.jpg",
23 | "2007_001027.jpg",
24 | "2007_005430.jpg",
25 | "2007_000515.jpg",
26 | "2007_005368.jpg",
27 | "2007_005064.jpg",
28 | "2007_005702.jpg"
29 | ],
30 | "2": [
31 | "2007_009607.jpg",
32 | "2007_003565.jpg",
33 | "2007_002212.jpg",
34 | "2007_009759.jpg",
35 | "2007_002403.jpg",
36 | "2007_003330.jpg",
37 | "2007_002896.jpg",
38 | "2007_003267.jpg",
39 | "2007_006490.jpg",
40 | "2007_003118.jpg",
41 | "2007_000645.jpg",
42 | "2007_000363.jpg",
43 | "2007_000068.jpg",
44 | "2007_002120.jpg"
45 | ],
46 | "3": [
47 | "2007_006281.jpg",
48 | "2007_000713.jpg",
49 | "2007_000241.jpg",
50 | "2007_006660.jpg",
51 | "2007_006673.jpg",
52 | "2007_003910.jpg",
53 | "2007_002234.jpg",
54 | "2007_001487.jpg",
55 | "2007_001698.jpg"
56 | ],
57 | "4": [
58 | "2007_006409.jpg",
59 | "2007_004476.jpg",
60 | "2007_003207.jpg",
61 | "2007_000170.jpg",
62 | "2007_006483.jpg",
63 | "2007_002545.jpg",
64 | "2007_003451.jpg",
65 | "2007_004291.jpg",
66 | "2007_001185.jpg",
67 | "2007_002953.jpg",
68 | "2007_003431.jpg",
69 | "2007_007250.jpg",
70 | "2007_001602.jpg",
71 | "2007_000250.jpg"
72 | ],
73 | "5": [
74 | "2007_004705.jpg",
75 | "2007_004065.jpg",
76 | "2007_001595.jpg",
77 | "2007_007003.jpg",
78 | "2007_000768.jpg",
79 | "2007_003715.jpg",
80 | "2007_002024.jpg",
81 | "2007_002216.jpg",
82 | "2007_005262.jpg"
83 | ],
84 | "6": [
85 | "2007_002281.jpg",
86 | "2007_004481.jpg",
87 | "2007_004810.jpg",
88 | "2007_005988.jpg",
89 | "2007_002370.jpg",
90 | "2011_001004.jpg",
91 | "2007_003815.jpg",
92 | "2007_002789.jpg",
93 | "2007_006151.jpg",
94 | "2007_004830.jpg",
95 | "2007_001857.jpg"
96 | ],
97 | "7": [
98 | "2007_005688.jpg",
99 | "2007_001825.jpg",
100 | "2007_002760.jpg",
101 | "2007_000549.jpg",
102 | "2007_003778.jpg",
103 | "2007_000528.jpg",
104 | "2007_003788.jpg",
105 | "2007_006303.jpg",
106 | "2011_000999.jpg",
107 | "2007_004998.jpg",
108 | "2007_000876.jpg",
109 | "2007_003525.jpg"
110 | ],
111 | "8": [
112 | "2007_006004.jpg",
113 | "2007_005212.jpg",
114 | "2007_003541.jpg",
115 | "2007_006477.jpg",
116 | "2007_004166.jpg",
117 | "2007_005647.jpg",
118 | "2007_003251.jpg",
119 | "2007_001609.jpg",
120 | "2007_006530.jpg",
121 | "2007_001901.jpg",
122 | "2007_001340.jpg",
123 | "2007_005266.jpg",
124 | "2007_000063.jpg",
125 | "2007_003205.jpg",
126 | "2007_003889.jpg",
127 | "2007_006066.jpg",
128 | "2007_002368.jpg",
129 | "2007_005086.jpg",
130 | "2007_001439.jpg"
131 | ],
132 | "9": [
133 | "2007_002669.jpg",
134 | "2007_000904.jpg",
135 | "2007_001764.jpg",
136 | "2007_000504.jpg",
137 | "2007_001917.jpg",
138 | "2007_004537.jpg",
139 | "2007_004081.jpg",
140 | "2007_004500.jpg",
141 | "2007_002088.jpg",
142 | "2007_001073.jpg",
143 | "2007_005797.jpg",
144 | "2007_005124.jpg"
145 | ],
146 | "10": [
147 | "2007_002914.jpg",
148 | "2007_005790.jpg",
149 | "2007_003529.jpg",
150 | "2007_006699.jpg",
151 | "2007_002668.jpg",
152 | "2007_003668.jpg",
153 | "2007_001834.jpg"
154 | ],
155 | "11": [
156 | "2007_002611.jpg",
157 | "2007_000720.jpg",
158 | "2007_003604.jpg",
159 | "2007_001397.jpg",
160 | "2007_002055.jpg",
161 | "2007_001225.jpg",
162 | "2007_009605.jpg",
163 | "2007_009327.jpg",
164 | "2007_007585.jpg",
165 | "2007_007930.jpg"
166 | ],
167 | "12": [
168 | "2007_001724.jpg",
169 | "2007_002273.jpg",
170 | "2007_006445.jpg",
171 | "2007_000392.jpg",
172 | "2007_003189.jpg",
173 | "2007_005248.jpg",
174 | "2007_000836.jpg",
175 | "2007_001960.jpg",
176 | "2007_001420.jpg",
177 | "2007_006134.jpg"
178 | ],
179 | "13": [
180 | "2007_002488.jpg",
181 | "2007_005989.jpg",
182 | "2007_000822.jpg",
183 | "2007_005951.jpg",
184 | "2007_004003.jpg",
185 | "2007_005314.jpg",
186 | "2007_000733.jpg",
187 | "2007_001709.jpg",
188 | "2007_000364.jpg",
189 | "2007_005878.jpg",
190 | "2007_002105.jpg"
191 | ],
192 | "14": [
193 | "2007_002895.jpg",
194 | "2007_002639.jpg",
195 | "2007_002361.jpg",
196 | "2007_002954.jpg",
197 | "2007_004289.jpg",
198 | "2007_004707.jpg",
199 | "2007_002293.jpg",
200 | "2007_002142.jpg"
201 | ],
202 | "15": [
203 | "2007_004948.jpg",
204 | "2007_002967.jpg",
205 | "2007_001149.jpg"
206 | ],
207 | "16": [
208 | "2007_001872.jpg",
209 | "2007_003190.jpg",
210 | "2007_001416.jpg",
211 | "2007_006136.jpg",
212 | "2007_002845.jpg",
213 | "2007_004423.jpg",
214 | "2007_003593.jpg",
215 | "2007_004768.jpg",
216 | "2007_006832.jpg",
217 | "2007_006899.jpg"
218 | ],
219 | "17": [
220 | "2007_008203.jpg"
221 | ],
222 | "18": [
223 | "2007_004627.jpg",
224 | "2007_004663.jpg",
225 | "2007_003286.jpg",
226 | "2007_006254.jpg",
227 | "2007_006400.jpg",
228 | "2007_005360.jpg",
229 | "2007_002462.jpg",
230 | "2007_003178.jpg",
231 | "2007_004951.jpg",
232 | "2007_000333.jpg"
233 | ],
234 | "19": [
235 | "2007_000121.jpg",
236 | "2007_006704.jpg",
237 | "2007_005210.jpg",
238 | "2007_000039.jpg",
239 | "2007_005902.jpg",
240 | "2007_001704.jpg"
241 | ]
242 | }
--------------------------------------------------------------------------------
/data/voc2012/1k_1_shot.json:
--------------------------------------------------------------------------------
1 | {
2 | "0": [
3 | "2007_003876.jpg"
4 | ],
5 | "1": [
6 | "2007_005702.jpg"
7 | ],
8 | "2": [
9 | "2007_003565.jpg"
10 | ],
11 | "3": [
12 | "2007_006673.jpg"
13 | ],
14 | "4": [
15 | "2007_003207.jpg"
16 | ],
17 | "5": [
18 | "2007_002216.jpg"
19 | ],
20 | "6": [
21 | "2007_005273.jpg"
22 | ],
23 | "7": [
24 | "2007_003778.jpg"
25 | ],
26 | "8": [
27 | "2007_000063.jpg"
28 | ],
29 | "9": [
30 | "2007_002669.jpg"
31 | ],
32 | "10": [
33 | "2007_002668.jpg"
34 | ],
35 | "11": [
36 | "2007_004166.jpg"
37 | ],
38 | "12": [
39 | "2007_001420.jpg"
40 | ],
41 | "13": [
42 | "2007_001709.jpg"
43 | ],
44 | "14": [
45 | "2007_000648.jpg"
46 | ],
47 | "15": [
48 | "2007_006303.jpg"
49 | ],
50 | "16": [
51 | "2007_002845.jpg"
52 | ],
53 | "17": [
54 | "2007_001901.jpg"
55 | ],
56 | "18": [
57 | "2007_006400.jpg"
58 | ],
59 | "19": [
60 | "2007_000039.jpg"
61 | ]
62 | }
63 |
--------------------------------------------------------------------------------
/data/voc2012/label2id.json:
--------------------------------------------------------------------------------
1 | {
2 | "aeroplane": 1,
3 | "bicycle": 2,
4 | "bird": 3,
5 | "boat": 4,
6 | "bottle": 5,
7 | "bus": 6,
8 | "car": 7,
9 | "cat": 8,
10 | "chair": 9,
11 | "cow": 10,
12 | "diningtable": 11,
13 | "dog": 12,
14 | "horse": 13,
15 | "motorbike": 14,
16 | "person": 15,
17 | "pottedplant": 16,
18 | "sheep": 17,
19 | "sofa": 18,
20 | "train": 19,
21 | "tvmonitor": 20
22 | }
--------------------------------------------------------------------------------
/data/voc2012/labels.txt:
--------------------------------------------------------------------------------
1 | 0 __background__
2 | 1 aeroplane
3 | 2 bicycle
4 | 3 bird
5 | 4 boat
6 | 5 bottle
7 | 6 bus
8 | 7 car
9 | 8 cat
10 | 9 chair
11 | 10 cow
12 | 11 diningtable
13 | 12 dog
14 | 13 horse
15 | 14 motorbike
16 | 15 person
17 | 16 pottedplant
18 | 17 sheep
19 | 18 sofa
20 | 19 train
21 | 20 tvmonitor
--------------------------------------------------------------------------------
/data/voc2012/train_cls-10shot.txt:
--------------------------------------------------------------------------------
1 | 2007_000032 0 14
2 | 2007_000039 19
3 | 2007_000063 8 11
4 | 2007_000068 2
5 | 2007_000121 19
6 | 2007_000170 4 14
7 | 2007_000241 3
8 | 2007_000243 0
9 | 2007_000250 4 10
10 | 2007_000256 0
11 | 2007_000333 18
12 | 2007_000363 2
13 | 2007_000364 13 14
14 | 2007_000392 12 14
15 | 2007_000480 0 14
16 | 2007_000504 9 14
17 | 2007_000515 1 6 14
18 | 2007_000528 7
19 | 2007_000549 7
20 | 2007_000584 1 17
21 | 2007_000645 2
22 | 2007_000648 0 5 6 14
23 | 2007_000713 3
24 | 2007_000720 11
25 | 2007_000733 13 14
26 | 2007_000738 0
27 | 2007_000768 5
28 | 2007_000793 1 5 14
29 | 2007_000822 13
30 | 2007_000836 12 14
31 | 2007_000876 7
32 | 2007_000904 9 12 14
33 | 2007_001027 1 8 17 19
34 | 2007_001073 9
35 | 2007_001149 15 17 19
36 | 2007_001185 4 7 10 14
37 | 2007_001225 11
38 | 2007_001340 8 11 14
39 | 2007_001397 11
40 | 2007_001416 16
41 | 2007_001420 12 14 15
42 | 2007_001439 8 10
43 | 2007_001487 3
44 | 2007_001595 5
45 | 2007_001602 4
46 | 2007_001609 8 10
47 | 2007_001698 3
48 | 2007_001704 19
49 | 2007_001709 13 14
50 | 2007_001724 12
51 | 2007_001764 9
52 | 2007_001825 7 11
53 | 2007_001834 10
54 | 2007_001857 6 14
55 | 2007_001872 16
56 | 2007_001901 8 10 17
57 | 2007_001917 9
58 | 2007_001960 12
59 | 2007_002024 5 14
60 | 2007_002055 11 14 17
61 | 2007_002088 9
62 | 2007_002099 0
63 | 2007_002105 13 14
64 | 2007_002107 0
65 | 2007_002120 2 14
66 | 2007_002142 14
67 | 2007_002198 0
68 | 2007_002212 2
69 | 2007_002216 5
70 | 2007_002227 1 19
71 | 2007_002234 3
72 | 2007_002273 12 14
73 | 2007_002281 6 14
74 | 2007_002293 14
75 | 2007_002361 14 15
76 | 2007_002368 8 10 17
77 | 2007_002370 6 14
78 | 2007_002403 2 3 14
79 | 2007_002462 18
80 | 2007_002488 13 14
81 | 2007_002545 4 14 17
82 | 2007_002611 11 14
83 | 2007_002639 14
84 | 2007_002668 10 14 15
85 | 2007_002669 9 14
86 | 2007_002760 7
87 | 2007_002789 6 9 14
88 | 2007_002845 16
89 | 2007_002895 14
90 | 2007_002896 2
91 | 2007_002914 10 14
92 | 2007_002953 4 19
93 | 2007_002954 14
94 | 2007_002967 15
95 | 2007_003000 0
96 | 2007_003118 2 14
97 | 2007_003178 18
98 | 2007_003189 12 14 15
99 | 2007_003190 16
100 | 2007_003205 8 14
101 | 2007_003207 4
102 | 2007_003251 8 10
103 | 2007_003267 2
104 | 2007_003286 18
105 | 2007_003330 2
106 | 2007_003431 4 14
107 | 2007_003451 4 8 17 19
108 | 2007_003525 7
109 | 2007_003529 10 14
110 | 2007_003541 8 14
111 | 2007_003565 2
112 | 2007_003593 16
113 | 2007_003604 11 19
114 | 2007_003668 10
115 | 2007_003715 5
116 | 2007_003778 7 15
117 | 2007_003788 7 8
118 | 2007_003815 6 14
119 | 2007_003876 0
120 | 2007_003889 8 12 14
121 | 2007_003910 3 14
122 | 2007_004003 13
123 | 2007_004009 0
124 | 2007_004065 5 14
125 | 2007_004081 9 15
126 | 2007_004166 8 10 11 17 19
127 | 2007_004289 14 19
128 | 2007_004291 4 14
129 | 2007_004423 16
130 | 2007_004476 4 14
131 | 2007_004481 6 14
132 | 2007_004500 9
133 | 2007_004537 9 12 14
134 | 2007_004627 18
135 | 2007_004663 18
136 | 2007_004705 5 6
137 | 2007_004707 14 15
138 | 2007_004768 16
139 | 2007_004769 1 14
140 | 2007_004810 6 14
141 | 2007_004830 6 14
142 | 2007_004948 15
143 | 2007_004951 18
144 | 2007_004998 7
145 | 2007_005064 1 14
146 | 2007_005086 8 10 14
147 | 2007_005124 9 14
148 | 2007_005210 19
149 | 2007_005212 8 13 17
150 | 2007_005248 12 14
151 | 2007_005262 5 6
152 | 2007_005266 8 15 17
153 | 2007_005273 1 6 14
154 | 2007_005314 13
155 | 2007_005360 18
156 | 2007_005368 1 14
157 | 2007_005430 1 4 14
158 | 2007_005647 8 10 17
159 | 2007_005688 7
160 | 2007_005702 1 14
161 | 2007_005790 10 14
162 | 2007_005797 9 14
163 | 2007_005878 13
164 | 2007_005902 19
165 | 2007_005951 13 14
166 | 2007_005988 6 11 14
167 | 2007_005989 13 14
168 | 2007_006004 8 14
169 | 2007_006066 8 17 19
170 | 2007_006134 12
171 | 2007_006136 16
172 | 2007_006151 6 12 14
173 | 2007_006254 18
174 | 2007_006281 3
175 | 2007_006303 7 15
176 | 2007_006317 1 5 14
177 | 2007_006400 18
178 | 2007_006409 4 10 14
179 | 2007_006445 12 14
180 | 2007_006477 8 14
181 | 2007_006483 4 14
182 | 2007_006490 2 3 14
183 | 2007_006530 8 17
184 | 2007_006660 3 6
185 | 2007_006673 3
186 | 2007_006699 10 14
187 | 2007_006704 19
188 | 2007_006832 16
189 | 2007_006899 16
190 | 2007_007003 5 6
191 | 2007_007250 4 19
192 | 2007_007585 11
193 | 2007_007930 11
194 | 2007_008203 17
195 | 2007_009327 11 14
196 | 2007_009605 11
197 | 2007_009607 2
198 | 2007_009759 2
199 | 2011_000999 7
200 | 2011_001004 6
201 |
--------------------------------------------------------------------------------
/data/voc2012/train_cls-1shot.txt:
--------------------------------------------------------------------------------
1 | 2007_000039 19
2 | 2007_000063 8 11
3 | 2007_000648 0 5 6 14
4 | 2007_001420 12 14 15
5 | 2007_001709 13 14
6 | 2007_001901 8 10 17
7 | 2007_002216 5
8 | 2007_002668 10 14 15
9 | 2007_002669 9 14
10 | 2007_002845 16
11 | 2007_003207 4
12 | 2007_003565 2
13 | 2007_003778 7 15
14 | 2007_003876 0
15 | 2007_004166 8 10 11 17 19
16 | 2007_005273 1 6 14
17 | 2007_005702 1 14
18 | 2007_006303 7 15
19 | 2007_006400 18
20 | 2007_006673 3
21 |
--------------------------------------------------------------------------------
/detection/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys, uuid
4 | import tempfile
5 | from detectron2 import model_zoo
6 | from detectron2.config import get_cfg
7 | from detectron2.engine import launch
8 | from utils import setup_cfg, infer, Trainer
9 | from pathlib import Path
10 | import ujson as json
11 |
def parse_args():
    """Parse command-line options for detection training / evaluation.

    Returns:
        argparse.Namespace: parsed arguments (later augmented with ``td``
        by the ``__main__`` block).
    """
    parser = argparse.ArgumentParser()
    # --- dataset selection ---
    parser.add_argument("--train_dataset", "-s", type=str, required=True, choices=["voc_train", "syn", "coco_train"])
    parser.add_argument("--train_dir", type=str, required=False, help="if unspecified, use default path")
    parser.add_argument("--train_coconame", type=str, required=False, help="if unspecified, use default name")
    parser.add_argument("--syn_dir", type=str, required=False, default="NOT_USED",
                        help="synthetic training data folder, contains `images` for images and `COCO.json` for COCO format annotation and `label2id.json` for labels")
    parser.add_argument("--additional_dataset", nargs="+", help="when use multiple dataset other than -s, put more heavy dataset in here")

    parser.add_argument("--test_dataset", "-t", type=str, choices=["voc_val", "coco_val"])
    parser.add_argument("--test_dir", type=str, required=False, help="if unspecified, use default path")

    # --- optimization hyper-parameters ---
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--wd", type=float, default=0.0005)
    parser.add_argument("--bsz", type=int, default=4)
    parser.add_argument("--freeze", default=False, action="store_true")
    parser.add_argument("--data_aug", default=False, action="store_true", help="data augmentation on synthetic data, RandomContrast etc not including crop, use crop only when --crop")
    parser.add_argument("--crop", default=False, action="store_true")
    parser.add_argument("--epoch", type=int, default=20)

    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument("--debug", default=False, action="store_true", help="if true, don't log in wandb")

    parser.add_argument("--resnet", choices=[50, 101], default=50, type=int, help="whether use R101 or R50")

    parser.add_argument("--preview", default=False, action="store_true")

    # --- checkpointing / evaluation ---
    parser.add_argument("--init_checkpoint", default=None, type=str)
    parser.add_argument("--eval_checkpoint", default=None, type=str)
    parser.add_argument("--eval_threshold", default=0.7, type=float)

    # --- distributed launch ---
    parser.add_argument("--num_gpus_per_machine", "-g", type=int, default=1, help="number of gpus *per machine*")
    parser.add_argument("--num_machines", type=int, default=1, help="total number of machines")

    args = parser.parse_args()
    return args
49 |
def filter(json_file, td):
    """Strip instance-segmentation masks from a COCO annotation file,
    keeping only the object-detection annotations.

    NOTE: the name shadows the builtin ``filter``; it is kept because
    callers pass it by name (``setup_cfg(..., filter=filter)``).

    Args:
        json_file: path to a COCO-format annotation json.
        td: temporary directory in which to place the rewritten json.

    Returns:
        ``json_file`` unchanged when no annotation carries a mask,
        otherwise the path of a rewritten copy with all ``segmentation``
        fields emptied.
    """
    with open(json_file) as f:
        data = json.load(f)
    annotations = data["annotations"]
    # Fast path: nothing to strip. (The previous version returned on the
    # *first* mask-less annotation, which silently kept masks on later
    # annotations in mixed files.)
    if all(len(anno["segmentation"]) == 0 for anno in annotations):
        return json_file
    for anno in annotations:
        anno["segmentation"] = []
    # Unique sub-directory so concurrent runs don't collide.
    out_dir = Path(td) / str(uuid.uuid4())
    os.makedirs(out_dir)
    json_file = out_dir / "COCO.json"
    with open(json_file, "w") as f:
        json.dump(data, f)
    return json_file
70 |
def fetch_cfg(args):
    """Build the base detectron2 config for Faster R-CNN (R50/R101, FPN, 3x).

    Weights come from ``args.init_checkpoint`` when given (optionally
    re-anchored at $PT_DATA_DIR), otherwise from the ImageNet-pretrained
    backbone.
    """
    cfg = get_cfg()
    zoo_yaml = f"COCO-Detection/faster_rcnn_R_{args.resnet}_FPN_3x.yaml"
    cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))

    if args.init_checkpoint is None:
        # No checkpoint supplied: start from the ImageNet-pretrained backbone.
        cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl"
    else:
        print("loading from ckpt:", args.init_checkpoint)
        if "PT_DATA_DIR" in os.environ:
            args.init_checkpoint = os.path.join(os.environ["PT_DATA_DIR"], args.init_checkpoint)
        cfg.MODEL.WEIGHTS = args.init_checkpoint
    return cfg
83 |
def main(args):
    """Per-worker entry point invoked by detectron2's ``launch``.

    Either evaluates an existing checkpoint (and exits) or trains.
    """
    cfg = setup_cfg(args, fetch_cfg(args), filter=filter)

    # Evaluation-only mode: run inference, then stop the process.
    if args.eval_checkpoint is not None:
        infer(cfg)
        sys.exit(0)

    # Class-level switches must be set before the trainer is built.
    Trainer.data_aug = args.data_aug
    Trainer.debug = args.debug
    Trainer.project_name = "dalle-for-detection"

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
97 |
if __name__ == "__main__":
    cli_args = parse_args()

    # Temp dir for rewritten annotation files; removed automatically on exit.
    with tempfile.TemporaryDirectory() as td:
        cli_args.td = td
        launch(
            main,
            num_gpus_per_machine=cli_args.num_gpus_per_machine,
            num_machines=cli_args.num_machines,
            machine_rank=0,
            dist_url="auto",
            args=(cli_args,),
        )
--------------------------------------------------------------------------------
/detection/wandb_writer.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Union
2 |
3 | from detectron2.config import CfgNode
4 | from detectron2.utils.events import EventWriter, get_event_storage
5 |
6 |
class WandbWriter(EventWriter):
    """
    Write all scalars from the detectron2 event storage to Weights & Biases.
    """

    def __init__(
        self,
        project: str = "detectron2",
        config: Union[Dict, CfgNode, None] = None,
        window_size: int = 20,
        **kwargs,
    ):
        """
        Args:
            project (str): W&B Project name
            config Union[Dict, CfgNode]: the project level configuration object
            window_size (int): the scalars will be median-smoothed by this window size
            kwargs: other arguments passed to `wandb.init(...)`
        """
        import wandb
        # BUG FIX: a hard-coded placeholder key ('YOUR API KEY') made every
        # login fail. Let wandb resolve credentials from WANDB_API_KEY /
        # ~/.netrc instead.
        wandb.login()

        self._window_size = window_size
        # Reuse an already-initialized run if one exists; `config or {}`
        # avoids the mutable-default-argument pitfall of the old `config={}`.
        self._run = (
            wandb.init(project=project, config=config or {}, **kwargs)
            if not wandb.run
            else wandb.run
        )
        self._run._label(repo="detectron2")

    def write(self):
        """Push the latest (median-smoothed) scalars to the wandb run."""
        storage = get_event_storage()

        log_dict = {}
        for k, (v, _) in storage.latest_with_smoothing_hint(self._window_size).items():
            log_dict[k] = v

        self._run.log(log_dict)

    def close(self):
        """Finish the underlying wandb run."""
        self._run.finish()
--------------------------------------------------------------------------------
/instance_seg/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Grid-run instance-segmentation training over ResNet depths and learning rates.
# Usage: run.sh <syn_dir> <resnets_csv> <lrs_csv>   e.g.  run.sh ./syn 50,101 0.001,0.0005
syn=$1
resnets=$2
lrs=$3
# Quote every expansion so paths with spaces don't word-split.
if [[ -d "$syn" ]]; then
    echo "$syn exists"
else
    echo "synthetic data dir '$syn' not found" >&2
    # `exit -1` is non-portable (status must be 0-255); use 1.
    exit 1
fi

for resnet in $(echo "$resnets" | tr "," "\n"); do
    for lr in $(echo "$lrs" | tr "," "\n"); do
        echo "$resnet with $lr"
        /lab/andy/anaconda3/envs/paste-segment/bin/python seg.py \
            -s syn -t voc_val \
            --blending gaussian \
            --lr "$lr" \
            --freeze --data_aug --crop \
            --epoch 20 \
            --resnet "$resnet" \
            --syn_dir "$syn"
    done
done
--------------------------------------------------------------------------------
/instance_seg/seg.py:
--------------------------------------------------------------------------------
1 | import os, cv2
2 | import sys
3 | sys.path.insert(1, os.path.join(sys.path[0], "../", 'detection'))
4 | import tempfile
5 |
6 | from detectron2 import model_zoo
7 | from detectron2.config import get_cfg
8 | from utils import setup_cfg, infer, Trainer
9 | from train import parse_args
10 |
def fetch_cfg(args):
    """Base detectron2 config for Mask R-CNN with an R50/R101 FPN backbone."""
    cfg = get_cfg()
    zoo_yaml = f"COCO-InstanceSegmentation/mask_rcnn_R_{args.resnet}_FPN_3x.yaml"
    cfg.merge_from_file(model_zoo.get_config_file(zoo_yaml))
    # Deliberately start from the ImageNet-pretrained backbone rather than
    # the COCO-finetuned model-zoo checkpoint.
    cfg.MODEL.WEIGHTS = f"detectron2://ImageNetPretrained/MSRA/R-{args.resnet}.pkl"
    return cfg
19 |
if __name__ == "__main__":
    cli_args = parse_args()

    # Temp dir (for rewritten annotations) lives for the whole run.
    with tempfile.TemporaryDirectory() as td:
        cli_args.td = td
        cfg = setup_cfg(cli_args, fetch_cfg(cli_args))

        # Evaluation-only mode: run inference, then stop.
        if cli_args.eval_checkpoint is not None:
            infer(cfg)
            sys.exit(0)

        # Class-level switches must be set before the trainer is built.
        Trainer.data_aug = cli_args.data_aug
        Trainer.debug = cli_args.debug
        Trainer.project_name = "paste-seg-instance"

        trainer = Trainer(cfg)
        trainer.resume_or_load(resume=False)
        trainer.train()
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # download gpu version if possible
2 | # torch==1.10.1
3 | # torchvision==0.11.2
4 | # download from https://detectron2.readthedocs.io/en/latest/tutorials/install.html
5 | # detectron2
6 | # needed to transform synthetic COCO segmentation RLE annotations
7 | shapely
8 |
9 | numpy
10 | scikit-image==0.18.0
11 | scikit-learn
12 | Pillow
13 | tqdm
14 | opencv-python
15 | wandb
16 | pandas
17 | pycocotools
18 | hydra-core
19 | # weird issue with detectron2
20 | setuptools==59.5.0
21 | # ujson: faster JSON parsing/serialization
22 | ujson
23 | # fix weird bug
24 | omegaconf==2.1
25 | transformers==4.22.2
26 | diffusers==0.9.0
27 | kornia
28 | timm
--------------------------------------------------------------------------------
/t2i_generate/background_captions.py:
--------------------------------------------------------------------------------
1 | """
2 | Create prompt to get pure backgrounds
3 | """
4 | import json
5 |
# Prompt templates; "{obj}" is replaced by a background scene name.
templates = [
    "A real photo of {obj}",
]
# Background scene descriptions, grouped by the kind of foreground
# object they are meant to host.
classnames = [
    # indoor objects
    "empty living room", "empty kitchen",
    # vehicle
    "blue sky", "empty city street, color", "empty city road, color", "empty lake", "empty sea", "railway without train", "empty railway, color",
    # animal
    "trees", "forest", "empty street, colored", "farms", "nature", "empty farm", "stable"
]

# scene name -> list of fully formatted prompts (one per template).
# The old version pre-filled the dict with empty lists and formatted each
# prompt twice (once to print, once in a comprehension); build each list once.
to_save = {"background": {}}
for class_ in classnames:
    prompts = [temp.format(obj=class_) for temp in templates]
    for prompt in prompts:
        print(prompt)
    to_save["background"][class_] = prompts

with open("background_templates.json", "w") as f:
    json.dump(to_save, f)
--------------------------------------------------------------------------------
/t2i_generate/foreground_captions.py:
--------------------------------------------------------------------------------
1 | import json
2 |
# Prompt templates; "{obj}" is replaced by a foreground class phrase.
templates = [
    "a photo of {obj}",
    "a realistic photo of {obj}",
    "a photo of {obj} in pure background",
    "{obj} in a white background",
    "{obj} without background",
    "{obj} isolated on white background",
]

# VOC-style class phrases (with articles). A commented-out 60-class COCO
# list used to live here; "a dining table" appears twice, which is
# harmless since dict assignment below is idempotent.
classnames = [
    "a person", "a man", "a woman",
    "a bird", "a cat", "a cow", "a dog", "a horse", "a sheep",
    "an airplane",
    "a TV", "a monitor", "an old monitor", "a dining table", "a table",
    "a bicycle", "a boat", "a bus", "a car", "a motorbike", "a train",
    "a bottle", "a chair", "a dining table", "a potted plant", "a sofa", "a tv monitor"
]

# class phrase -> all formatted prompts; each prompt is echoed to stdout.
to_save = {"foreground": {}}
for class_ in classnames:
    formatted = []
    for temp in templates:
        prompt = temp.format(obj=class_)
        print(prompt)
        formatted.append(prompt)
    to_save["foreground"][class_] = formatted

with open("foreground_templates.json", "w") as f:
    json.dump(to_save, f)
--------------------------------------------------------------------------------
/t2i_generate/stable_diffusion2.py:
--------------------------------------------------------------------------------
1 | # make sure you're logged in with `huggingface-cli login`
2 | import argparse
3 | import json, os
4 | from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
5 | import torch
6 | import numpy as np
7 |
def parse_args():
    """Parse CLI options: which caption chunk to render and where to write images."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--idx", type=int)
    parser.add_argument("--bsz", type=int, default=8)
    parser.add_argument("--num_gen_images_per_caption", "-n", type=int, default=20)
    parser.add_argument("--caption_json", default="./data", help="if not '', will only generate DallE images from this json, use `idx` and `scene` to select which to generate")
    parser.add_argument("--num_clusters", default=100, type=int, help="when using RuDalle, split all captions into `num_clusters` chunk and let each machine handle one chunk only")
    parser.add_argument("--output_dir", default="")

    args = parser.parse_args()
    # Cluster-style data root: re-anchor the output dir under $PT_DATA_DIR when set.
    if 'PT_DATA_DIR' in os.environ:
        args.output_dir = os.path.join(os.environ['PT_DATA_DIR'], args.output_dir)
    return args
21 |
def batchify(lst, n):
    """Yield successive n-sized chunks from lst (last chunk may be shorter)."""
    start = 0
    while start < len(lst):
        yield lst[start:start + n]
        start += n
26 |
if __name__ == "__main__":
    args = parse_args()

    with open(args.caption_json) as f:
        data = json.load(f)

    # The caption json has a single top-level key (e.g. "foreground" /
    # "background"); descend into it.
    key = next(iter(data))
    data = data[key]

    # Deterministic split of caption ids into `num_clusters` chunks so each
    # machine (selected via --idx) renders a disjoint subset.
    all_keys = sorted(list(data.keys()))
    all_chunks = np.array_split(all_keys, args.num_clusters)
    chunks = all_chunks[args.idx]

    # BUG FIX: the kwarg is `torch_dtype` (not `torch_type`); the misspelled
    # kwarg was silently ignored, so the model loaded in full fp32 precision.
    pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    n_already_generated = 0
    n_generate_per_cycle = 4
    while n_already_generated < args.num_gen_images_per_caption:
        for id in chunks:
            for cap in data[id]:
                # Prompts use the full caption; only the directory name is sanitized.
                prompts = [cap] * n_generate_per_cycle
                cap = cap[:50] # too long captions will cause path error
                cap = cap.replace('"', "") # server don't like ", will map to %2522
                out_dir = os.path.join(args.output_dir, id, cap)
                os.makedirs(out_dir, exist_ok=True)
                # Continue numbering after any images from a previous run.
                cur_i = len(list(os.listdir(out_dir))) + 1
                for prompt_chunk in batchify(prompts, n=args.bsz):
                    x = pipe(prompt_chunk)
                    for img in x.images:
                        cur_i += 1
                        img.save(os.path.join(out_dir, f"{cur_i}.png"))

        n_already_generated += n_generate_per_cycle
--------------------------------------------------------------------------------
/viz/viz.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os, sys
3 | import random
4 | from pathlib import Path
5 |
6 | import cv2
7 | import matplotlib.pyplot as plt
8 | from detectron2.data import DatasetCatalog
9 | from detectron2.data.datasets import register_coco_instances
10 | from detectron2.utils.logger import setup_logger
11 | from detectron2.utils.visualizer import Visualizer
12 |
pwd = Path(__file__).parent.resolve()
output = pwd / "out"

# VOC label map lives next to the VOC2012 folder.
voc_dir = pwd.parent / "data/voc2012" / "VOC2012"
with open(voc_dir.parent / "label2id.json") as f:
    label2id = json.load(f)

# First CLI arg: directory holding the synthetic images plus COCO.json.
artifact_dir = Path(sys.argv[1])
assert artifact_dir.exists()
print(artifact_dir)

output = output / artifact_dir.stem
os.makedirs(output, exist_ok=True)
coco_name = "COCO.json"
register_coco_instances("synthetic_train", metadata=label2id, json_file=str(artifact_dir / coco_name),
                        image_root=str(artifact_dir))
setup_logger()

ds = "synthetic_train"
data = DatasetCatalog.get(ds)
# Visualize up to 30 random samples: drawn annotations on the left, raw RGB
# on the right. BUG FIX: `random.sample(data, 30)` raised ValueError when the
# dataset had fewer than 30 entries; clamp the sample size.
for i, d in enumerate(random.sample(data, min(30, len(data)))):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    plt.figure(1, figsize=(10, 8))
    plt.subplot(1, 2, 1)
    plt.title(d['file_name'])
    plt.imshow(out.get_image())
    plt.subplot(1, 2, 2)
    plt.imshow(img[:, :, ::-1])
    plt.title("RGB")
    plt.tight_layout()
    plt.savefig(output / f"demo{i}.png")
    plt.show()
--------------------------------------------------------------------------------