├── utils
│   ├── __init__.py
│   └── cocosplit.py
├── requirements.txt
├── scripts
│   └── train_prima.sh
├── .gitignore
├── README.md
├── configs
│   └── prima
│       ├── fast_rcnn_R_50_FPN_3x.yaml
│       └── mask_rcnn_R_50_FPN_3x.yaml
└── tools
    ├── train_net.py
    └── convert_prima_to_coco.py

/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
# Note: torch and detectron2 are assumed to be installed separately,
# following the official Detectron2 installation instructions.
layoutparser
funcy
bs4
scikit-learn
imagesize
tqdm
--------------------------------------------------------------------------------
/scripts/train_prima.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# This script is meant to be run from inside the scripts/ directory.
cd ../tools

# Convert the PAGE-format annotations to COCO and split them into
# annotations-train.json / annotations-val.json.
python convert_prima_to_coco.py \
    --prima_datapath ../data/prima \
    --anno_savepath ../data/prima/annotations.json

python train_net.py \
    --dataset_name prima-layout \
    --json_annotation_train ../data/prima/annotations-train.json \
    --image_path_train ../data/prima/Images \
    --json_annotation_val ../data/prima/annotations-val.json \
    --image_path_val ../data/prima/Images \
    --config-file ../configs/prima/mask_rcnn_R_50_FPN_3x.yaml \
    OUTPUT_DIR ../outputs/prima/mask_rcnn_R_50_FPN_3x/ \
    SOLVER.IMS_PER_BATCH 2
--------------------------------------------------------------------------------
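After training, the same entry point can re-evaluate the saved weights. A minimal sketch, assuming the run above completed and Detectron2 wrote its usual `model_final.pth` checkpoint into `OUTPUT_DIR` (`--eval-only`, like `--resume`, comes from Detectron2's `default_argument_parser`):

```bash
#!/bin/bash

cd ../tools

python train_net.py \
    --eval-only \
    --dataset_name prima-layout \
    --json_annotation_train ../data/prima/annotations-train.json \
    --image_path_train ../data/prima/Images \
    --json_annotation_val ../data/prima/annotations-val.json \
    --image_path_val ../data/prima/Images \
    --config-file ../configs/prima/mask_rcnn_R_50_FPN_3x.yaml \
    MODEL.WEIGHTS ../outputs/prima/mask_rcnn_R_50_FPN_3x/model_final.pth \
    OUTPUT_DIR ../outputs/prima/mask_rcnn_R_50_FPN_3x/
```

The evaluation results are written to `eval.csv` inside `OUTPUT_DIR` (see `tools/train_net.py` below).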
/.gitignore:
--------------------------------------------------------------------------------
# folders
data
data/
credential
credential/
model
model/
result
result*/
outputs/

# Mac Finder Configurations
.DS_Store

# IDEA configurations
.idea/

# IPython checkpoints
.ipynb_checkpoints/
log

# Visual Studio Code
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Scripts for training Layout Detection Models using Detectron2

## Usage

### Directory Structure

- In `tools/`, we provide a series of handy scripts for converting data formats and training the models.
- In `scripts/`, we list the specific commands for running the code on the given datasets.
- The `configs/` folder contains the configurations for the different deep learning models, organized by dataset.

### How to train the models?

- Get the dataset and annotations -- if you are not sure how, feel free to check [this tutorial](https://github.com/Layout-Parser/layout-parser/tree/main/examples/Customizing%20Layout%20Models%20with%20Label%20Studio%20Annotation).
- Duplicate and modify the config files and training scripts (see the sketch after this list):
  - For example, you might want to copy [`configs/prima/fast_rcnn_R_50_FPN_3x.yaml`](configs/prima/fast_rcnn_R_50_FPN_3x.yaml) to `configs/your-dataset-name/fast_rcnn_R_50_FPN_3x.yaml`, and you can create your own `scripts/train_<your-dataset-name>.sh` based on [`scripts/train_prima.sh`](scripts/train_prima.sh).
  - You'll need to modify the `--dataset_name`, `--json_annotation_train`, `--image_path_train`, `--json_annotation_val`, `--image_path_val`, and `--config-file` args appropriately.
- If you have a dataset with segmentation masks, you can try to train with the [`mask_rcnn` model](configs/prima/mask_rcnn_R_50_FPN_3x.yaml); otherwise you might want to start with the [`fast_rcnn` model](configs/prima/fast_rcnn_R_50_FPN_3x.yaml).
- If you see the error `AttributeError: Cannot find field 'gt_masks' in the given Instances!` during training, it means your annotations do not contain segmentation masks; in that case you should not use the `mask_rcnn` config, and should switch to the `fast_rcnn` one instead.
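Putting those steps together, a training script for a hypothetical dataset called `your-dataset-name` could look like the following sketch (all paths are placeholders to adapt):

```bash
#!/bin/bash
# Hypothetical scripts/train_your-dataset-name.sh, adapted from scripts/train_prima.sh.

cd ../tools

python train_net.py \
    --dataset_name your-dataset-name \
    --json_annotation_train ../data/your-dataset-name/annotations-train.json \
    --image_path_train ../data/your-dataset-name/images \
    --json_annotation_val ../data/your-dataset-name/annotations-val.json \
    --image_path_val ../data/your-dataset-name/images \
    --config-file ../configs/your-dataset-name/fast_rcnn_R_50_FPN_3x.yaml \
    OUTPUT_DIR ../outputs/your-dataset-name/fast_rcnn_R_50_FPN_3x/ \
    SOLVER.IMS_PER_BATCH 2
```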

## Supported Datasets

- Prima Layout Analysis Dataset [`scripts/train_prima.sh`](https://github.com/Layout-Parser/layout-model-training/blob/master/scripts/train_prima.sh)
  - You will need to download the dataset from the [official website](https://www.primaresearch.org/dataset/) and put it in the `data/prima` folder.
  - As the original dataset is stored in the [PAGE format](https://www.primaresearch.org/tools/PAGEViewer), the script will use [`tools/convert_prima_to_coco.py`](https://github.com/Layout-Parser/layout-model-training/blob/master/tools/convert_prima_to_coco.py) to convert it to COCO format.
  - The final dataset folder structure should look like:
    ```bash
    data/
    └── prima/
        ├── Images/
        ├── XML/
        ├── License.txt
        └── annotations*.json
    ```
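- Once trained, the weights can be loaded back with [layoutparser](https://github.com/Layout-Parser/layout-parser) for inference. A minimal sketch (the image path is a placeholder; the label map mirrors the categories defined in `tools/convert_prima_to_coco.py`, and `NUM_CLASSES` must be set to 7 here because `tools/train_net.py` overrides it to the number of categories at training time):
    ```python
    import layoutparser as lp
    from PIL import Image

    model = lp.Detectron2LayoutModel(
        config_path="configs/prima/mask_rcnn_R_50_FPN_3x.yaml",
        model_path="outputs/prima/mask_rcnn_R_50_FPN_3x/model_final.pth",
        extra_config=[
            "MODEL.ROI_HEADS.NUM_CLASSES", 7,          # 7 categories, as at training time
            "MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5,  # drop low-confidence detections
        ],
        label_map={0: "Background", 1: "TextRegion", 2: "ImageRegion",
                   3: "TableRegion", 4: "MathsRegion", 5: "SeparatorRegion",
                   6: "OtherRegion"},
    )

    image = Image.open("data/prima/Images/example.tif")  # placeholder path
    layout = model.detect(image)
    ```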

## Reference

- **[cocosplit](https://github.com/akarazniewicz/cocosplit)**: a script that splits COCO annotations into train and test sets.
- **[Detectron2](https://github.com/facebookresearch/detectron2)**: Facebook AI Research's next generation software system that implements state-of-the-art object detection algorithms.
--------------------------------------------------------------------------------
/utils/cocosplit.py:
--------------------------------------------------------------------------------
# Modified based on https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py

import json
import argparse
import funcy
from sklearn.model_selection import train_test_split

parser = argparse.ArgumentParser(
    description="Splits COCO annotations file into training and test sets."
)
parser.add_argument(
    "--annotation-path",
    metavar="coco_annotations",
    type=str,
    help="Path to COCO annotations file.",
)
parser.add_argument(
    "--train", type=str, help="Where to store COCO training annotations"
)
parser.add_argument("--test", type=str, help="Where to store COCO test annotations")
parser.add_argument(
    "--split-ratio",
    dest="split_ratio",
    type=float,
    required=True,
    help="The fraction of images to use for the training split; a number in (0, 1)",
)
parser.add_argument(
    "--having-annotations",
    dest="having_annotations",
    action="store_true",
    help="Ignore all images without annotations. Keep only those with at least one annotation",
)


def save_coco(file, tagged_data):
    with open(file, "wt", encoding="UTF-8") as coco:
        json.dump(tagged_data, coco, indent=2, sort_keys=True)


def filter_annotations(annotations, images):
    image_ids = funcy.lmap(lambda i: int(i["id"]), images)
    return funcy.lfilter(lambda a: int(a["image_id"]) in image_ids, annotations)


def main(
    annotation_path,
    split_ratio,
    having_annotations,
    train_save_path,
    test_save_path,
    random_state=None,
):

    with open(annotation_path, "rt", encoding="UTF-8") as annotations:
        coco = json.load(annotations)

    images = coco["images"]
    annotations = coco["annotations"]

    ids_with_annotations = funcy.lmap(lambda a: int(a["image_id"]), annotations)

    # Images with annotations
    img_ann = funcy.lremove(lambda i: i["id"] not in ids_with_annotations, images)
    tr_ann, ts_ann = train_test_split(
        img_ann, train_size=split_ratio, random_state=random_state
    )

    # Images without annotations
    img_wo_ann = funcy.lremove(lambda i: i["id"] in ids_with_annotations, images)
    if len(img_wo_ann) > 0:
        tr_wo_ann, ts_wo_ann = train_test_split(
            img_wo_ann, train_size=split_ratio, random_state=random_state
        )
    else:
        tr_wo_ann, ts_wo_ann = [], []

    if having_annotations:
        tr, ts = tr_ann, ts_ann
    else:
        # Merging the two image lists (i.e. with and without annotations)
        tr_ann.extend(tr_wo_ann)
        ts_ann.extend(ts_wo_ann)

        tr, ts = tr_ann, ts_ann

    # Train Data
    coco.update({"images": tr, "annotations": filter_annotations(annotations, tr)})
    save_coco(train_save_path, coco)

    # Test Data
    coco.update({"images": ts, "annotations": filter_annotations(annotations, ts)})
    save_coco(test_save_path, coco)

    print(
        "Saved {} entries in {} and {} in {}".format(
            len(tr), train_save_path, len(ts), test_save_path
        )
    )


if __name__ == "__main__":
    args = parser.parse_args()

    main(
        args.annotation_path,
        args.split_ratio,
        args.having_annotations,
        args.train,
        args.test,
        random_state=24,
    )
--------------------------------------------------------------------------------
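The two YAML files that follow are complete Detectron2 config dumps (every key is spelled out) rather than the usual small deltas on top of a `_BASE_` file. If you need a similar dump for your own dataset, something like this sketch works (the output path is a placeholder):

```python
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
# Start from the corresponding model-zoo config and dump every key.
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
)
with open("configs/your-dataset-name/mask_rcnn_R_50_FPN_3x.yaml", "w") as f:
    f.write(cfg.dump())
```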
/configs/prima/fast_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
CUDNN_BENCHMARK: false
DATALOADER:
  ASPECT_RATIO_GROUPING: true
  FILTER_EMPTY_ANNOTATIONS: true
  NUM_WORKERS: 4
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: []
  PROPOSAL_FILES_TRAIN: []
  TEST: []
  TRAIN: []
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: false
    SIZE:
    - 0.9
    - 0.9
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN:
  - 640
  - 672
  - 704
  - 736
  - 768
  - 800
  MIN_SIZE_TRAIN_SAMPLING: choice
MODEL:
  ANCHOR_GENERATOR:
    ANGLES:
    - - -90
      - 0
      - 90
    ASPECT_RATIOS:
    - - 0.5
      - 1.0
      - 2.0
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES:
    - - 32
    - - 64
    - - 128
    - - 256
    - - 512
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES:
    - res2
    - res3
    - res4
    - res5
    NORM: ''
    OUT_CHANNELS: 256
  KEYPOINT_ON: false
  LOAD_PROPOSALS: false
  MASK_ON: false
  META_ARCHITECTURE: GeneralizedRCNN
  PANOPTIC_FPN:
    COMBINE:
      ENABLED: true
      INSTANCES_CONFIDENCE_THRESH: 0.5
      OVERLAP_THRESH: 0.5
      STUFF_AREA_LIMIT: 4096
    INSTANCE_LOSS_WEIGHT: 1.0
  PIXEL_MEAN:
  - 103.53
  - 116.28
  - 123.675
  PIXEL_STD:
  - 1.0
  - 1.0
  - 1.0
  PROPOSAL_GENERATOR:
    MIN_SIZE: 0
    NAME: RPN
  RESNETS:
    DEFORM_MODULATED: false
    DEFORM_NUM_GROUPS: 1
    DEFORM_ON_PER_STAGE:
    - false
    - false
    - false
    - false
    DEPTH: 50
    NORM: FrozenBN
    NUM_GROUPS: 1
    OUT_FEATURES:
    - res2
    - res3
    - res4
    - res5
    RES2_OUT_CHANNELS: 256
    RES5_DILATION: 1
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: true
    WIDTH_PER_GROUP: 64
  RETINANET:
    BBOX_REG_WEIGHTS:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
    FOCAL_LOSS_ALPHA: 0.25
    FOCAL_LOSS_GAMMA: 2.0
    IN_FEATURES:
    - p3
    - p4
    - p5
    - p6
    - p7
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.4
    - 0.5
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    NUM_CONVS: 4
    PRIOR_PROB: 0.01
    SCORE_THRESH_TEST: 0.05
    SMOOTH_L1_LOSS_BETA: 0.1
    TOPK_CANDIDATES_TEST: 1000
  ROI_BOX_CASCADE_HEAD:
    BBOX_REG_WEIGHTS:
    - - 10.0
      - 10.0
      - 5.0
      - 5.0
    - - 20.0
      - 20.0
      - 10.0
      - 10.0
    - - 30.0
      - 30.0
      - 15.0
      - 15.0
    IOUS:
    - 0.5
    - 0.6
    - 0.7
  ROI_BOX_HEAD:
    BBOX_REG_WEIGHTS:
    - 10.0
    - 10.0
    - 5.0
    - 5.0
    CLS_AGNOSTIC_BBOX_REG: false
    CONV_DIM: 256
    FC_DIM: 1024
    NAME: FastRCNNConvFCHead
    NORM: ''
    NUM_CONV: 0
    NUM_FC: 2
    POOLER_RESOLUTION: 7
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
    SMOOTH_L1_BETA: 0.0
    TRAIN_ON_PRED_BOXES: false
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 512
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    IOU_LABELS:
    - 0
    - 1
    IOU_THRESHOLDS:
    - 0.5
    NAME: StandardROIHeads
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    POSITIVE_FRACTION: 0.25
    PROPOSAL_APPEND_GT: true
    SCORE_THRESH_TEST: 0.05
  ROI_KEYPOINT_HEAD:
    CONV_DIMS:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    LOSS_WEIGHT: 1.0
    MIN_KEYPOINTS_PER_IMAGE: 1
    NAME: KRCNNConvDeconvUpsampleHead
    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
    NUM_KEYPOINTS: 17
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: false
    CONV_DIM: 256
    NAME: MaskRCNNConvUpsampleHead
    NORM: ''
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  RPN:
    BATCH_SIZE_PER_IMAGE: 256
    BBOX_REG_WEIGHTS:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
    BOUNDARY_THRESH: -1
    HEAD_NAME: StandardRPNHead
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    - p6
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.3
    - 0.7
    LOSS_WEIGHT: 1.0
    NMS_THRESH: 0.7
    POSITIVE_FRACTION: 0.5
    POST_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TRAIN: 1000
    PRE_NMS_TOPK_TEST: 1000
    PRE_NMS_TOPK_TRAIN: 2000
    SMOOTH_L1_BETA: 0.0
  SEM_SEG_HEAD:
    COMMON_STRIDE: 4
    CONVS_DIM: 128
    IGNORE_VALUE: 255
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    LOSS_WEIGHT: 1.0
    NAME: SemSegFPNHead
    NORM: GN
    NUM_CLASSES: 54
  WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl
OUTPUT_DIR: ./output
SEED: -1
SOLVER:
  BASE_LR: 0.02
  BIAS_LR_FACTOR: 1.0
  CHECKPOINT_PERIOD: 20000
  GAMMA: 0.1
  IMS_PER_BATCH: 16
  LR_SCHEDULER_NAME: WarmupMultiStepLR
  MAX_ITER: 60000
  MOMENTUM: 0.9
  # NOTE: both decay steps exceed MAX_ITER (60000), so the learning rate
  # is never actually decayed under this schedule.
  STEPS:
  - 210000
  - 250000
  WARMUP_FACTOR: 0.001
  WARMUP_ITERS: 1000
  WARMUP_METHOD: linear
  WEIGHT_DECAY: 0.0001
  WEIGHT_DECAY_BIAS: 0.0001
  WEIGHT_DECAY_NORM: 0.0
TEST:
  AUG:
    ENABLED: false
    FLIP: true
    MAX_SIZE: 4000
    MIN_SIZES:
    - 400
    - 500
    - 600
    - 700
    - 800
    - 900
    - 1000
    - 1100
    - 1200
  DETECTIONS_PER_IMAGE: 100
  EVAL_PERIOD: 0
  EXPECTED_RESULTS: []
  KEYPOINT_OKS_SIGMAS: []
  PRECISE_BN:
    ENABLED: false
    NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0
--------------------------------------------------------------------------------
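As far as the two PRIMA configs go, they differ in exactly one switch; the mask variant below additionally trains the mask head, which requires `gt_masks` (i.e. `segmentation` polygons) in the annotations:

```yaml
# fast_rcnn_R_50_FPN_3x.yaml
MODEL:
  MASK_ON: false

# mask_rcnn_R_50_FPN_3x.yaml (next file)
MODEL:
  MASK_ON: true
```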
/configs/prima/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
CUDNN_BENCHMARK: false
DATALOADER:
  ASPECT_RATIO_GROUPING: true
  FILTER_EMPTY_ANNOTATIONS: true
  NUM_WORKERS: 4
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: []
  PROPOSAL_FILES_TRAIN: []
  TEST: []
  TRAIN: []
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: false
    SIZE:
    - 0.9
    - 0.9
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN:
  - 640
  - 672
  - 704
  - 736
  - 768
  - 800
  MIN_SIZE_TRAIN_SAMPLING: choice
MODEL:
  ANCHOR_GENERATOR:
    ANGLES:
    - - -90
      - 0
      - 90
    ASPECT_RATIOS:
    - - 0.5
      - 1.0
      - 2.0
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES:
    - - 32
    - - 64
    - - 128
    - - 256
    - - 512
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES:
    - res2
    - res3
    - res4
    - res5
    NORM: ''
    OUT_CHANNELS: 256
  KEYPOINT_ON: false
  LOAD_PROPOSALS: false
  MASK_ON: true
  META_ARCHITECTURE: GeneralizedRCNN
  PANOPTIC_FPN:
    COMBINE:
      ENABLED: true
      INSTANCES_CONFIDENCE_THRESH: 0.5
      OVERLAP_THRESH: 0.5
      STUFF_AREA_LIMIT: 4096
    INSTANCE_LOSS_WEIGHT: 1.0
  PIXEL_MEAN:
  - 103.53
  - 116.28
  - 123.675
  PIXEL_STD:
  - 1.0
  - 1.0
  - 1.0
  PROPOSAL_GENERATOR:
    MIN_SIZE: 0
    NAME: RPN
  RESNETS:
    DEFORM_MODULATED: false
    DEFORM_NUM_GROUPS: 1
    DEFORM_ON_PER_STAGE:
    - false
    - false
    - false
    - false
    DEPTH: 50
    NORM: FrozenBN
    NUM_GROUPS: 1
    OUT_FEATURES:
    - res2
    - res3
    - res4
    - res5
    RES2_OUT_CHANNELS: 256
    RES5_DILATION: 1
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: true
    WIDTH_PER_GROUP: 64
  RETINANET:
    BBOX_REG_WEIGHTS:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
    FOCAL_LOSS_ALPHA: 0.25
    FOCAL_LOSS_GAMMA: 2.0
    IN_FEATURES:
    - p3
    - p4
    - p5
    - p6
    - p7
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.4
    - 0.5
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    NUM_CONVS: 4
    PRIOR_PROB: 0.01
    SCORE_THRESH_TEST: 0.05
    SMOOTH_L1_LOSS_BETA: 0.1
    TOPK_CANDIDATES_TEST: 1000
  ROI_BOX_CASCADE_HEAD:
    BBOX_REG_WEIGHTS:
    - - 10.0
      - 10.0
      - 5.0
      - 5.0
    - - 20.0
      - 20.0
      - 10.0
      - 10.0
    - - 30.0
      - 30.0
      - 15.0
      - 15.0
    IOUS:
    - 0.5
    - 0.6
    - 0.7
  ROI_BOX_HEAD:
    BBOX_REG_WEIGHTS:
    - 10.0
    - 10.0
    - 5.0
    - 5.0
    CLS_AGNOSTIC_BBOX_REG: false
    CONV_DIM: 256
    FC_DIM: 1024
    NAME: FastRCNNConvFCHead
    NORM: ''
    NUM_CONV: 0
    NUM_FC: 2
    POOLER_RESOLUTION: 7
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
    SMOOTH_L1_BETA: 0.0
    TRAIN_ON_PRED_BOXES: false
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 512
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    IOU_LABELS:
    - 0
    - 1
    IOU_THRESHOLDS:
    - 0.5
    NAME: StandardROIHeads
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    POSITIVE_FRACTION: 0.25
    PROPOSAL_APPEND_GT: true
    SCORE_THRESH_TEST: 0.05
  ROI_KEYPOINT_HEAD:
    CONV_DIMS:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    LOSS_WEIGHT: 1.0
    MIN_KEYPOINTS_PER_IMAGE: 1
    NAME: KRCNNConvDeconvUpsampleHead
    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
    NUM_KEYPOINTS: 17
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: false
    CONV_DIM: 256
    NAME: MaskRCNNConvUpsampleHead
    NORM: ''
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  RPN:
    BATCH_SIZE_PER_IMAGE: 256
    BBOX_REG_WEIGHTS:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
    BOUNDARY_THRESH: -1
    HEAD_NAME: StandardRPNHead
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    - p6
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.3
    - 0.7
    LOSS_WEIGHT: 1.0
    NMS_THRESH: 0.7
    POSITIVE_FRACTION: 0.5
    POST_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TRAIN: 1000
    PRE_NMS_TOPK_TEST: 1000
    PRE_NMS_TOPK_TRAIN: 2000
    SMOOTH_L1_BETA: 0.0
  SEM_SEG_HEAD:
    COMMON_STRIDE: 4
    CONVS_DIM: 128
    IGNORE_VALUE: 255
    IN_FEATURES:
    - p2
    - p3
    - p4
    - p5
    LOSS_WEIGHT: 1.0
    NAME: SemSegFPNHead
    NORM: GN
    NUM_CLASSES: 54
  WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl
OUTPUT_DIR: ./output
SEED: -1
SOLVER:
  BASE_LR: 0.02
  BIAS_LR_FACTOR: 1.0
  CHECKPOINT_PERIOD: 20000
  GAMMA: 0.1
  IMS_PER_BATCH: 16
  LR_SCHEDULER_NAME: WarmupMultiStepLR
  MAX_ITER: 60000
  MOMENTUM: 0.9
  # NOTE: both decay steps exceed MAX_ITER (60000), so the learning rate
  # is never actually decayed under this schedule.
  STEPS:
  - 210000
  - 250000
  WARMUP_FACTOR: 0.001
  WARMUP_ITERS: 1000
  WARMUP_METHOD: linear
  WEIGHT_DECAY: 0.0001
  WEIGHT_DECAY_BIAS: 0.0001
  WEIGHT_DECAY_NORM: 0.0
TEST:
  AUG:
    ENABLED: false
    FLIP: true
    MAX_SIZE: 4000
    MIN_SIZES:
    - 400
    - 500
    - 600
    - 700
    - 800
    - 900
    - 1000
    - 1100
    - 1200
  DETECTIONS_PER_IMAGE: 100
  EVAL_PERIOD: 0
  EXPECTED_RESULTS: []
  KEYPOINT_OKS_SIGMAS: []
  PRECISE_BN:
    ENABLED: false
    NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0
--------------------------------------------------------------------------------
/tools/train_net.py:
--------------------------------------------------------------------------------
"""
The script is based on https://github.com/facebookresearch/detectron2/blob/master/tools/train_net.py.
"""

import logging
import os
import json
from collections import OrderedDict

import detectron2.utils.comm as comm
import detectron2.data.transforms as T
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import (
    DefaultTrainer,
    default_argument_parser,
    default_setup,
    hooks,
    launch,
)
from detectron2.evaluation import (
    COCOEvaluator,
    verify_results,
)
from detectron2.modeling import GeneralizedRCNNWithTTA
import pandas as pd


def get_augs(cfg):
    """Add all the desired augmentations here. A list of available augmentations
    can be found here:
    https://detectron2.readthedocs.io/en/latest/modules/data_transforms.html
    """
    augs = [
        T.ResizeShortestEdge(
            cfg.INPUT.MIN_SIZE_TRAIN,
            cfg.INPUT.MAX_SIZE_TRAIN,
            cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING,
        )
    ]
    if cfg.INPUT.CROP.ENABLED:
        augs.append(
            T.RandomCrop_CategoryAreaConstraint(
                cfg.INPUT.CROP.TYPE,
                cfg.INPUT.CROP.SIZE,
                cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA,
                cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
            )
        )
    # Assumes cfg.INPUT.RANDOM_FLIP is either "horizontal" or "vertical".
    horizontal_flip: bool = cfg.INPUT.RANDOM_FLIP == "horizontal"
    augs.append(T.RandomFlip(horizontal=horizontal_flip, vertical=not horizontal_flip))
    # Rotate the image by an angle between -90 and 0 degrees around the centre
    augs.append(T.RandomRotation(angle=[-90.0, 0.0]))
    return augs


class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer" which contains pre-defined default logic for
    the standard training workflow. It may not work for you, especially if you
    are working on a new research project. In that case you can use the cleaner
    "SimpleTrainer", or write your own training loop. You can use
    "tools/plain_train_net.py" as an example.

    Adapted from:
    https://github.com/facebookresearch/detectron2/blob/master/projects/DeepLab/train_net.py
    """

    @classmethod
    def build_train_loader(cls, cfg):
        mapper = DatasetMapper(cfg, is_train=True, augmentations=get_augs(cfg))
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Returns:
            DatasetEvaluator or None

        It is not implemented by default.
84 | """ 85 | return COCOEvaluator(dataset_name, cfg, True, output_folder) 86 | 87 | @classmethod 88 | def test_with_TTA(cls, cfg, model): 89 | logger = logging.getLogger("detectron2.trainer") 90 | # In the end of training, run an evaluation with TTA 91 | # Only support some R-CNN models. 92 | logger.info("Running inference with test-time augmentation ...") 93 | model = GeneralizedRCNNWithTTA(cfg, model) 94 | evaluators = [ 95 | cls.build_evaluator( 96 | cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") 97 | ) 98 | for name in cfg.DATASETS.TEST 99 | ] 100 | res = cls.test(cfg, model, evaluators) 101 | res = OrderedDict({k + "_TTA": v for k, v in res.items()}) 102 | return res 103 | 104 | @classmethod 105 | def eval_and_save(cls, cfg, model): 106 | evaluators = [ 107 | cls.build_evaluator( 108 | cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference") 109 | ) 110 | for name in cfg.DATASETS.TEST 111 | ] 112 | res = cls.test(cfg, model, evaluators) 113 | pd.DataFrame(res).to_csv(os.path.join(cfg.OUTPUT_DIR, "eval.csv")) 114 | return res 115 | 116 | 117 | def setup(args): 118 | """ 119 | Create configs and perform basic setups. 120 | """ 121 | cfg = get_cfg() 122 | 123 | if args.config_file != "": 124 | cfg.merge_from_file(args.config_file) 125 | cfg.merge_from_list(args.opts) 126 | 127 | with open(args.json_annotation_train, "r") as fp: 128 | anno_file = json.load(fp) 129 | 130 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(anno_file["categories"]) 131 | del anno_file 132 | 133 | cfg.DATASETS.TRAIN = (f"{args.dataset_name}-train",) 134 | cfg.DATASETS.TEST = (f"{args.dataset_name}-val",) 135 | cfg.freeze() 136 | default_setup(cfg, args) 137 | return cfg 138 | 139 | 140 | def main(args): 141 | # Register Datasets 142 | register_coco_instances( 143 | f"{args.dataset_name}-train", 144 | {}, 145 | args.json_annotation_train, 146 | args.image_path_train, 147 | ) 148 | 149 | register_coco_instances( 150 | f"{args.dataset_name}-val", 151 | {}, 152 | args.json_annotation_val, 153 | args.image_path_val 154 | ) 155 | cfg = setup(args) 156 | 157 | if args.eval_only: 158 | model = Trainer.build_model(cfg) 159 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 160 | cfg.MODEL.WEIGHTS, resume=args.resume 161 | ) 162 | res = Trainer.test(cfg, model) 163 | 164 | if cfg.TEST.AUG.ENABLED: 165 | res.update(Trainer.test_with_TTA(cfg, model)) 166 | if comm.is_main_process(): 167 | verify_results(cfg, res) 168 | 169 | # Save the evaluation results 170 | pd.DataFrame(res).to_csv(f"{cfg.OUTPUT_DIR}/eval.csv") 171 | return res 172 | 173 | # Ensure that the Output directory exists 174 | os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) 175 | 176 | """ 177 | If you'd like to do anything fancier than the standard training logic, 178 | consider writing your own training loop (see plain_train_net.py) or 179 | subclassing the trainer. 
180 | """ 181 | trainer = Trainer(cfg) 182 | trainer.resume_or_load(resume=args.resume) 183 | trainer.register_hooks( 184 | [hooks.EvalHook(0, lambda: trainer.eval_and_save(cfg, trainer.model))] 185 | ) 186 | if cfg.TEST.AUG.ENABLED: 187 | trainer.register_hooks( 188 | [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] 189 | ) 190 | return trainer.train() 191 | 192 | 193 | if __name__ == "__main__": 194 | parser = default_argument_parser() 195 | 196 | # Extra Configurations for dataset names and paths 197 | parser.add_argument( 198 | "--dataset_name", 199 | help="The Dataset Name") 200 | parser.add_argument( 201 | "--json_annotation_train", 202 | help="The path to the training set JSON annotation", 203 | ) 204 | parser.add_argument( 205 | "--image_path_train", 206 | help="The path to the training set image folder", 207 | ) 208 | parser.add_argument( 209 | "--json_annotation_val", 210 | help="The path to the validation set JSON annotation", 211 | ) 212 | parser.add_argument( 213 | "--image_path_val", 214 | help="The path to the validation set image folder", 215 | ) 216 | args = parser.parse_args() 217 | print("Command Line Args:", args) 218 | 219 | # Dataset Registration is moved to the main function to support multi-gpu training 220 | # See ref https://github.com/facebookresearch/detectron2/issues/253#issuecomment-554216517 221 | 222 | launch( 223 | main, 224 | args.num_gpus, 225 | num_machines=args.num_machines, 226 | machine_rank=args.machine_rank, 227 | dist_url=args.dist_url, 228 | args=(args,), 229 | ) 230 | -------------------------------------------------------------------------------- /tools/convert_prima_to_coco.py: -------------------------------------------------------------------------------- 1 | import os, re, json 2 | import imagesize 3 | from glob import glob 4 | from bs4 import BeautifulSoup 5 | import numpy as np 6 | from PIL import Image 7 | import argparse 8 | from tqdm import tqdm 9 | import sys 10 | sys.path.append('..') 11 | from utils import cocosplit 12 | 13 | class NpEncoder(json.JSONEncoder): 14 | def default(self, obj): 15 | if isinstance(obj, np.integer): 16 | return int(obj) 17 | elif isinstance(obj, np.floating): 18 | return float(obj) 19 | elif isinstance(obj, np.ndarray): 20 | return obj.tolist() 21 | else: 22 | return super(NpEncoder, self).default(obj) 23 | 24 | def cvt_coords_to_array(obj): 25 | 26 | return np.array( 27 | [(float(pt['x']), float(pt['y'])) 28 | for pt in obj.find_all("Point")] 29 | ) 30 | 31 | def cal_ployarea(points): 32 | x = points[:,0] 33 | y = points[:,1] 34 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 35 | 36 | def _create_category(schema=0): 37 | 38 | if schema==0: 39 | 40 | categories = \ 41 | [{"supercategory": "layout", "id": 0, "name": "Background"}, 42 | {"supercategory": "layout", "id": 1, "name": "TextRegion"}, 43 | {"supercategory": "layout", "id": 2, "name": "ImageRegion"}, 44 | {"supercategory": "layout", "id": 3, "name": "TableRegion"}, 45 | {"supercategory": "layout", "id": 4, "name": "MathsRegion"}, 46 | {"supercategory": "layout", "id": 5, "name": "SeparatorRegion"}, 47 | {"supercategory": "layout", "id": 6, "name": "OtherRegion"}] 48 | 49 | find_categories = lambda name: \ 50 | [val["id"] for val in categories if val['name'] == name][0] 51 | 52 | conversion = \ 53 | { 54 | 'TextRegion': find_categories("TextRegion"), 55 | 'TableRegion': find_categories("TableRegion"), 56 | 'MathsRegion': find_categories("MathsRegion"), 57 | 'ChartRegion': find_categories("ImageRegion"), 58 | 
/tools/convert_prima_to_coco.py:
--------------------------------------------------------------------------------
import os, re, json
import imagesize
from glob import glob
from bs4 import BeautifulSoup
import numpy as np
from PIL import Image
import argparse
from tqdm import tqdm
import sys
# Make the sibling utils/ package importable when running from tools/
sys.path.append('..')
from utils import cocosplit


class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)


def cvt_coords_to_array(obj):

    return np.array(
        [(float(pt['x']), float(pt['y']))
         for pt in obj.find_all("Point")]
    )


def cal_polyarea(points):
    # Shoelace formula for the area of a simple polygon.
    x = points[:, 0]
    y = points[:, 1]
    return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))


def _create_category(schema=0):

    if schema == 0:

        categories = \
            [{"supercategory": "layout", "id": 0, "name": "Background"},
             {"supercategory": "layout", "id": 1, "name": "TextRegion"},
             {"supercategory": "layout", "id": 2, "name": "ImageRegion"},
             {"supercategory": "layout", "id": 3, "name": "TableRegion"},
             {"supercategory": "layout", "id": 4, "name": "MathsRegion"},
             {"supercategory": "layout", "id": 5, "name": "SeparatorRegion"},
             {"supercategory": "layout", "id": 6, "name": "OtherRegion"}]

        find_categories = lambda name: \
            [val["id"] for val in categories if val['name'] == name][0]

        conversion = \
            {
                'TextRegion':        find_categories("TextRegion"),
                'TableRegion':       find_categories("TableRegion"),
                'MathsRegion':       find_categories("MathsRegion"),
                'ChartRegion':       find_categories("ImageRegion"),
                'GraphicRegion':     find_categories("ImageRegion"),
                'ImageRegion':       find_categories("ImageRegion"),
                'LineDrawingRegion': find_categories("OtherRegion"),
                'SeparatorRegion':   find_categories("SeparatorRegion"),
                'NoiseRegion':       find_categories("OtherRegion"),
                'FrameRegion':       find_categories("OtherRegion"),
            }

    return categories, conversion

_categories, _categories_conversion = _create_category(schema=0)

_info = {
    "description": "PRIMA Layout Analysis Dataset",
    "url": "https://www.primaresearch.org/datasets/Layout_Analysis",
    "version": "1.0",
    "year": 2010,
    "contributor": "PRIMA Research",
    "date_created": "2020/09/01",
}

def _load_soup(filename):
    with open(filename, "r") as fp:
        soup = BeautifulSoup(fp.read(), 'xml')

    return soup

def _image_template(image_id, image_path):

    width, height = imagesize.get(image_path)

    return {
        "file_name": os.path.basename(image_path),
        "height": height,
        "width": width,
        "id": int(image_id)
    }

def _anno_template(anno_id, image_id, pts, obj_tag):

    x_1, x_2 = pts[:, 0].min(), pts[:, 0].max()
    y_1, y_2 = pts[:, 1].min(), pts[:, 1].max()
    height = y_2 - y_1
    width = x_2 - x_1

    return {
        "segmentation": [pts.flatten().tolist()],
        "area": cal_polyarea(pts),
        "iscrowd": 0,
        "image_id": image_id,
        "bbox": [x_1, y_1, width, height],
        "category_id": _categories_conversion[obj_tag],
        "id": anno_id
    }

class PRIMADataset():

    def __init__(self, base_path, anno_path='XML',
                 image_path='Images'):

        self.base_path = base_path
        self.anno_path = os.path.join(base_path, anno_path)
        self.image_path = os.path.join(base_path, image_path)

        self._ids = self.find_all_image_ids()

    def __len__(self):
        # The ids are stored in self._ids (there is no self.ids attribute).
        return len(self._ids)

    def __getitem__(self, idx):
        return self.load_image_and_annotation(idx)

    def find_all_annotation_files(self):
        return glob(os.path.join(self.anno_path, '*.xml'))

    def find_all_image_ids(self):
        replacer = lambda s: os.path.basename(s).replace('pc-', '').replace('.xml', '')
        return [replacer(s) for s in self.find_all_annotation_files()]

    def load_image_and_annotation(self, idx):

        image_id = self._ids[idx]

        image_path = os.path.join(self.image_path, f'{image_id}.tif')
        image = Image.open(image_path)

        anno = self.load_annotation(idx)

        return image, anno

    def load_annotation(self, idx):
        image_id = self._ids[idx]

        anno_path = os.path.join(self.anno_path, f'pc-{image_id}.xml')
        # A dirty hack to load files both with and without the pc- prefix
        if not os.path.exists(anno_path):
            anno_path = os.path.join(self.anno_path, f'{image_id}.xml')
            assert os.path.exists(anno_path), "Invalid path"
        anno = _load_soup(anno_path)

        return anno
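
    # A usage sketch (hypothetical paths): besides the COCO conversion below,
    # the class can also be used on its own to inspect the raw data, e.g.
    #
    #     dataset = PRIMADataset('../data/prima')
    #     image, anno = dataset[0]   # PIL image + parsed PAGE XML (BeautifulSoup)
    #     print(len(dataset))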

    def convert_to_COCO(self, save_path):

        all_image_infos = []
        all_anno_infos = []
        anno_id = 0

        for idx, image_id in enumerate(tqdm(self._ids)):

            # We use the idx as the image id
            image_path = os.path.join(self.image_path, f'{image_id}.tif')
            image_info = _image_template(idx, image_path)
            all_image_infos.append(image_info)

            anno = self.load_annotation(idx)

            for item in anno.find_all(re.compile(".*Region")):

                pts = cvt_coords_to_array(item.Coords)
                if 0 not in pts.shape:
                    # Polygons with fewer than 4 points cannot be handled
                    # appropriately by the COCO format, so we simply drop them.
                    if pts.shape[0] >= 4:
                        anno_info = _anno_template(anno_id, idx, pts, item.name)
                        all_anno_infos.append(anno_info)
                        anno_id += 1

        final_annotation = {
            "info": _info,
            "licenses": [],
            "images": all_image_infos,
            "annotations": all_anno_infos,
            "categories": _categories}

        with open(save_path, 'w') as fp:
            json.dump(final_annotation, fp, cls=NpEncoder)

        return final_annotation


parser = argparse.ArgumentParser()

parser.add_argument('--prima_datapath', type=str, default='./data/prima',
                    help='the path to the PRIMA data folder')
parser.add_argument('--anno_savepath', type=str, default='./annotations.json',
                    help='the path to save the new annotations')


if __name__ == "__main__":
    args = parser.parse_args()

    print("Start running the conversion script")

    print(f"Loading the information from the path {args.prima_datapath}")
    dataset = PRIMADataset(args.prima_datapath)

    print(f"Saving the annotation to {args.anno_savepath}")
    res = dataset.convert_to_COCO(args.anno_savepath)

    # Split the converted annotations into train/val sets (80/20).
    cocosplit.main(
        args.anno_savepath,
        split_ratio=0.8,
        having_annotations=True,
        train_save_path=args.anno_savepath.replace('.json', '-train.json'),
        test_save_path=args.anno_savepath.replace('.json', '-val.json'),
        random_state=24)
--------------------------------------------------------------------------------
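For reference, `utils/cocosplit.py` can also be run on its own to re-split an existing COCO annotation file, using the CLI flags defined in its argparse setup above. A sketch, run from the repository root:

```bash
python utils/cocosplit.py \
    --annotation-path data/prima/annotations.json \
    --split-ratio 0.8 \
    --having-annotations \
    --train data/prima/annotations-train.json \
    --test data/prima/annotations-val.json
```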