├── .gitmodules ├── .idea ├── .gitignore ├── ensembleObjectDetection.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── modules.xml └── vcs.xml ├── Ensemble ├── README.md ├── ensemble.py ├── example │ ├── 1 │ │ ├── 2007_000027.xml │ │ ├── 2007_000032.xml │ │ ├── 2007_000033.xml │ │ ├── 2007_000039.xml │ │ └── 2007_000042.xml │ ├── 2 │ │ ├── 2007_000027.xml │ │ ├── 2007_000032.xml │ │ ├── 2007_000033.xml │ │ ├── 2007_000039.xml │ │ └── 2007_000042.xml │ └── 3 │ │ ├── 2007_000027.xml │ │ ├── 2007_000032.xml │ │ ├── 2007_000033.xml │ │ ├── 2007_000039.xml │ │ └── 2007_000042.xml ├── generateXML.py └── main.py ├── LICENSE ├── README.md ├── TestTimeAugmentation ├── EfficientDet │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── augmentor │ │ ├── __init__.py │ │ ├── color.py │ │ ├── misc.py │ │ └── transform.py │ ├── efficientnet.py │ ├── eval │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── common.py │ │ └── pascal.py │ ├── generators │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── common.py │ │ ├── csv_.py │ │ └── pascal.py │ ├── inference.py │ ├── initializers.py │ ├── keras_.py │ ├── layers.py │ ├── losses.py │ ├── model.py │ ├── requirements.txt │ ├── test │ │ ├── 000004.jpg │ │ ├── 000010.jpg │ │ └── 000014.jpg │ ├── tfkeras.py │ ├── train.py │ └── utils │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── colors.py │ │ ├── compute_overlap.cpython-36m-x86_64-linux-gnu.so │ │ ├── compute_overlap.pyx │ │ ├── image.py │ │ ├── transform.py │ │ └── visualization.py ├── FSAF │ ├── README.md │ ├── augmentor │ │ ├── __init__.py │ │ ├── color.py │ │ ├── misc.py │ │ └── transform.py │ ├── callbacks.py │ ├── configure.py │ ├── fsaf_layers.py │ ├── generators │ │ ├── __init__.py │ │ ├── coco_generator.py │ │ ├── csv_generator.py │ │ ├── generator.py │ │ └── voc_generator.py │ ├── inference.py │ ├── initializers.py │ ├── layers.py │ ├── losses.py │ ├── models │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── mobilenet.py │ │ ├── resnet.py │ │ ├── retinanet.py │ │ └── vgg.py │ ├── requirements.txt │ ├── setup.py │ ├── test │ │ ├── 004456.jpg │ │ ├── 005770.jpg │ │ └── 006408.jpg │ ├── train.py │ ├── util_graphs.py │ ├── utils │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── coco_eval.py │ │ ├── colors.py │ │ ├── compute_overlap.cpython-36m-x86_64-linux-gnu.so │ │ ├── compute_overlap.pyx │ │ ├── config.py │ │ ├── eval.py │ │ ├── image.py │ │ ├── keras_version.py │ │ ├── model.py │ │ ├── transform.py │ │ └── visualization.py │ └── yolo │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config.py │ │ ├── eval │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── common.py │ │ └── pascal.py │ │ ├── fsaf_layers.py │ │ ├── generators │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── common.py │ │ ├── csv_.py │ │ └── pascal.py │ │ ├── inference.py │ │ ├── model.py │ │ └── train.py ├── README.md ├── __pycache__ │ ├── detect.cpython-36.pyc │ ├── ensemble.cpython-36.pyc │ ├── ensembleOptions.cpython-36.pyc │ ├── function.cpython-36.pyc │ ├── generateXML.cpython-36.pyc │ ├── predict_batch.cpython-36.pyc │ ├── predict_batch_rcnn.cpython-36.pyc │ ├── predict_batch_retinanet.cpython-36.pyc │ ├── techniques.cpython-36.pyc │ └── testTimeAugmentation.cpython-36.pyc ├── detect.py ├── ensemble.py ├── ensembleOptions.py ├── function.py ├── generateXML.py ├── kerasfcos │ ├── README.md │ ├── __init__.py │ ├── callbacks.py │ ├── generators │ │ ├── __init__.py │ │ ├── csv_generator.py │ │ ├── generator.py │ │ ├── test_generator.py │ │ └── voc_generator.py │ ├── inference.py │ ├── initializers.py │ ├── layers.py │ ├── losses.py │ 
├── models │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── mobilenet.py │ │ ├── resnet.py │ │ ├── retinanet.py │ │ └── vgg.py │ ├── requirements.txt │ ├── setup.py │ ├── test │ │ ├── 005360.jpg │ │ ├── 2010_003345.jpg │ │ └── 2012_000949.jpg │ ├── train.py │ ├── utils │ │ ├── __init__.py │ │ ├── anchors.py │ │ ├── coco_eval.py │ │ ├── colors.py │ │ ├── compute_overlap.cpython-36m-x86_64-linux-gnu.so │ │ ├── compute_overlap.pyx │ │ ├── config.py │ │ ├── eval.py │ │ ├── image.py │ │ ├── keras_version.py │ │ ├── model.py │ │ ├── transform.py │ │ └── visualization.py │ └── utils_graph.py ├── mainModel.py ├── mainTTA.py ├── predict_batch.py ├── predict_batch_FCOS.py ├── predict_batch_FSAF.py ├── predict_batch_efficient.py ├── predict_batch_rcnn.py ├── predict_batch_retinanet.py ├── techniques.py └── testTimeAugmentation.py ├── images ├── diagramaClases.jpg ├── ensemble.jpg ├── images.md └── testTimeAugm.jpg └── requirements.txt /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/.gitmodules -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/ensembleObjectDetection.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Ensemble/README.md: -------------------------------------------------------------------------------- 1 | # Ensemble methods 2 | 3 | This code allows us to apply the ensemble method given a folder containing folders with the corresponding xmls and indicating 4 | the option we want to use. 5 | 6 | ### Ensemble Options 7 | You can be taken using three different voting strategies: 8 | * Affirmative. This means that whenever one of the methods that produce the 9 | initial predictions says that a region contains an object, such a detection is considered as valid. 10 | * Consensus. This means that the majority of the initial methods must agree to consider that a region contains an object. The consensus strategy is analogous to the majority voting strategy commonly applied in ensemble methods for images classification. 11 | * Unanimous. This means that all the methods must agree to consider that a region contains an object. 
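The three strategies differ only in how many of the contributing detectors must propose an overlapping box for that box to be kept. A minimal sketch of the decision rule (a hypothetical helper; the thresholds mirror the ones applied in `main.py`):

```python
import math

def keep_detection(num_votes, num_detectors, option):
    """Decide whether a merged box is kept under each voting strategy.

    num_votes     -- number of detectors that predicted an overlapping box
    num_detectors -- total number of detectors in the ensemble
    option        -- 'affirmative', 'consensus' or 'unanimous'
    """
    if option == 'affirmative':
        return num_votes >= 1                             # a single detector is enough
    if option == 'consensus':
        return num_votes >= math.ceil(num_detectors / 2)  # majority of the detectors
    if option == 'unanimous':
        return num_votes == num_detectors                 # every detector must agree
    raise ValueError('unknown option: ' + option)
```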
12 | 13 | ### Executed 14 | To execute the code we use the following instruction. 15 | ```bash 16 | python main.py -d pathOfDataset -o option 17 | ``` 18 | ### Example 19 | An example of its use would be the following. Given the examples folder that in turn contains folders with the xmls files, we would execute the following instruction (suppose we choose the consensus option). 20 | ```bash 21 | python main.py -d example -o consensus 22 | ``` 23 | -------------------------------------------------------------------------------- /Ensemble/example/1/2007_000027.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 2_0_2007_000027 5 | Images//../salida/histo/tmp/2_0_2007_000027.jpg 6 | 7 | Unknown 8 | 9 | 10 | 486 11 | 500 12 | 3 13 | 14 | 0 15 | 16 | person 17 | Unspecified 18 | 0 19 | 0 20 | 0.9999385 21 | 22 | 169 23 | 100 24 | 356 25 | 358 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Ensemble/example/1/2007_000032.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 3_0_2007_000032 5 | Images//../salida/histo/tmp/3_0_2007_000032.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 281 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.99776435 21 | 22 | 131 23 | 89 24 | 197 25 | 117 26 | 27 | 28 | 29 | person 30 | Unspecified 31 | 0 32 | 0 33 | 0.9977574 34 | 35 | 196 36 | 180 37 | 212 38 | 228 39 | 40 | 41 | 42 | aeroplane 43 | Unspecified 44 | 0 45 | 0 46 | 0.9972204 47 | 48 | 117 49 | 67 50 | 392 51 | 210 52 | 53 | 54 | 55 | person 56 | Unspecified 57 | 0 58 | 0 59 | 0.9954644 60 | 61 | 24 62 | 187 63 | 44 64 | 231 65 | 66 | 67 | 68 | truck 69 | Unspecified 70 | 0 71 | 0 72 | 0.98245126 73 | 74 | 12 75 | 128 76 | 48 77 | 145 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /Ensemble/example/1/2007_000033.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 4_0_2007_000033 5 | Images//../salida/histo/tmp/4_0_2007_000033.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 366 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.99991727 21 | 22 | 0 23 | 104 24 | 485 25 | 253 26 | 27 | 28 | 29 | aeroplane 30 | Unspecified 31 | 0 32 | 0 33 | 0.99755836 34 | 35 | 328 36 | 188 37 | 404 38 | 225 39 | 40 | 41 | 42 | aeroplane 43 | Unspecified 44 | 0 45 | 0 46 | 0.99208516 47 | 48 | 419 49 | 201 50 | 476 51 | 222 52 | 53 | 54 | 55 | aeroplane 56 | Unspecified 57 | 0 58 | 0 59 | 0.9736789 60 | 61 | 1 62 | 190 63 | 47 64 | 216 65 | 66 | 67 | 68 | truck 69 | Unspecified 70 | 0 71 | 0 72 | 0.88334656 73 | 74 | 1 75 | 228 76 | 28 77 | 330 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /Ensemble/example/1/2007_000039.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 5_0_2007_000039 5 | Images//../salida/histo/tmp/5_0_2007_000039.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 0 15 | 16 | tvmonitor 17 | Unspecified 18 | 0 19 | 0 20 | 0.9993581 21 | 22 | 166 23 | 88 24 | 346 25 | 276 26 | 27 | 28 | 29 | keyboard 30 | Unspecified 31 | 0 32 | 0 33 | 0.9938328 34 | 35 | 121 36 | 260 37 | 343 38 | 372 39 | 40 | 41 | 42 | refrigerator 43 | Unspecified 44 | 0 45 | 0 46 | 0.92890877 47 | 48 | 2 49 | 3 50 | 179 51 | 370 52 | 53 | 54 | 55 | bowl 56 | Unspecified 57 | 0 58 | 0 59 | 
0.8038953 60 | 61 | 416 62 | 315 63 | 482 64 | 362 65 | 66 | 67 | 68 | tvmonitor 69 | Unspecified 70 | 0 71 | 0 72 | 0.7665141 73 | 74 | 24 75 | 28 76 | 320 77 | 339 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /Ensemble/example/1/2007_000042.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 0_0_2007_000042 5 | Images//../salida/histo/tmp/0_0_2007_000042.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 335 12 | 3 13 | 14 | 0 15 | 16 | train 17 | Unspecified 18 | 0 19 | 0 20 | 0.9970091 21 | 22 | 285 23 | 40 24 | 500 25 | 232 26 | 27 | 28 | 29 | train 30 | Unspecified 31 | 0 32 | 0 33 | 0.9925662 34 | 35 | 2 36 | 36 37 | 315 38 | 276 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Ensemble/example/2/2007_000027.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 2_0_2007_000027 5 | Images//../salida/none/tmp/2_0_2007_000027.jpg 6 | 7 | Unknown 8 | 9 | 10 | 486 11 | 500 12 | 3 13 | 14 | 0 15 | 16 | person 17 | Unspecified 18 | 0 19 | 0 20 | 0.9999583 21 | 22 | 169 23 | 100 24 | 347 25 | 359 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Ensemble/example/2/2007_000032.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 3_0_2007_000032 5 | Images//../salida/none/tmp/3_0_2007_000032.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 281 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.99783045 21 | 22 | 100 23 | 75 24 | 385 25 | 207 26 | 27 | 28 | 29 | person 30 | Unspecified 31 | 0 32 | 0 33 | 0.9972447 34 | 35 | 22 36 | 188 37 | 45 38 | 236 39 | 40 | 41 | 42 | aeroplane 43 | Unspecified 44 | 0 45 | 0 46 | 0.9972248 47 | 48 | 131 49 | 87 50 | 200 51 | 117 52 | 53 | 54 | 55 | person 56 | Unspecified 57 | 0 58 | 0 59 | 0.995362 60 | 61 | 196 62 | 179 63 | 211 64 | 228 65 | 66 | 67 | 68 | truck 69 | Unspecified 70 | 0 71 | 0 72 | 0.9789924 73 | 74 | 16 75 | 128 76 | 46 77 | 144 78 | 79 | 80 | 81 | boat 82 | Unspecified 83 | 0 84 | 0 85 | 0.8549495 86 | 87 | 0 88 | 125 89 | 16 90 | 144 91 | 92 | 93 | 94 | aeroplane 95 | Unspecified 96 | 0 97 | 0 98 | 0.8142854 99 | 100 | 115 101 | 80 102 | 323 103 | 133 104 | 105 | 106 | 107 | dog 108 | Unspecified 109 | 0 110 | 0 111 | 0.73535573 112 | 113 | 97 114 | 220 115 | 112 116 | 229 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /Ensemble/example/2/2007_000033.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 4_0_2007_000033 5 | Images//../salida/none/tmp/4_0_2007_000033.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 366 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.99982256 21 | 22 | 0 23 | 105 24 | 481 25 | 253 26 | 27 | 28 | 29 | aeroplane 30 | Unspecified 31 | 0 32 | 0 33 | 0.99537575 34 | 35 | 416 36 | 200 37 | 476 38 | 222 39 | 40 | 41 | 42 | aeroplane 43 | Unspecified 44 | 0 45 | 0 46 | 0.9935741 47 | 48 | 324 49 | 188 50 | 406 51 | 224 52 | 53 | 54 | 55 | truck 56 | Unspecified 57 | 0 58 | 0 59 | 0.98684734 60 | 61 | 1 62 | 230 63 | 30 64 | 332 65 | 66 | 67 | 68 | aeroplane 69 | Unspecified 70 | 0 71 | 0 72 | 0.972013 73 | 74 | 1 75 | 191 76 | 47 77 | 216 78 | 79 | 80 | 81 | aeroplane 82 | Unspecified 83 | 0 84 | 0 85 | 0.7804272 86 | 87 | 380 88 | 196 89 | 412 90 
| 221 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /Ensemble/example/2/2007_000039.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 5_0_2007_000039 5 | Images//../salida/none/tmp/5_0_2007_000039.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 0 15 | 16 | tvmonitor 17 | Unspecified 18 | 0 19 | 0 20 | 0.99891627 21 | 22 | 162 23 | 89 24 | 345 25 | 277 26 | 27 | 28 | 29 | keyboard 30 | Unspecified 31 | 0 32 | 0 33 | 0.9976534 34 | 35 | 115 36 | 258 37 | 341 38 | 372 39 | 40 | 41 | 42 | refrigerator 43 | Unspecified 44 | 0 45 | 0 46 | 0.90029246 47 | 48 | 4 49 | 3 50 | 213 51 | 368 52 | 53 | 54 | 55 | sink 56 | Unspecified 57 | 0 58 | 0 59 | 0.8643521 60 | 61 | 343 62 | 240 63 | 485 64 | 357 65 | 66 | 67 | 68 | bowl 69 | Unspecified 70 | 0 71 | 0 72 | 0.70929754 73 | 74 | 416 75 | 316 76 | 482 77 | 362 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /Ensemble/example/2/2007_000042.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 0_0_2007_000042 5 | Images//../salida/none/tmp/0_0_2007_000042.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 335 12 | 3 13 | 14 | 0 15 | 16 | train 17 | Unspecified 18 | 0 19 | 0 20 | 0.9985753 21 | 22 | 279 23 | 39 24 | 500 25 | 249 26 | 27 | 28 | 29 | train 30 | Unspecified 31 | 0 32 | 0 33 | 0.99792457 34 | 35 | 0 36 | 34 37 | 239 38 | 293 39 | 40 | 41 | 42 | boat 43 | Unspecified 44 | 0 45 | 0 46 | 0.7483044 47 | 48 | 2 49 | 45 50 | 212 51 | 77 52 | 53 | 54 | 55 | train 56 | Unspecified 57 | 0 58 | 0 59 | 0.71809465 60 | 61 | 352 62 | 233 63 | 493 64 | 295 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /Ensemble/example/3/2007_000027.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 2_0_2007_000027 5 | Images//../salida/vflip/tmp/2_0_2007_000027.jpg 6 | 7 | Unknown 8 | 9 | 10 | 486 11 | 500 12 | 3 13 | 14 | 0 15 | 16 | person 17 | Unspecified 18 | 0 19 | 0 20 | 0.99977356 21 | 22 | 170 23 | 95 24 | 341 25 | 338 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Ensemble/example/3/2007_000032.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 3_0_2007_000032 5 | Images//../salida/vflip/tmp/3_0_2007_000032.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 281 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.8808234 21 | 22 | 84 23 | 95 24 | 406 25 | 243 26 | 27 | 28 | 29 | aeroplane 30 | Unspecified 31 | 0 32 | 0 33 | 0.8340644 34 | 35 | 107 36 | 95 37 | 282 38 | 167 39 | 40 | 41 | 42 | person 43 | Unspecified 44 | 0 45 | 0 46 | 0.77157795 47 | 48 | 24 49 | 187 50 | 52 51 | 237 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /Ensemble/example/3/2007_000033.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 4_0_2007_000033 5 | Images//../salida/vflip/tmp/4_0_2007_000033.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 366 12 | 3 13 | 14 | 0 15 | 16 | aeroplane 17 | Unspecified 18 | 0 19 | 0 20 | 0.948219 21 | 22 | 29 23 | 114 24 | 376 25 | 342 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /Ensemble/example/3/2007_000039.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 5_0_2007_000039 5 | Images//../salida/vflip/tmp/5_0_2007_000039.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 375 12 | 3 13 | 14 | 0 15 | 16 | tvmonitor 17 | Unspecified 18 | 0 19 | 0 20 | 0.94501233 21 | 22 | 159 23 | 89 24 | 346 25 | 264 26 | 27 | 28 | 29 | keyboard 30 | Unspecified 31 | 0 32 | 0 33 | 0.9448636 34 | 35 | 115 36 | 255 37 | 339 38 | 364 39 | 40 | 41 | 42 | chair 43 | Unspecified 44 | 0 45 | 0 46 | 0.92722976 47 | 48 | 203 49 | 1 50 | 344 51 | 44 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /Ensemble/example/3/2007_000042.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | images 4 | 0_0_2007_000042 5 | Images//../salida/vflip/tmp/0_0_2007_000042.jpg 6 | 7 | Unknown 8 | 9 | 10 | 500 11 | 335 12 | 3 13 | 14 | 0 15 | 16 | train 17 | Unspecified 18 | 0 19 | 0 20 | 0.9850043 21 | 22 | 5 23 | 37 24 | 500 25 | 287 26 | 27 | 28 | 29 | bench 30 | Unspecified 31 | 0 32 | 0 33 | 0.73901737 34 | 35 | 6 36 | 3 37 | 500 38 | 58 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /Ensemble/generateXML.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import xml.etree.ElementTree as ET 3 | from xml.dom import minidom 4 | 5 | 6 | def prettify(elem): 7 | """Return a pretty-printed XML string for the Element. 8 | """ 9 | rough_string = ET.tostring(elem, 'utf-8') 10 | reparsed = minidom.parseString(rough_string) 11 | return reparsed.toprettyxml(indent=" ") 12 | 13 | def generateXML(filename,outputPath,w,h,d,boxes): 14 | top = ET.Element('annotation') 15 | childFolder = ET.SubElement(top, 'folder') 16 | childFolder.text = 'images' 17 | childFilename = ET.SubElement(top, 'filename') 18 | childFilename.text = filename[0:filename.rfind(".")] 19 | childPath = ET.SubElement(top, 'path') 20 | childPath.text = outputPath + "/" + filename 21 | childSource = ET.SubElement(top, 'source') 22 | childDatabase = ET.SubElement(childSource, 'database') 23 | childDatabase.text = 'Unknown' 24 | childSize = ET.SubElement(top, 'size') 25 | childWidth = ET.SubElement(childSize, 'width') 26 | childWidth.text = str(w) 27 | childHeight = ET.SubElement(childSize, 'height') 28 | childHeight.text = str(h) 29 | childDepth = ET.SubElement(childSize, 'depth') 30 | childDepth.text = str(d) 31 | childSegmented = ET.SubElement(top, 'segmented') 32 | childSegmented.text = str(0) 33 | for box in boxes: 34 | category = box[0] 35 | (x,y,xmax,ymax) = box[1:5] 36 | childObject = ET.SubElement(top, 'object') 37 | childName = ET.SubElement(childObject, 'name') 38 | childName.text = category 39 | childPose = ET.SubElement(childObject, 'pose') 40 | childPose.text = 'Unspecified' 41 | childTruncated = ET.SubElement(childObject, 'truncated') 42 | childTruncated.text = '0' 43 | childDifficult = ET.SubElement(childObject, 'difficult') 44 | childDifficult.text = '0' 45 | childConfidence = ET.SubElement(childObject, 'confidence') 46 | childConfidence.text = box[5] 47 | childBndBox = ET.SubElement(childObject, 'bndbox') 48 | childXmin = ET.SubElement(childBndBox, 'xmin') 49 | childXmin.text = str(x[0:-2]) 50 | childYmin = ET.SubElement(childBndBox, 'ymin') 51 | childYmin.text = str(y[0:-2]) 52 | childXmax = ET.SubElement(childBndBox, 'xmax') 53 | childXmax.text = str(xmax[0:-2]) 54 | childYmax = 
ET.SubElement(childBndBox, 'ymax') 55 | childYmax.text = str(ymax[0:-2]) 56 | return prettify(top) 57 | 58 | 59 | -------------------------------------------------------------------------------- /Ensemble/main.py: -------------------------------------------------------------------------------- 1 | import ensemble 2 | import argparse 3 | import numpy as np 4 | import generateXML 5 | import glob 6 | from lxml import etree 7 | import os 8 | import math 9 | 10 | ap = argparse.ArgumentParser() 11 | ap.add_argument("-d", "--dataset", required=True,help="path to the dataset of images") 12 | ap.add_argument("-o", "--option", required=True,help="option to the ensemble: affirmative, consensus or unanimous") 13 | args = vars(ap.parse_args()) 14 | 15 | #read the arguments 16 | datasetPath= args["dataset"] 17 | option = args["option"] 18 | 19 | #we get a list that contains as many pairs as there are xmls in the first folder, these pairs indicate first the 20 | #name of the xml file and then contains a list with all the objects of the xmls 21 | boxes = ensemble.listBoxes(datasetPath) 22 | 23 | for nombre,lis in boxes: 24 | pick = [] 25 | resul = [] 26 | 27 | #we check if the output folder exists 28 | if os.path.exists(datasetPath+"/output") == False: 29 | os.mkdir(datasetPath+"/output") 30 | equalFiles = glob.glob(datasetPath + '/*/' + nombre+'.xml') 31 | file = open(datasetPath+"/output/"+nombre+".xml", "w") 32 | numFich = len(equalFiles) 33 | if equalFiles[0].find("/output/")>0: 34 | doc = etree.parse(equalFiles[1]) 35 | else: 36 | doc = etree.parse(equalFiles[0]) 37 | filename = doc.getroot() # we look for the root of our xml 38 | wI = filename.find("size").find("width").text 39 | hI = filename.find("size").find("height").text 40 | d = filename.find("size").find("depth").text 41 | box = ensemble.uneBoundingBoxes(lis) 42 | #apply the corresponging technique 43 | for rectangles in box: 44 | list1 = [] 45 | 46 | for rc in rectangles: 47 | list1.append(rc) 48 | pick = [] 49 | 50 | if option == 'consensus': 51 | if len(np.array(list1))>=math.ceil(numFich/2): 52 | pick,prob = ensemble.nonMaximumSuppression(np.array(list1), 0.3) 53 | pick[0][5] = prob/numFich 54 | 55 | 56 | elif option == 'unanimous': 57 | if len(np.array(list1))==numFich: 58 | pick,prob = ensemble.nonMaximumSuppression(np.array(list1), 0.3) 59 | pick[0][5] = prob / numFich 60 | 61 | elif option == 'affirmative': 62 | pick,prob = ensemble.nonMaximumSuppression(np.array(list1), 0.3) 63 | pick[0][5] = prob / numFich 64 | 65 | if len(pick)!=0: 66 | resul.append(list(pick[0])) 67 | file.write(generateXML.generateXML(nombre, "", wI, hI, d, resul)) 68 | file.close() 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ángela Casado García 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/README.md: -------------------------------------------------------------------------------- 1 | # EfficientDet 2 | This is a fork of the [xuannianz](https://github.com/xuannianz/EfficientDet) implementation of [EfficientDet](https://arxiv.org/pdf/1911.09070.pdf) for object detection on Keras and Tensorflow. The project is based on [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet) 3 | and the [qubvel/efficientnet](https://github.com/qubvel/efficientnet). 4 | The pretrained EfficientNet weights files are downloaded from [Callidior/keras-applications/releases](https://github.com/Callidior/keras-applications/releases) 5 | 6 | Thanks for their hard work. 7 | This project is released under the Apache License. Please take their licenses into consideration too when use this project. 8 | 9 | ## Train 10 | ### build dataset 11 | 1. Pascal VOC 12 | * Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012. 13 | * Append VOC2007 train.txt to VOC2012 trainval.txt. 14 | * Overwrite VOC2012 val.txt by VOC2007 val.txt. 15 | 2. MSCOCO 2017 16 | * Download images and annotations of coco 2017 17 | * Copy all images into datasets/coco/images, all annotations into datasets/coco/annotations 18 | 3. Other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)) 19 | ### train 20 | * STEP1: `python3 train.py --snapshot imagenet --phi {0, 1, 2, 3, 4, 5, 6} --gpu 0 --random-transform --compute-val-loss --freeze-backbone --batch-size 32 --steps 1000 pascal|coco datasets/VOC2012|datasets/coco` to start training. The init lr is 1e-3. 21 | * STEP2: `python3 train.py --snapshot xxx.h5 --phi {0, 1, 2, 3, 4, 5, 6} --gpu 0 --random-transform --compute-val-loss --freeze-bn --batch-size 4 --steps 10000 pascal|coco datasets/VOC2012|datasets/coco` to start training when val mAP can not increase during STEP1. The init lr is 1e-4 and decays to 1e-5 when val mAP keeps dropping down. 22 | ## Evaluate 23 | 1. PASCAL VOC 24 | * `python3 eval/common.py` to evaluate pascal model by specifying model path there. 25 | * The best evaluation results (score_threshold=0.01, mAP50) on VOC2007 test are: 26 | 27 | | phi | 0 | 1 | 28 | | ---- | ---- | ---- | 29 | | w/o weighted | | [0.8029](https://drive.google.com/open?id=1-QkMq56w4dZOTQUnbitF53NKEiNF9F_Q) | 30 | | w/ weighted | [0.7892](https://drive.google.com/open?id=1mrqL9rFoYW-4Jc57MsTipkvOTRy_EGfe) | | 31 | 2. MSCOCO 32 | * `python3 eval/coco.py` to evaluate coco model by specifying model path there. 33 | ## Test 34 | `python3 inference.py` to test your image by specifying image path and model path there. 
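The inference script has no command-line options; the model and image are selected by editing a few constants at the top of `inference.py`. A sketch of the values that usually need changing (the variable names come from the script itself, the paths and values below are placeholders):

```python
phi = 1                      # EfficientDet scale (0-6); must match the trained checkpoint
weighted_bifpn = False       # set True if the checkpoint was trained with a weighted BiFPN
model_path = 'checkpoints/your_pascal_model.h5'   # trained weights (.h5)
image_path = 'path/to/your_image.jpg'             # image to run detection on
score_threshold = 0.5        # detections below this confidence are discarded
```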
35 | 36 | ![image1](test/000004.jpg) 37 | ![image2](test/000010.jpg) 38 | ![image3](test/000014.jpg) 39 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/augmentor/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/eval/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/eval/pascal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | # import keras 18 | from tensorflow import keras 19 | 20 | from eval.common import evaluate 21 | 22 | 23 | class Evaluate(keras.callbacks.Callback): 24 | """ 25 | Evaluation callback for arbitrary datasets. 26 | """ 27 | 28 | def __init__( 29 | self, 30 | generator, 31 | model, 32 | iou_threshold=0.5, 33 | score_threshold=0.01, 34 | max_detections=100, 35 | save_path=None, 36 | tensorboard=None, 37 | weighted_average=False, 38 | verbose=1 39 | ): 40 | """ 41 | Evaluate a given dataset using a given model at the end of every epoch during training. 42 | 43 | Args: 44 | generator: The generator that represents the dataset to evaluate. 45 | iou_threshold: The threshold used to consider when a detection is positive or negative. 46 | score_threshold: The score confidence threshold to use for detections. 47 | max_detections: The maximum number of detections to use per image. 48 | save_path: The path to save images with visualized detections to. 49 | tensorboard: Instance of keras.callbacks.TensorBoard used to log the mAP value. 50 | weighted_average: Compute the mAP using the weighted average of precisions among classes. 51 | verbose: Set the verbosity level, by default this is set to 1. 
52 | """ 53 | self.generator = generator 54 | self.iou_threshold = iou_threshold 55 | self.score_threshold = score_threshold 56 | self.max_detections = max_detections 57 | self.save_path = save_path 58 | self.tensorboard = tensorboard 59 | self.weighted_average = weighted_average 60 | self.verbose = verbose 61 | self.active_model = model 62 | 63 | super(Evaluate, self).__init__() 64 | 65 | def on_epoch_end(self, epoch, logs=None): 66 | logs = logs or {} 67 | 68 | # run evaluation 69 | average_precisions = evaluate( 70 | self.generator, 71 | self.active_model, 72 | iou_threshold=self.iou_threshold, 73 | score_threshold=self.score_threshold, 74 | max_detections=self.max_detections, 75 | visualize=False 76 | ) 77 | 78 | # compute per class average precision 79 | total_instances = [] 80 | precisions = [] 81 | for label, (average_precision, num_annotations) in average_precisions.items(): 82 | if self.verbose == 1: 83 | print('{:.0f} instances of class'.format(num_annotations), 84 | self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) 85 | total_instances.append(num_annotations) 86 | precisions.append(average_precision) 87 | if self.weighted_average: 88 | self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances) 89 | else: 90 | self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances) 91 | 92 | if self.tensorboard is not None and self.tensorboard.writer is not None: 93 | import tensorflow as tf 94 | summary = tf.Summary() 95 | summary_value = summary.value.add() 96 | summary_value.simple_value = self.mean_ap 97 | summary_value.tag = "mAP" 98 | self.tensorboard.writer.add_summary(summary, epoch) 99 | 100 | logs['mAP'] = self.mean_ap 101 | 102 | if self.verbose == 1: 103 | print('mAP: {:.4f}'.format(self.mean_ap)) 104 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/generators/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/inference.py: -------------------------------------------------------------------------------- 1 | from model import efficientdet 2 | import cv2 3 | import os 4 | import numpy as np 5 | import time 6 | from utils import preprocess_image 7 | from utils.anchors import anchors_for_shape 8 | 9 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 10 | 11 | phi = 1 12 | weighted_bifpn = False 13 | model_path = 'checkpoints/2019-12-03/pascal_05_0.6283_1.1975_0.8029.h5' 14 | image_sizes = (512, 640, 768, 896, 1024, 1280, 1408) 15 | image_size = image_sizes[phi] 16 | classes = [ 17 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 18 | 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', 19 | ] 20 | num_classes = len(classes) 21 | score_threshold = 0.5 22 | colors = [np.random.randint(0, 256, 3).tolist() for i in range(num_classes)] 23 | model, prediction_model = efficientdet(phi=phi, 24 | weighted_bifpn=weighted_bifpn, 25 | num_classes=num_classes, 26 | score_threshold=score_threshold) 27 | prediction_model.load_weights(model_path, by_name=True) 28 | 29 | image_path = 'datasets/VOC2007/JPEGImages/000002.jpg' 30 | image = 
cv2.imread(image_path) 31 | src_image = image.copy() 32 | image = image[:, :, ::-1] 33 | h, w = image.shape[:2] 34 | 35 | image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size) 36 | inputs = np.expand_dims(image, axis=0) 37 | anchors = anchors_for_shape((image_size, image_size)) 38 | # run network 39 | start = time.time() 40 | boxes, scores, labels = prediction_model.predict_on_batch([np.expand_dims(image, axis=0), 41 | np.expand_dims(anchors, axis=0)]) 42 | print(time.time() - start) 43 | boxes[0, :, [0, 2]] = boxes[0, :, [0, 2]] - offset_w 44 | boxes[0, :, [1, 3]] = boxes[0, :, [1, 3]] - offset_h 45 | boxes /= scale 46 | boxes[0, :, 0] = np.clip(boxes[0, :, 0], 0, w - 1) 47 | boxes[0, :, 1] = np.clip(boxes[0, :, 1], 0, h - 1) 48 | boxes[0, :, 2] = np.clip(boxes[0, :, 2], 0, w - 1) 49 | boxes[0, :, 3] = np.clip(boxes[0, :, 3], 0, h - 1) 50 | 51 | # select indices which have a score above the threshold 52 | indices = np.where(scores[0, :] > score_threshold)[0] 53 | 54 | # select those detections 55 | boxes = boxes[0, indices] 56 | scores = scores[0, indices] 57 | labels = labels[0, indices] 58 | 59 | for box, score, label in zip(boxes, scores, labels): 60 | xmin = int(round(box[0])) 61 | ymin = int(round(box[1])) 62 | xmax = int(round(box[2])) 63 | ymax = int(round(box[3])) 64 | score = '{:.4f}'.format(score) 65 | class_id = int(label) 66 | color = colors[class_id] 67 | class_name = classes[class_id] 68 | label = '-'.join([class_name, score]) 69 | ret, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 70 | cv2.rectangle(src_image, (xmin, ymin), (xmax, ymax), color, 1) 71 | cv2.rectangle(src_image, (xmin, ymax - ret[1] - baseline), (xmin + ret[0], ymax), color, -1) 72 | cv2.putText(src_image, label, (xmin, ymax - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) 73 | cv2.namedWindow('image', cv2.WINDOW_NORMAL) 74 | cv2.imshow('image', src_image) 75 | cv2.waitKey(0) 76 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/initializers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | # import keras 18 | from tensorflow import keras 19 | 20 | import numpy as np 21 | import math 22 | 23 | 24 | class PriorProbability(keras.initializers.Initializer): 25 | """ Apply a prior probability to the weights. 
26 | """ 27 | 28 | def __init__(self, probability=0.01): 29 | self.probability = probability 30 | 31 | def get_config(self): 32 | return { 33 | 'probability': self.probability 34 | } 35 | 36 | def __call__(self, shape, dtype=None): 37 | # set bias to -log((1 - p)/p) for foreground 38 | result = np.ones(shape, dtype=np.float32) * -math.log((1 - self.probability) / self.probability) 39 | 40 | return result 41 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/keras_.py: -------------------------------------------------------------------------------- 1 | from utils import inject_keras_modules, init_keras_custom_objects 2 | import efficientnet as model 3 | 4 | EfficientNetB0 = inject_keras_modules(model.EfficientNetB0) 5 | EfficientNetB1 = inject_keras_modules(model.EfficientNetB1) 6 | EfficientNetB2 = inject_keras_modules(model.EfficientNetB2) 7 | EfficientNetB3 = inject_keras_modules(model.EfficientNetB3) 8 | EfficientNetB4 = inject_keras_modules(model.EfficientNetB4) 9 | EfficientNetB5 = inject_keras_modules(model.EfficientNetB5) 10 | EfficientNetB6 = inject_keras_modules(model.EfficientNetB6) 11 | EfficientNetB7 = inject_keras_modules(model.EfficientNetB7) 12 | 13 | preprocess_input = inject_keras_modules(model.preprocess_input) 14 | 15 | init_keras_custom_objects() 16 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.5 2 | opencv-contrib-python==3.4.2.17 3 | opencv-python==3.4.2.17 4 | Pillow==6.2.0 5 | tensorflow-gpu==1.15.0 6 | progressbar2 7 | git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI 8 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/test/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/test/000004.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/test/000010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/test/000010.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/test/000014.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/test/000014.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/tfkeras.py: -------------------------------------------------------------------------------- 1 | from .utils import inject_tfkeras_modules, init_tfkeras_custom_objects 2 | from . 
import efficientnet as model 3 | 4 | EfficientNetB0 = inject_tfkeras_modules(model.EfficientNetB0) 5 | EfficientNetB1 = inject_tfkeras_modules(model.EfficientNetB1) 6 | EfficientNetB2 = inject_tfkeras_modules(model.EfficientNetB2) 7 | EfficientNetB3 = inject_tfkeras_modules(model.EfficientNetB3) 8 | EfficientNetB4 = inject_tfkeras_modules(model.EfficientNetB4) 9 | EfficientNetB5 = inject_tfkeras_modules(model.EfficientNetB5) 10 | EfficientNetB6 = inject_tfkeras_modules(model.EfficientNetB6) 11 | EfficientNetB7 = inject_tfkeras_modules(model.EfficientNetB7) 12 | 13 | preprocess_input = inject_tfkeras_modules(model.preprocess_input) 14 | 15 | init_tfkeras_custom_objects() 16 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors, Pavel Yakubovskiy. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | import functools 17 | import cv2 18 | import numpy as np 19 | 20 | _KERAS_BACKEND = None 21 | _KERAS_LAYERS = None 22 | _KERAS_MODELS = None 23 | _KERAS_UTILS = None 24 | 25 | 26 | def get_submodules_from_kwargs(kwargs): 27 | backend = kwargs.get('backend', _KERAS_BACKEND) 28 | layers = kwargs.get('layers', _KERAS_LAYERS) 29 | models = kwargs.get('models', _KERAS_MODELS) 30 | utils = kwargs.get('utils', _KERAS_UTILS) 31 | for key in kwargs.keys(): 32 | if key not in ['backend', 'layers', 'models', 'utils']: 33 | raise TypeError('Invalid keyword argument: %s', key) 34 | return backend, layers, models, utils 35 | 36 | 37 | def inject_keras_modules(func): 38 | import keras 39 | @functools.wraps(func) 40 | def wrapper(*args, **kwargs): 41 | kwargs['backend'] = keras.backend 42 | kwargs['layers'] = keras.layers 43 | kwargs['models'] = keras.models 44 | kwargs['utils'] = keras.utils 45 | return func(*args, **kwargs) 46 | 47 | return wrapper 48 | 49 | 50 | def inject_tfkeras_modules(func): 51 | import tensorflow.keras as tfkeras 52 | @functools.wraps(func) 53 | def wrapper(*args, **kwargs): 54 | kwargs['backend'] = tfkeras.backend 55 | kwargs['layers'] = tfkeras.layers 56 | kwargs['models'] = tfkeras.models 57 | kwargs['utils'] = tfkeras.utils 58 | return func(*args, **kwargs) 59 | 60 | return wrapper 61 | 62 | 63 | def init_keras_custom_objects(): 64 | import keras 65 | import sys 66 | sys.path.append("../") 67 | from .. import efficientnet as model 68 | 69 | custom_objects = { 70 | 'swish': inject_keras_modules(model.get_swish)(), 71 | 'FixedDropout': inject_keras_modules(model.get_dropout)() 72 | } 73 | 74 | keras.utils.generic_utils.get_custom_objects().update(custom_objects) 75 | 76 | 77 | def init_tfkeras_custom_objects(): 78 | import tensorflow.keras as tfkeras 79 | import sys 80 | sys.path.append("../") 81 | 82 | from .. 
import efficientnet as model 83 | 84 | custom_objects = { 85 | 'swish': inject_tfkeras_modules(model.get_swish)(), 86 | 'FixedDropout': inject_tfkeras_modules(model.get_dropout)() 87 | } 88 | 89 | tfkeras.utils.get_custom_objects().update(custom_objects) 90 | 91 | 92 | def preprocess_image(image, image_size): 93 | image_height, image_width = image.shape[:2] 94 | if image_height > image_width: 95 | scale = image_size / image_height 96 | resized_height = image_size 97 | resized_width = int(image_width * scale) 98 | else: 99 | scale = image_size / image_width 100 | resized_height = int(image_height * scale) 101 | resized_width = image_size 102 | image = cv2.resize(image, (resized_width, resized_height)) 103 | new_image = np.ones((image_size, image_size, 3), dtype=np.float32) * 128. 104 | offset_h = (image_size - resized_height) // 2 105 | offset_w = (image_size - resized_width) // 2 106 | new_image[offset_h:offset_h + resized_height, offset_w:offset_w + resized_width] = image.astype(np.float32) 107 | new_image /= 255. 108 | mean = [0.485, 0.456, 0.406] 109 | std = [0.229, 0.224, 0.225] 110 | new_image[..., 0] -= mean[0] 111 | new_image[..., 1] -= mean[1] 112 | new_image[..., 2] -= mean[2] 113 | new_image[..., 0] /= std[0] 114 | new_image[..., 1] /= std[1] 115 | new_image[..., 2] /= std[2] 116 | return new_image, scale, offset_h, offset_w 117 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/utils/colors.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def label_color(label): 5 | """ Return a color from a set of predefined colors. Contains 80 colors in total. 6 | 7 | Args 8 | label: The label to get the color for. 9 | 10 | Returns 11 | A list of three values representing a RGB color. 12 | 13 | If no color is defined for a certain label, the color green is returned and a warning is printed. 
14 | """ 15 | if label < len(colors): 16 | return colors[label] 17 | else: 18 | warnings.warn('Label {} has no color, returning default.'.format(label)) 19 | return (0, 255, 0) 20 | 21 | 22 | """ 23 | Generated using: 24 | 25 | ``` 26 | colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] 27 | shuffle(colors) 28 | pprint(colors) 29 | ``` 30 | """ 31 | colors = [ 32 | [31 , 0 , 255] , 33 | [0 , 159 , 255] , 34 | [255 , 95 , 0] , 35 | [255 , 19 , 0] , 36 | [255 , 0 , 0] , 37 | [255 , 38 , 0] , 38 | [0 , 255 , 25] , 39 | [255 , 0 , 133] , 40 | [255 , 172 , 0] , 41 | [108 , 0 , 255] , 42 | [0 , 82 , 255] , 43 | [0 , 255 , 6] , 44 | [255 , 0 , 152] , 45 | [223 , 0 , 255] , 46 | [12 , 0 , 255] , 47 | [0 , 255 , 178] , 48 | [108 , 255 , 0] , 49 | [184 , 0 , 255] , 50 | [255 , 0 , 76] , 51 | [146 , 255 , 0] , 52 | [51 , 0 , 255] , 53 | [0 , 197 , 255] , 54 | [255 , 248 , 0] , 55 | [255 , 0 , 19] , 56 | [255 , 0 , 38] , 57 | [89 , 255 , 0] , 58 | [127 , 255 , 0] , 59 | [255 , 153 , 0] , 60 | [0 , 255 , 255] , 61 | [0 , 255 , 216] , 62 | [0 , 255 , 121] , 63 | [255 , 0 , 248] , 64 | [70 , 0 , 255] , 65 | [0 , 255 , 159] , 66 | [0 , 216 , 255] , 67 | [0 , 6 , 255] , 68 | [0 , 63 , 255] , 69 | [31 , 255 , 0] , 70 | [255 , 57 , 0] , 71 | [255 , 0 , 210] , 72 | [0 , 255 , 102] , 73 | [242 , 255 , 0] , 74 | [255 , 191 , 0] , 75 | [0 , 255 , 63] , 76 | [255 , 0 , 95] , 77 | [146 , 0 , 255] , 78 | [184 , 255 , 0] , 79 | [255 , 114 , 0] , 80 | [0 , 255 , 235] , 81 | [255 , 229 , 0] , 82 | [0 , 178 , 255] , 83 | [255 , 0 , 114] , 84 | [255 , 0 , 57] , 85 | [0 , 140 , 255] , 86 | [0 , 121 , 255] , 87 | [12 , 255 , 0] , 88 | [255 , 210 , 0] , 89 | [0 , 255 , 44] , 90 | [165 , 255 , 0] , 91 | [0 , 25 , 255] , 92 | [0 , 255 , 140] , 93 | [0 , 101 , 255] , 94 | [0 , 255 , 82] , 95 | [223 , 255 , 0] , 96 | [242 , 0 , 255] , 97 | [89 , 0 , 255] , 98 | [165 , 0 , 255] , 99 | [70 , 255 , 0] , 100 | [255 , 0 , 172] , 101 | [255 , 76 , 0] , 102 | [203 , 255 , 0] , 103 | [204 , 0 , 255] , 104 | [255 , 0 , 229] , 105 | [255 , 133 , 0] , 106 | [127 , 0 , 255] , 107 | [0 , 235 , 255] , 108 | [0 , 255 , 197] , 109 | [255 , 0 , 191] , 110 | [0 , 44 , 255] , 111 | [50 , 255 , 0] 112 | ] 113 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/EfficientDet/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/utils/compute_overlap.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | def compute_overlap( 14 | np.ndarray[double, ndim=2] boxes, 15 | np.ndarray[double, ndim=2] query_boxes 16 | ): 17 | """ 18 | Args 19 | a: (N, 4) ndarray of float 20 | b: (K, 4) ndarray of float 21 | 22 | Returns 23 | overlaps: (N, K) ndarray of overlap between 
boxes and query_boxes 24 | """ 25 | cdef unsigned int N = boxes.shape[0] 26 | cdef unsigned int K = query_boxes.shape[0] 27 | cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64) 28 | cdef double iw, ih, box_area 29 | cdef double ua 30 | cdef unsigned int k, n 31 | for k in range(K): 32 | box_area = ( 33 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 34 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 35 | ) 36 | for n in range(N): 37 | iw = ( 38 | min(boxes[n, 2], query_boxes[k, 2]) - 39 | max(boxes[n, 0], query_boxes[k, 0]) + 1 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3], query_boxes[k, 3]) - 44 | max(boxes[n, 1], query_boxes[k, 1]) + 1 45 | ) 46 | if ih > 0: 47 | ua = np.float64( 48 | (boxes[n, 2] - boxes[n, 0] + 1) * 49 | (boxes[n, 3] - boxes[n, 1] + 1) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 | -------------------------------------------------------------------------------- /TestTimeAugmentation/EfficientDet/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import cv2 18 | import numpy as np 19 | 20 | from .colors import label_color 21 | 22 | 23 | def draw_box(image, box, color, thickness=2): 24 | """ Draws a box on an image with a given color. 25 | 26 | # Arguments 27 | image : The image to draw on. 28 | box : A list of 4 elements (x1, y1, x2, y2). 29 | color : The color of the box. 30 | thickness : The thickness of the lines to draw a box with. 31 | """ 32 | b = np.array(box).astype(np.int32) 33 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA) 34 | 35 | 36 | def draw_caption(image, box, caption): 37 | """ Draws a caption above the box in an image. 38 | 39 | # Arguments 40 | image : The image to draw on. 41 | box : A list of 4 elements (x1, y1, x2, y2). 42 | caption : String containing the text to draw. 43 | """ 44 | b = np.array(box).astype(int) 45 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) 46 | cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) 47 | 48 | 49 | def draw_boxes(image, boxes, color, thickness=2): 50 | """ Draws boxes on an image with a given color. 51 | 52 | # Arguments 53 | image : The image to draw on. 54 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 55 | color : The color of the boxes. 56 | thickness : The thickness of the lines to draw boxes with. 57 | """ 58 | for b in boxes: 59 | draw_box(image, b, color, thickness=thickness) 60 | 61 | 62 | def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5): 63 | """ Draws detections in an image. 64 | 65 | # Arguments 66 | image : The image to draw on. 67 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 68 | scores : A list of N classification scores. 69 | labels : A list of N labels. 
70 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 71 | label_to_name : (optional) Functor for mapping a label to a name. 72 | score_threshold : Threshold used for determining what detections to draw. 73 | """ 74 | selection = np.where(scores > score_threshold)[0] 75 | 76 | for i in selection: 77 | c = color if color is not None else label_color(labels[i]) 78 | draw_box(image, boxes[i, :], color=c) 79 | 80 | # draw labels 81 | caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) 82 | draw_caption(image, boxes[i, :], caption) 83 | 84 | 85 | def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): 86 | """ Draws annotations in an image. 87 | 88 | # Arguments 89 | image : The image to draw on. 90 | annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). 91 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 92 | label_to_name : (optional) Functor for mapping a label to a name. 93 | """ 94 | if isinstance(annotations, np.ndarray): 95 | annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} 96 | 97 | assert('bboxes' in annotations) 98 | assert('labels' in annotations) 99 | assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) 100 | 101 | for i in range(annotations['bboxes'].shape[0]): 102 | label = annotations['labels'][i] 103 | c = color if color is not None else label_color(label) 104 | caption = '{}'.format(label_to_name(label) if label_to_name else label) 105 | draw_caption(image, annotations['bboxes'][i], caption) 106 | draw_box(image, annotations['bboxes'][i], color=c) 107 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/README.md: -------------------------------------------------------------------------------- 1 | # FSAF 2 | This is an implementation of [FSAF](https://arxiv.org/abs/1903.00621) on keras and Tensorflow. The project is based on [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet) 3 | and fsaf branch of [zccstig/mmdetection](https://github.com/zccstig/mmdetection/tree/fsaf). 4 | Thanks for their hard work. 5 | 6 | As the authors write, **FASF module can be plugged into any single-shot detectors with FPN-like structure smoothly**. 7 | I have also tried on [yolo3](yolo). Anchor-free yolo3(with FSAF) gets a comparable performance with the anchor-based counterpart. But you don't need to pre-compute the anchor sizes any more. 8 | And it is much better and faster than the one based on retinanet. 9 | 10 | ## Test 11 | 1. I trained on Pascal VOC2012 trainval.txt + Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. There are 14041 images for training and 2510 images for validation. 12 | 2. The best evaluation results (score_threshold=0.05) on VOC2007 test are: 13 | 14 | | backbone | mAP50 | 15 | | ---- | ---- | 16 | | resnet50 | 0.7248 | 17 | | resnet101 | 0.7652 | 18 | 19 | 3. Pretrained models are here. 20 | [baidu netdisk](https://pan.baidu.com/s/1ZdHvR-03XqHvxWG0rLCw1g) extract code: rbrr 21 | [goole dirver](https://drive.google.com/open?id=1Hcgxp5OwqNsAx-HYgcIhLat1OOHKnvJ2) 22 | 23 | 4. `python3 inference.py` to test your image by specifying image path and model path there. 
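As with EfficientDet, the image and model paths are edited inside `inference.py`. The script rebuilds the FSAF detector from its backbone before loading the trained weights; a minimal sketch is shown below (the construction calls are taken from `inference.py`, while the weights path is a placeholder and the `load_weights` call is an assumption that follows the same Keras pattern used in the EfficientDet script):

```python
from models.resnet import resnet_fsaf
from models.retinanet import fsaf_bbox

# build a ResNet-101 FSAF detector for the 20 Pascal VOC classes
fsaf = resnet_fsaf(num_classes=20, backbone='resnet101')
model = fsaf_bbox(fsaf)
model.load_weights('snapshots/resnet101_pascal_47_0.7652.h5', by_name=True)
```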
24 | 25 | ![image1](test/004456.jpg) 26 | ![image2](test/005770.jpg) 27 | ![image3](test/006408.jpg) 28 | 29 | 30 | ## Train 31 | ### build dataset (Pascal VOC, other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)) 32 | * Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012. 33 | * Append VOC2007 train.txt to VOC2012 trainval.txt. 34 | * Overwrite VOC2012 val.txt by VOC2007 val.txt. 35 | ### train 36 | * `python3 train.py --backbone resnet50 --gpu 0 --random-transform pascal datasets/VOC2012` to start training. 37 | ## Evaluate 38 | * `python3 utils/eval.py` to evaluate by specifying model path there. 39 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/augmentor/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/configure.py: -------------------------------------------------------------------------------- 1 | MAX_NUM_GT_BOXES = 100 2 | POS_SCALE = 0.2 3 | IGNORE_SCALE = 0.5 4 | STRIDES = (8, 16, 32, 64, 128) 5 | 6 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/generators/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/inference.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import models 3 | from utils.image import read_image_bgr, preprocess_image, resize_image 4 | from utils.visualization import draw_box, draw_caption 5 | from utils.colors import label_color 6 | 7 | # import miscellaneous modules 8 | import matplotlib.pyplot as plt 9 | import cv2 10 | import os 11 | import numpy as np 12 | import time 13 | import glob 14 | import os.path as osp 15 | 16 | # set tf backend to allow memory to grow, instead of claiming everything 17 | import tensorflow as tf 18 | 19 | 20 | def get_session(): 21 | config = tf.ConfigProto() 22 | config.gpu_options.allow_growth = True 23 | return tf.Session(config=config) 24 | 25 | 26 | # use this environment flag to change which GPU to use 27 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 28 | 29 | # set the modified tf session as backend in keras 30 | keras.backend.set_session(get_session()) 31 | # adjust this to point to your downloaded/trained model 32 | # models can be downloaded here: https://github.com/fizyr/keras-retinanet/releases 33 | model_path = '/home/adam/workspace/github/xuannianz/carrot/fsaf/snapshots/2019-10-05/resnet101_pascal_47_0.7652.h5' 34 | 35 | # load retinanet model 36 | # model = models.load_model(model_path, backbone_name='resnet101') 37 | 38 | # if the model is not converted to an inference model, use the line below 39 | # see: https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model 40 | from models.resnet import resnet_fsaf 41 | from models.retinanet import fsaf_bbox 42 | fsaf = resnet_fsaf(num_classes=20, backbone='resnet101') 43 | model = fsaf_bbox(fsaf) 44 | 
model.load_weights(model_path, by_name=True) 45 | # load label to names mapping for visualization purposes 46 | voc_classes = { 47 | 'aeroplane': 0, 48 | 'bicycle': 1, 49 | 'bird': 2, 50 | 'boat': 3, 51 | 'bottle': 4, 52 | 'bus': 5, 53 | 'car': 6, 54 | 'cat': 7, 55 | 'chair': 8, 56 | 'cow': 9, 57 | 'diningtable': 10, 58 | 'dog': 11, 59 | 'horse': 12, 60 | 'motorbike': 13, 61 | 'person': 14, 62 | 'pottedplant': 15, 63 | 'sheep': 16, 64 | 'sofa': 17, 65 | 'train': 18, 66 | 'tvmonitor': 19 67 | } 68 | labels_to_names = {} 69 | for key, value in voc_classes.items(): 70 | labels_to_names[value] = key 71 | # load image 72 | image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg') 73 | for image_path in image_paths: 74 | print('Handling {}'.format(image_path)) 75 | image = read_image_bgr(image_path) 76 | 77 | # copy to draw on 78 | draw = image.copy() 79 | 80 | # preprocess image for network 81 | image = preprocess_image(image) 82 | image, scale = resize_image(image) 83 | 84 | # process image 85 | start = time.time() 86 | # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0)) 87 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 88 | print("processing time: ", time.time() - start) 89 | 90 | # correct for image scale 91 | boxes /= scale 92 | labels_to_locations = {} 93 | # visualize detections 94 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 95 | # scores are sorted so we can break 96 | if score < 0.5: 97 | break 98 | start_x = int(box[0]) 99 | start_y = int(box[1]) 100 | end_x = int(box[2]) 101 | end_y = int(box[3]) 102 | color = label_color(label) 103 | 104 | b = box.astype(int) 105 | draw_box(draw, b, color=color) 106 | 107 | caption = "{} {:.3f}".format(labels_to_names[label], score) 108 | draw_caption(draw, b, caption) 109 | 110 | cv2.namedWindow('image', cv2.WINDOW_NORMAL) 111 | cv2.imshow('image', draw) 112 | key = cv2.waitKey(0) 113 | if int(key) == 121: 114 | image_fname = osp.split(image_path)[-1] 115 | cv2.imwrite('test/{}'.format(image_fname), draw) 116 | 117 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/initializers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | 19 | import numpy as np 20 | import math 21 | 22 | 23 | class PriorProbability(keras.initializers.Initializer): 24 | """ Apply a prior probability to the weights. 
25 | """ 26 | 27 | def __init__(self, probability=0.01): 28 | self.probability = probability 29 | 30 | def get_config(self): 31 | return { 32 | 'probability': self.probability 33 | } 34 | 35 | def __call__(self, shape, dtype=None): 36 | # set bias to -log((1 - p)/p) for foreground 37 | result = np.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability) 38 | 39 | return result 40 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/models/densenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 vidosits (https://github.com/vidosits/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | from keras.applications import densenet 19 | from keras.utils import get_file 20 | 21 | from . import retinanet 22 | from . import Backbone 23 | from utils.image import preprocess_image 24 | 25 | 26 | allowed_backbones = { 27 | 'densenet121': ([6, 12, 24, 16], densenet.DenseNet121), 28 | 'densenet169': ([6, 12, 32, 32], densenet.DenseNet169), 29 | 'densenet201': ([6, 12, 48, 32], densenet.DenseNet201), 30 | } 31 | 32 | 33 | class DenseNetBackbone(Backbone): 34 | """ Describes backbone information and provides utility functions. 35 | """ 36 | 37 | def retinanet(self, *args, **kwargs): 38 | """ Returns a retinanet model using the correct backbone. 39 | """ 40 | return densenet_retinanet(*args, backbone=self.backbone, **kwargs) 41 | 42 | def download_imagenet(self): 43 | """ Download pre-trained weights for the specified backbone name. 44 | This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop 45 | where backbone is the densenet + number of layers (e.g. densenet121). 46 | For more info check the explanation from the keras densenet script itself: 47 | https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py 48 | """ 49 | origin = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/' 50 | file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5' 51 | 52 | # load weights 53 | if keras.backend.image_data_format() == 'channels_first': 54 | raise ValueError('Weights for "channels_first" format are not available.') 55 | 56 | weights_url = origin + file_name.format(self.backbone) 57 | return get_file(file_name.format(self.backbone), weights_url, cache_subdir='models') 58 | 59 | def validate(self): 60 | """ Checks whether the backbone string is correct. 61 | """ 62 | backbone = self.backbone.split('_')[0] 63 | 64 | if backbone not in allowed_backbones: 65 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones.keys())) 66 | 67 | def preprocess_image(self, inputs): 68 | """ Takes as input an image and prepares it for being passed through the network. 
69 | """ 70 | return preprocess_image(inputs, mode='tf') 71 | 72 | 73 | def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs): 74 | """ Constructs a retinanet model using a densenet backbone. 75 | 76 | Args 77 | num_classes: Number of classes to predict. 78 | backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')). 79 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 80 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 81 | 82 | Returns 83 | RetinaNet model with a DenseNet backbone. 84 | """ 85 | # choose default input 86 | if inputs is None: 87 | inputs = keras.layers.Input((None, None, 3)) 88 | 89 | blocks, creator = allowed_backbones[backbone] 90 | model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None) 91 | 92 | # get last conv layer from the end of each dense block 93 | layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)] 94 | 95 | # create the densenet backbone 96 | model = keras.models.Model(inputs=inputs, outputs=layer_outputs[1:], name=model.name) 97 | 98 | # invoke modifier if given 99 | if modifier: 100 | model = modifier(model) 101 | 102 | # create the full model 103 | model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=model.outputs, **kwargs) 104 | 105 | return model 106 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | from keras.applications import mobilenet 19 | from keras.utils import get_file 20 | from utils.image import preprocess_image 21 | 22 | from . import retinanet 23 | from . import Backbone 24 | 25 | 26 | class MobileNetBackbone(Backbone): 27 | """ Describes backbone information and provides utility functions. 28 | """ 29 | 30 | allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224'] 31 | 32 | def retinanet(self, *args, **kwargs): 33 | """ Returns a retinanet model using the correct backbone. 34 | """ 35 | return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs) 36 | 37 | def download_imagenet(self): 38 | """ Download pre-trained weights for the specified backbone name. 39 | This name is in the format mobilenet{rows}_{alpha} where rows is the 40 | imagenet shape dimension and 'alpha' controls the width of the network. 41 | For more info check the explanation from the keras mobilenet script itself. 
42 | """ 43 | 44 | alpha = float(self.backbone.split('_')[1]) 45 | rows = int(self.backbone.split('_')[0].replace('mobilenet', '')) 46 | 47 | # load weights 48 | if keras.backend.image_data_format() == 'channels_first': 49 | raise ValueError('Weights for "channels_first" format ' 50 | 'are not available.') 51 | if alpha == 1.0: 52 | alpha_text = '1_0' 53 | elif alpha == 0.75: 54 | alpha_text = '7_5' 55 | elif alpha == 0.50: 56 | alpha_text = '5_0' 57 | else: 58 | alpha_text = '2_5' 59 | 60 | model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows) 61 | weights_url = mobilenet.mobilenet.BASE_WEIGHT_PATH + model_name 62 | weights_path = get_file(model_name, weights_url, cache_subdir='models') 63 | 64 | return weights_path 65 | 66 | def validate(self): 67 | """ Checks whether the backbone string is correct. 68 | """ 69 | backbone = self.backbone.split('_')[0] 70 | 71 | if backbone not in MobileNetBackbone.allowed_backbones: 72 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones)) 73 | 74 | def preprocess_image(self, inputs): 75 | """ Takes as input an image and prepares it for being passed through the network. 76 | """ 77 | return preprocess_image(inputs, mode='tf') 78 | 79 | 80 | def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs): 81 | """ Constructs a retinanet model using a mobilenet backbone. 82 | 83 | Args 84 | num_classes: Number of classes to predict. 85 | backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')). 86 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 87 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 88 | 89 | Returns 90 | RetinaNet model with a MobileNet backbone.
91 | """ 92 | alpha = float(backbone.split('_')[1]) 93 | 94 | # choose default input 95 | if inputs is None: 96 | inputs = keras.layers.Input((None, None, 3)) 97 | 98 | backbone = mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None) 99 | 100 | # create the full model 101 | layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu'] 102 | layer_outputs = [backbone.get_layer(name).output for name in layer_names] 103 | backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name) 104 | 105 | # invoke modifier if given 106 | if modifier: 107 | backbone = modifier(backbone) 108 | 109 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone.outputs, **kwargs) 110 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.5 2 | keras-resnet==0.2.0 3 | opencv-contrib-python==3.4.2.17 4 | opencv-python==3.4.2.17 5 | Pillow==6.2.0 6 | tensorflow-gpu==1.15.0 7 | progressbar2 8 | git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI 9 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools.extension import Extension 3 | from distutils.command.build_ext import build_ext as DistUtilsBuildExt 4 | 5 | 6 | class BuildExtension(setuptools.Command): 7 | description = DistUtilsBuildExt.description 8 | user_options = DistUtilsBuildExt.user_options 9 | boolean_options = DistUtilsBuildExt.boolean_options 10 | help_options = DistUtilsBuildExt.help_options 11 | 12 | def __init__(self, *args, **kwargs): 13 | from setuptools.command.build_ext import build_ext as SetupToolsBuildExt 14 | 15 | # Bypass __setattr__ to avoid infinite recursion.
16 | self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs) 17 | 18 | def __getattr__(self, name): 19 | return getattr(self._command, name) 20 | 21 | def __setattr__(self, name, value): 22 | setattr(self._command, name, value) 23 | 24 | def initialize_options(self, *args, **kwargs): 25 | return self._command.initialize_options(*args, **kwargs) 26 | 27 | def finalize_options(self, *args, **kwargs): 28 | ret = self._command.finalize_options(*args, **kwargs) 29 | import numpy 30 | self.include_dirs.append(numpy.get_include()) 31 | return ret 32 | 33 | def run(self, *args, **kwargs): 34 | return self._command.run(*args, **kwargs) 35 | 36 | 37 | extensions = [ 38 | Extension( 39 | 'utils.compute_overlap', 40 | ['utils/compute_overlap.pyx'] 41 | ), 42 | ] 43 | 44 | setuptools.setup( 45 | cmdclass={'build_ext': BuildExtension}, 46 | packages=setuptools.find_packages(), 47 | ext_modules=extensions, 48 | setup_requires=["cython>=0.28", "numpy>=1.14.0"] 49 | ) 50 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/test/004456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/test/004456.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/test/005770.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/test/005770.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/test/006408.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/test/006408.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/utils/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/coco_eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from pycocotools.cocoeval import COCOeval 18 | 19 | import keras 20 | import numpy as np 21 | import json 22 | 23 | import progressbar 24 | 25 | assert (callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead." 
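# Note: evaluate_coco below reports each detection to pycocotools as a dict holding the
# COCO image id, the COCO category id, the confidence score and the bounding box converted
# to (x, y, width, height); the dicts are dumped to JSON and scored with COCOeval.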
26 | 27 | 28 | def evaluate_coco(generator, model, threshold=0.05): 29 | """ Use the pycocotools to evaluate a COCO model on a dataset. 30 | 31 | Args 32 | generator : The generator for generating the evaluation data. 33 | model : The model to evaluate. 34 | threshold : The score threshold to use. 35 | """ 36 | # start collecting results 37 | results = [] 38 | image_ids = [] 39 | for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '): 40 | image = generator.load_image(index) 41 | image = generator.preprocess_image(image) 42 | image, scale = generator.resize_image(image) 43 | 44 | if keras.backend.image_data_format() == 'channels_first': 45 | image = image.transpose((2, 0, 1)) 46 | 47 | # run network 48 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 49 | 50 | # correct boxes for image scale 51 | boxes /= scale 52 | 53 | # change to (x, y, w, h) (MS COCO standard) 54 | boxes[:, :, 2] -= boxes[:, :, 0] 55 | boxes[:, :, 3] -= boxes[:, :, 1] 56 | 57 | # compute predicted labels and scores 58 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 59 | # scores are sorted, so we can break 60 | if score < threshold: 61 | break 62 | 63 | # append detection for each positively labeled class 64 | image_result = { 65 | 'image_id': generator.image_ids[index], 66 | 'category_id': generator.label_to_coco_label(label), 67 | 'score': float(score), 68 | 'bbox': box.tolist(), 69 | } 70 | 71 | # append detection to results 72 | results.append(image_result) 73 | 74 | # append image to list of processed images 75 | image_ids.append(generator.image_ids[index]) 76 | 77 | if not len(results): 78 | return 79 | 80 | # write output 81 | json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4) 82 | json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4) 83 | 84 | # load results in COCO evaluation tool 85 | coco_true = generator.coco 86 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name)) 87 | 88 | # run COCO evaluation 89 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 90 | coco_eval.params.imgIds = image_ids 91 | coco_eval.evaluate() 92 | coco_eval.accumulate() 93 | coco_eval.summarize() 94 | return coco_eval.stats 95 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/colors.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def label_color(label): 5 | """ Return a color from a set of predefined colors. Contains 80 colors in total. 6 | 7 | Args 8 | label: The label to get the color for. 9 | 10 | Returns 11 | A list of three values representing a RGB color. 12 | 13 | If no color is defined for a certain label, the color green is returned and a warning is printed. 
14 | """ 15 | if label < len(colors): 16 | return colors[label] 17 | else: 18 | warnings.warn('Label {} has no color, returning default.'.format(label)) 19 | return (0, 255, 0) 20 | 21 | 22 | """ 23 | Generated using: 24 | 25 | ``` 26 | colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] 27 | shuffle(colors) 28 | pprint(colors) 29 | ``` 30 | """ 31 | colors = [ 32 | [31 , 0 , 255] , 33 | [0 , 159 , 255] , 34 | [255 , 95 , 0] , 35 | [255 , 19 , 0] , 36 | [255 , 0 , 0] , 37 | [255 , 38 , 0] , 38 | [0 , 255 , 25] , 39 | [255 , 0 , 133] , 40 | [255 , 172 , 0] , 41 | [108 , 0 , 255] , 42 | [0 , 82 , 255] , 43 | [0 , 255 , 6] , 44 | [255 , 0 , 152] , 45 | [223 , 0 , 255] , 46 | [12 , 0 , 255] , 47 | [0 , 255 , 178] , 48 | [108 , 255 , 0] , 49 | [184 , 0 , 255] , 50 | [255 , 0 , 76] , 51 | [146 , 255 , 0] , 52 | [51 , 0 , 255] , 53 | [0 , 197 , 255] , 54 | [255 , 248 , 0] , 55 | [255 , 0 , 19] , 56 | [255 , 0 , 38] , 57 | [89 , 255 , 0] , 58 | [127 , 255 , 0] , 59 | [255 , 153 , 0] , 60 | [0 , 255 , 255] , 61 | [0 , 255 , 216] , 62 | [0 , 255 , 121] , 63 | [255 , 0 , 248] , 64 | [70 , 0 , 255] , 65 | [0 , 255 , 159] , 66 | [0 , 216 , 255] , 67 | [0 , 6 , 255] , 68 | [0 , 63 , 255] , 69 | [31 , 255 , 0] , 70 | [255 , 57 , 0] , 71 | [255 , 0 , 210] , 72 | [0 , 255 , 102] , 73 | [242 , 255 , 0] , 74 | [255 , 191 , 0] , 75 | [0 , 255 , 63] , 76 | [255 , 0 , 95] , 77 | [146 , 0 , 255] , 78 | [184 , 255 , 0] , 79 | [255 , 114 , 0] , 80 | [0 , 255 , 235] , 81 | [255 , 229 , 0] , 82 | [0 , 178 , 255] , 83 | [255 , 0 , 114] , 84 | [255 , 0 , 57] , 85 | [0 , 140 , 255] , 86 | [0 , 121 , 255] , 87 | [12 , 255 , 0] , 88 | [255 , 210 , 0] , 89 | [0 , 255 , 44] , 90 | [165 , 255 , 0] , 91 | [0 , 25 , 255] , 92 | [0 , 255 , 140] , 93 | [0 , 101 , 255] , 94 | [0 , 255 , 82] , 95 | [223 , 255 , 0] , 96 | [242 , 0 , 255] , 97 | [89 , 0 , 255] , 98 | [165 , 0 , 255] , 99 | [70 , 255 , 0] , 100 | [255 , 0 , 172] , 101 | [255 , 76 , 0] , 102 | [203 , 255 , 0] , 103 | [204 , 0 , 255] , 104 | [255 , 0 , 229] , 105 | [255 , 133 , 0] , 106 | [127 , 0 , 255] , 107 | [0 , 235 , 255] , 108 | [0 , 255 , 197] , 109 | [255 , 0 , 191] , 110 | [0 , 44 , 255] , 111 | [50 , 255 , 0] 112 | ] 113 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/compute_overlap.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | def compute_overlap( 14 | np.ndarray[double, ndim=2] boxes, 15 | np.ndarray[double, ndim=2] query_boxes 16 | ): 17 | """ 18 | Args 19 | a: (N, 4) ndarray of float 20 | b: (K, 4) ndarray of float 21 | 22 | Returns 23 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 24 | 
""" 25 | cdef unsigned int N = boxes.shape[0] 26 | cdef unsigned int K = query_boxes.shape[0] 27 | cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64) 28 | cdef double iw, ih, box_area 29 | cdef double ua 30 | cdef unsigned int k, n 31 | for k in range(K): 32 | box_area = ( 33 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 34 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 35 | ) 36 | for n in range(N): 37 | iw = ( 38 | min(boxes[n, 2], query_boxes[k, 2]) - 39 | max(boxes[n, 0], query_boxes[k, 0]) + 1 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3], query_boxes[k, 3]) - 44 | max(boxes[n, 1], query_boxes[k, 1]) + 1 45 | ) 46 | if ih > 0: 47 | ua = np.float64( 48 | (boxes[n, 2] - boxes[n, 0] + 1) * 49 | (boxes[n, 3] - boxes[n, 1] + 1) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import configparser 18 | import numpy as np 19 | import keras 20 | from utils.anchors import AnchorParameters 21 | 22 | 23 | def read_config_file(config_path): 24 | config = configparser.ConfigParser() 25 | 26 | with open(config_path, 'r') as file: 27 | config.read_file(file) 28 | 29 | assert 'anchor_parameters' in config, \ 30 | "Malformed config file. Verify that it contains the anchor_parameters section." 31 | 32 | config_keys = set(config['anchor_parameters']) 33 | default_keys = set(AnchorParameters.default.__dict__.keys()) 34 | 35 | assert config_keys <= default_keys, \ 36 | "Malformed config file. These keys are not valid: {}".format(config_keys - default_keys) 37 | 38 | return config 39 | 40 | 41 | def parse_anchor_parameters(config): 42 | ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx()) 43 | scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx()) 44 | sizes = list(map(int, config['anchor_parameters']['sizes'].split(' '))) 45 | strides = list(map(int, config['anchor_parameters']['strides'].split(' '))) 46 | 47 | return AnchorParameters(sizes, strides, ratios, scales) 48 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/keras_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import print_function 18 | 19 | import keras 20 | import sys 21 | 22 | minimum_keras_version = 2, 2, 4 23 | 24 | 25 | def keras_version(): 26 | """ 27 | Get the Keras version. 28 | 29 | Returns 30 | tuple of (major, minor, patch), e.g. (2, 2, 4). 31 | """ 32 | return tuple(map(int, keras.__version__.split('.'))) 33 | 34 | 35 | def keras_version_ok(): 36 | """ 37 | Check if the current Keras version is at least the minimum version. 38 | """ 39 | return keras_version() >= minimum_keras_version 40 | 41 | 42 | def assert_keras_version(): 43 | """ 44 | Assert that the Keras version is up to date. 45 | """ 46 | detected = keras.__version__ 47 | required = '.'.join(map(str, minimum_keras_version)) 48 | assert(keras_version() >= minimum_keras_version), 'You are using keras version {}. The minimum required version is {}.'.format(detected, required) 49 | 50 | 51 | def check_keras_version(): 52 | """ 53 | Check that the Keras version is up to date. If it isn't, print an error message and exit the script. 54 | """ 55 | try: 56 | assert_keras_version() 57 | except AssertionError as e: 58 | print(e, file=sys.stderr) 59 | sys.exit(1) 60 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | def freeze(model): 19 | """ 20 | Set all layers in a model to non-trainable. 21 | 22 | The weights for these layers will not be updated during training. 23 | 24 | This function modifies the given model in-place, 25 | but it also returns the modified model to allow easy chaining with other functions. 26 | """ 27 | for layer in model.layers: 28 | layer.trainable = False 29 | return model 30 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import cv2 18 | import numpy as np 19 | 20 | from .colors import label_color 21 | 22 | 23 | def draw_box(image, box, color, thickness=2): 24 | """ Draws a box on an image with a given color. 25 | 26 | # Arguments 27 | image : The image to draw on. 28 | box : A list of 4 elements (x1, y1, x2, y2). 29 | color : The color of the box. 30 | thickness : The thickness of the lines to draw a box with. 31 | """ 32 | b = np.array(box).astype(int) 33 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA) 34 | 35 | 36 | def draw_caption(image, box, caption): 37 | """ Draws a caption above the box in an image. 38 | 39 | # Arguments 40 | image : The image to draw on. 41 | box : A list of 4 elements (x1, y1, x2, y2). 42 | caption : String containing the text to draw. 43 | """ 44 | b = np.array(box).astype(int) 45 | ret, baseline = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 46 | cv2.rectangle(image, (b[0], b[3] - ret[1] - baseline), (b[0] + ret[0], b[3]), (255, 255, 255), -1) 47 | cv2.putText(image, caption, (b[0], b[3] - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) 48 | 49 | 50 | def draw_boxes(image, boxes, color, thickness=2): 51 | """ Draws boxes on an image with a given color. 52 | 53 | # Arguments 54 | image : The image to draw on. 55 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 56 | color : The color of the boxes. 57 | thickness : The thickness of the lines to draw boxes with. 58 | """ 59 | for b in boxes: 60 | draw_box(image, b, color, thickness=thickness) 61 | 62 | 63 | def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5): 64 | """ Draws detections in an image. 65 | 66 | # Arguments 67 | image : The image to draw on. 68 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 69 | scores : A list of N classification scores. 70 | labels : A list of N labels. 71 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 72 | label_to_name : (optional) Functor for mapping a label to a name. 73 | score_threshold : Threshold used for determining what detections to draw. 74 | """ 75 | selection = np.where(scores > score_threshold)[0] 76 | 77 | for i in selection: 78 | c = color if color is not None else label_color(labels[i]) 79 | draw_box(image, boxes[i, :], color=c) 80 | 81 | # draw labels 82 | caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) 83 | draw_caption(image, boxes[i, :], caption) 84 | 85 | 86 | def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): 87 | """ Draws annotations in an image. 88 | 89 | # Arguments 90 | image : The image to draw on. 91 | annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). 92 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 93 | label_to_name : (optional) Functor for mapping a label to a name. 
94 | """ 95 | if isinstance(annotations, np.ndarray): 96 | annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} 97 | 98 | assert('bboxes' in annotations) 99 | assert('labels' in annotations) 100 | assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) 101 | 102 | for i in range(annotations['bboxes'].shape[0]): 103 | label = annotations['labels'][i] 104 | c = color if color is not None else label_color(label) 105 | caption = '{}'.format(label_to_name(label) if label_to_name else label) 106 | draw_caption(image, annotations['bboxes'][i], caption) 107 | draw_box(image, annotations['bboxes'][i], color=c) 108 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/README.md: -------------------------------------------------------------------------------- 1 | # FSAF 2 | This is an implementation of [FSAF](https://arxiv.org/abs/1903.00621) on Keras and TensorFlow. The project is based on [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3) and [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet). 3 | Thanks for their hard work. 4 | 5 | As the authors write, the FSAF module can be plugged into any single-shot detector with an FPN-like structure smoothly. 6 | I have also tried it on yolo3. Anchor-free yolo3 (with FSAF) achieves performance comparable to its anchor-based counterpart, but you no longer need to pre-compute the anchor sizes. 7 | It is also much better and faster than the one based on retinanet. 8 | 9 | It also converges quite quickly. After the first epoch (batch_size=64, steps=1000), we can get an mAP50 of 0.6xxx on the val dataset. 10 | 11 | ## Test 12 | 1. I trained on Pascal VOC2012 trainval.txt + Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. There are 14041 images for training and 2510 images for validation. 13 | 2. The best evaluation result (score_threshold=0.01, mAP50, image_size=416) on VOC2007 test is 0.8358. I have only trained once. 14 | 3. Pretrained yolo and fsaf weights are here. [baidu netdisk](https://pan.baidu.com/s/1QoGXnajcohj9P4yCVwJ4Yw), extract code: qab7 15 | 4. `python3 yolo/inference.py` to test your images by specifying the image path and model path there. 16 | 17 | ## Train 18 | ### build dataset (Pascal VOC, other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)) 19 | * Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012. 20 | * Append VOC2007 train.txt to VOC2012 trainval.txt. 21 | * Overwrite VOC2012 val.txt with VOC2007 val.txt (a minimal sketch of these steps appears after the Evaluate section below). 22 | ### train 23 | * **STEP1**: `python3 yolo/train.py --freeze-body darknet --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-3, then set lr=1e-4 when the val mAP continues to drop. 24 | * **STEP2**: `python3 yolo/train.py --snapshot --freeze-body none --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-5 and then set lr=1e-6 when the val mAP continues to drop. 25 | ## Evaluate 26 | * `python3 yolo/eval/common.py` to evaluate by specifying model path there.
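## Dataset preparation sketch
A minimal, untested Python sketch of the three dataset-building steps above (copy the VOC2007 images into VOC2012, append train.txt to trainval.txt, overwrite val.txt). The `datasets/VOC2007` and `datasets/VOC2012` paths are only illustrative and assume the standard VOCdevkit layout:

```python
import glob
import shutil

voc2007 = 'datasets/VOC2007'  # illustrative path
voc2012 = 'datasets/VOC2012'  # illustrative path

# 1. copy all image files from VOC2007 into the VOC2012 image folder
for jpg in glob.glob(voc2007 + '/JPEGImages/*.jpg'):
    shutil.copy(jpg, voc2012 + '/JPEGImages/')

# 2. append VOC2007 train.txt to VOC2012 trainval.txt
with open(voc2007 + '/ImageSets/Main/train.txt') as src, \
        open(voc2012 + '/ImageSets/Main/trainval.txt', 'a') as dst:
    dst.write(src.read())

# 3. overwrite VOC2012 val.txt with VOC2007 val.txt
shutil.copy(voc2007 + '/ImageSets/Main/val.txt', voc2012 + '/ImageSets/Main/val.txt')
```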
27 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/yolo/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/config.py: -------------------------------------------------------------------------------- 1 | MAX_NUM_GT_BOXES = 100 2 | POS_SCALE = 0.2 3 | IGNORE_SCALE = 0.5 4 | STRIDES = (8, 16, 32) 5 | 6 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/eval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/yolo/eval/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/eval/pascal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | from .common import evaluate 19 | 20 | 21 | class Evaluate(keras.callbacks.Callback): 22 | """ 23 | Evaluation callback for arbitrary datasets. 24 | """ 25 | 26 | def __init__( 27 | self, 28 | generator, 29 | model, 30 | iou_threshold=0.5, 31 | score_threshold=0.01, 32 | max_detections=100, 33 | save_path=None, 34 | tensorboard=None, 35 | weighted_average=False, 36 | verbose=1 37 | ): 38 | """ 39 | Evaluate a given dataset using a given model at the end of every epoch during training. 40 | 41 | Args: 42 | generator: The generator that represents the dataset to evaluate. 43 | iou_threshold: The threshold used to consider when a detection is positive or negative. 44 | score_threshold: The score confidence threshold to use for detections. 45 | max_detections: The maximum number of detections to use per image. 46 | save_path: The path to save images with visualized detections to. 47 | tensorboard: Instance of keras.callbacks.TensorBoard used to log the mAP value. 48 | weighted_average: Compute the mAP using the weighted average of precisions among classes. 49 | verbose: Set the verbosity level, by default this is set to 1. 
50 | """ 51 | self.generator = generator 52 | self.iou_threshold = iou_threshold 53 | self.score_threshold = score_threshold 54 | self.max_detections = max_detections 55 | self.save_path = save_path 56 | self.tensorboard = tensorboard 57 | self.weighted_average = weighted_average 58 | self.verbose = verbose 59 | self.active_model = model 60 | 61 | super(Evaluate, self).__init__() 62 | 63 | def on_epoch_end(self, epoch, logs=None): 64 | logs = logs or {} 65 | 66 | # run evaluation 67 | average_precisions = evaluate( 68 | self.generator, 69 | self.active_model, 70 | iou_threshold=self.iou_threshold, 71 | score_threshold=self.score_threshold, 72 | max_detections=self.max_detections, 73 | visualize=False 74 | ) 75 | 76 | # compute per class average precision 77 | total_instances = [] 78 | precisions = [] 79 | for label, (average_precision, num_annotations) in average_precisions.items(): 80 | if self.verbose == 1: 81 | print('{:.0f} instances of class'.format(num_annotations), 82 | self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) 83 | total_instances.append(num_annotations) 84 | precisions.append(average_precision) 85 | if self.weighted_average: 86 | self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances) 87 | else: 88 | self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances) 89 | 90 | if self.tensorboard is not None and self.tensorboard.writer is not None: 91 | import tensorflow as tf 92 | summary = tf.Summary() 93 | summary_value = summary.value.add() 94 | summary_value.simple_value = self.mean_ap 95 | summary_value.tag = "mAP" 96 | self.tensorboard.writer.add_summary(summary, epoch) 97 | 98 | logs['mAP'] = self.mean_ap 99 | 100 | if self.verbose == 1: 101 | print('mAP: {:.4f}'.format(self.mean_ap)) 102 | -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/FSAF/yolo/generators/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/FSAF/yolo/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import glob 3 | import keras 4 | import numpy as np 5 | import os 6 | import os.path as osp 7 | import tensorflow as tf 8 | import time 9 | 10 | from utils.visualization import draw_box, draw_caption 11 | from utils.colors import label_color 12 | from yolo.model import yolo_body 13 | 14 | 15 | # set tf backend to allow memory to grow, instead of claiming everything 16 | def get_session(): 17 | config = tf.ConfigProto() 18 | config.gpu_options.allow_growth = True 19 | return tf.Session(config=config) 20 | 21 | 22 | def preprocess_image(image, image_size=416): 23 | image_height, image_width = image.shape[:2] 24 | if image_height > image_width: 25 | scale = image_size / image_height 26 | resized_height = image_size 27 | resized_width = int(image_width * scale) 28 | else: 29 | scale = image_size / image_width 30 | resized_height = int(image_height * scale) 31 | resized_width = image_size 32 | image = cv2.resize(image, (resized_width, resized_height)) 33 | new_image = np.ones((image_size, image_size, 3), dtype=np.float32) * 128. 
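    # letterbox: the aspect-ratio-preserving resize is pasted onto a square gray (128)
    # canvas and centered; offset_h / offset_w record the padding so that the predicted
    # boxes can later be shifted back into the original image coordinates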
34 | offset_h = (image_size - resized_height) // 2 35 | offset_w = (image_size - resized_width) // 2 36 | new_image[offset_h:offset_h + resized_height, offset_w:offset_w + resized_width] = image.astype(np.float32) 37 | new_image /= 255. 38 | return new_image, scale, offset_h, offset_w 39 | 40 | 41 | # use this environment flag to change which GPU to use 42 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 43 | 44 | # set the modified tf session as backend in keras 45 | keras.backend.set_session(get_session()) 46 | 47 | model_path = 'pascal_18_6.4112_6.5125_0.8319_0.8358.h5' 48 | 49 | model, prediction_model = yolo_body(num_classes=20) 50 | 51 | prediction_model.load_weights(model_path, by_name=True) 52 | 53 | # load label to names mapping for visualization purposes 54 | voc_classes = { 55 | 'aeroplane': 0, 56 | 'bicycle': 1, 57 | 'bird': 2, 58 | 'boat': 3, 59 | 'bottle': 4, 60 | 'bus': 5, 61 | 'car': 6, 62 | 'cat': 7, 63 | 'chair': 8, 64 | 'cow': 9, 65 | 'diningtable': 10, 66 | 'dog': 11, 67 | 'horse': 12, 68 | 'motorbike': 13, 69 | 'person': 14, 70 | 'pottedplant': 15, 71 | 'sheep': 16, 72 | 'sofa': 17, 73 | 'train': 18, 74 | 'tvmonitor': 19 75 | } 76 | labels_to_names = {} 77 | for key, value in voc_classes.items(): 78 | labels_to_names[value] = key 79 | # load image 80 | image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg') 81 | for image_path in image_paths: 82 | print('Handling {}'.format(image_path)) 83 | image = cv2.imread(image_path) 84 | 85 | # copy to draw on 86 | draw = image.copy() 87 | 88 | # preprocess image for network 89 | image, scale, offset_h, offset_w = preprocess_image(image) 90 | 91 | # process image 92 | start = time.time() 93 | # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0)) 94 | boxes, scores, labels = prediction_model.predict_on_batch(np.expand_dims(image, axis=0)) 95 | print("processing time: ", time.time() - start) 96 | 97 | # correct boxes for image scale 98 | boxes[0, :, [0, 2]] -= offset_w 99 | boxes[0, :, [1, 3]] -= offset_h 100 | boxes /= scale 101 | 102 | labels_to_locations = {} 103 | # visualize detections 104 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 105 | # scores are sorted so we can break 106 | if score < 0.5: 107 | break 108 | start_x = int(box[0]) 109 | start_y = int(box[1]) 110 | end_x = int(box[2]) 111 | end_y = int(box[3]) 112 | color = label_color(label) 113 | 114 | b = box.astype(int) 115 | draw_box(draw, b, color=color) 116 | 117 | caption = "{} {:.3f}".format(labels_to_names[label], score) 118 | draw_caption(draw, b, caption) 119 | 120 | cv2.namedWindow('image', cv2.WINDOW_NORMAL) 121 | cv2.imshow('image', draw) 122 | key = cv2.waitKey(0) 123 | if int(key) == 121: 124 | image_fname = osp.split(image_path)[-1] 125 | cv2.imwrite('test/{}'.format(image_fname), draw) 126 | -------------------------------------------------------------------------------- /TestTimeAugmentation/README.md: -------------------------------------------------------------------------------- 1 | # Ensemble of models and test-time augmentation 2 | 3 | This code allows anyone to apply the ensemble of models and test-time augmentation. 
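The ensemble combines the detections produced by several models with one of three voting strategies (affirmative, consensus and unanimous), implemented in `ensembleOptions.py`. The snippet below is only an illustrative sketch of the decision rule that each strategy applies to a group of overlapping boxes; the helper name and the grouping step are hypothetical and not part of this repository.

```python
import math

def keep_group(group, num_detectors, strategy):
    # group: overlapping boxes for the same object, at most one box per detector
    if strategy == 'affirmative':
        return len(group) >= 1                             # any single detector is enough
    if strategy == 'consensus':
        return len(group) >= math.ceil(num_detectors / 2)  # at least half of the detectors agree
    if strategy == 'unanimous':
        return len(group) == num_detectors                 # every detector must agree
    raise ValueError('Unknown strategy: {}'.format(strategy))
```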
4 | -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/detect.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/detect.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/ensemble.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/ensemble.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/ensembleOptions.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/ensembleOptions.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/function.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/function.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/generateXML.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/generateXML.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/predict_batch.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/predict_batch.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/predict_batch_rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/predict_batch_rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/predict_batch_retinanet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/predict_batch_retinanet.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/techniques.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/techniques.cpython-36.pyc 
-------------------------------------------------------------------------------- /TestTimeAugmentation/__pycache__/testTimeAugmentation.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/__pycache__/testTimeAugmentation.cpython-36.pyc -------------------------------------------------------------------------------- /TestTimeAugmentation/ensembleOptions.py: -------------------------------------------------------------------------------- 1 | import ensemble 2 | import argparse 3 | import numpy as np 4 | import generateXML 5 | import predict_batch 6 | import glob 7 | from lxml import etree 8 | import os 9 | import math 10 | 11 | 12 | 13 | 14 | def ensembleOptions(datasetPath, option): 15 | 16 | #we get a list with as many pairs as there are xmls in the first folder; 17 | #each pair holds the name of the xml file and a list with all the objects of that xml 18 | boxes = ensemble.listarCuadrados(datasetPath) 19 | 20 | #we work image by image and get a list that groups the objects whose IoU > 0.5 21 | for nombre,lis in boxes: 22 | pick = [] 23 | resul = [] 24 | 25 | #we check if the output folder where we are going to store the xmls exists 26 | if os.path.exists(datasetPath+"/output") == False: 27 | os.mkdir(datasetPath+"/output") 28 | 29 | #we look up the width, height and depth of the image 30 | fichIguales = glob.glob(datasetPath + '/*/' + nombre+'.xml') 31 | file = open(datasetPath+"/output/"+nombre+".xml", "w") 32 | numFich = len(fichIguales) 33 | doc = etree.parse(fichIguales[0]) 34 | filename = doc.getroot() # we look for the root of our xml 35 | wI = filename.find("size").find("width").text 36 | hI = filename.find("size").find("height").text 37 | d = filename.find("size").find("depth").text 38 | box = ensemble.uneBoundingBoxes(lis) 39 | #now we apply non-maximum suppression to each list within the list obtained 40 | for rectangles in box: 41 | lista = [] 42 | 43 | for rc in rectangles: 44 | lista.append(rc) 45 | pick = [] 46 | 47 | if option == 'consensus': 48 | if len(np.array(lista))>=math.ceil(numFich/2):#if the number of boxes is at least half the number of files 49 | pick,prob = ensemble.nonMaximumSuppression(np.array(lista), 0.3) 50 | pick[0][5] = prob/numFich 51 | 52 | 53 | elif option == 'unanimous': 54 | if len(np.array(lista))==numFich:#if the number of boxes equals the number of files 55 | pick,prob = ensemble.nonMaximumSuppression(np.array(lista), 0.3) 56 | pick[0][5] = prob / numFich 57 | 58 | elif option == 'affirmative': 59 | pick,prob = ensemble.nonMaximumSuppression(np.array(lista), 0.3) 60 | pick[0][5] = prob / numFich 61 | 62 | if len(pick)!=0: 63 | resul.append(list(pick[0])) 64 | file.write(generateXML.generateXML(nombre, "", wI, hI, d, resul)) 65 | file.close() 66 | -------------------------------------------------------------------------------- /TestTimeAugmentation/function.py: -------------------------------------------------------------------------------- 1 | from clodsa.augmentors.augmentorFactory import createAugmentor 2 | from clodsa.transformers.transformerFactory import transformerGenerator 3 | import techniques 4 | import os 5 | 6 | def clasification(imgFolder,technique): 7 | technique2 = technique 8 | technique = techniques.techniques[technique][0] 9 | augmentor = 
createAugmentor("classification","folders","folders","linear",imgFolder, 10 | {"outputPath":imgFolder+'/../salida/'+technique2+"/"}) 11 | transformer = transformerGenerator("classification") 12 | augmentor.addTransformer(transformer(technique)) 13 | augmentor.applyAugmentation() 14 | 15 | 16 | 17 | 18 | 19 | def detection(imgFolder,technique): 20 | technique2 = technique 21 | technique = techniques.techniques[technique][1] 22 | os.mkdir(imgFolder+"/tmp") 23 | augmentor = createAugmentor("detection","pascalvoc","pascalvoc","linear",imgFolder, 24 | {"outputPath":imgFolder+"/tmp"}) 25 | transformer = transformerGenerator("detection") 26 | augmentor.addTransformer(transformer(technique)) 27 | augmentor.applyAugmentation() -------------------------------------------------------------------------------- /TestTimeAugmentation/generateXML.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import xml.etree.ElementTree as ET 3 | from xml.dom import minidom 4 | 5 | 6 | def prettify(elem): 7 | """Return a pretty-printed XML string for the Element. 8 | """ 9 | rough_string = ET.tostring(elem, 'utf-8') 10 | reparsed = minidom.parseString(rough_string) 11 | return reparsed.toprettyxml(indent=" ") 12 | 13 | def generateXML(filename,outputPath,w,h,d,boxes): 14 | top = ET.Element('annotation') 15 | childFolder = ET.SubElement(top, 'folder') 16 | childFolder.text = 'images' 17 | childFilename = ET.SubElement(top, 'filename') 18 | childFilename.text = filename[0:filename.rfind(".")] 19 | childPath = ET.SubElement(top, 'path') 20 | childPath.text = outputPath + "/" + filename 21 | childSource = ET.SubElement(top, 'source') 22 | childDatabase = ET.SubElement(childSource, 'database') 23 | childDatabase.text = 'Unknown' 24 | childSize = ET.SubElement(top, 'size') 25 | childWidth = ET.SubElement(childSize, 'width') 26 | childWidth.text = str(w) 27 | childHeight = ET.SubElement(childSize, 'height') 28 | childHeight.text = str(h) 29 | childDepth = ET.SubElement(childSize, 'depth') 30 | childDepth.text = str(d) 31 | childSegmented = ET.SubElement(top, 'segmented') 32 | childSegmented.text = str(0) 33 | for box in boxes: 34 | category = box[0] 35 | (x,y,xmax,ymax) = box[1:5] 36 | childObject = ET.SubElement(top, 'object') 37 | childName = ET.SubElement(childObject, 'name') 38 | childName.text = category 39 | childPose = ET.SubElement(childObject, 'pose') 40 | childPose.text = 'Unspecified' 41 | childTruncated = ET.SubElement(childObject, 'truncated') 42 | childTruncated.text = '0' 43 | childDifficult = ET.SubElement(childObject, 'difficult') 44 | childDifficult.text = '0' 45 | childConfidence = ET.SubElement(childObject, 'confidence') 46 | childConfidence.text = box[5] 47 | childBndBox = ET.SubElement(childObject, 'bndbox') 48 | childXmin = ET.SubElement(childBndBox, 'xmin') 49 | childXmin.text = str(x[0:-2]) 50 | childYmin = ET.SubElement(childBndBox, 'ymin') 51 | childYmin.text = str(y[0:-2]) 52 | childXmax = ET.SubElement(childBndBox, 'xmax') 53 | childXmax.text = str(xmax[0:-2]) 54 | childYmax = ET.SubElement(childBndBox, 'ymax') 55 | childYmax.text = str(ymax[0:-2]) 56 | return prettify(top) 57 | 58 | 59 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/README.md: -------------------------------------------------------------------------------- 1 | # keras-fcos 2 | This is an implementation of [FCOS](https://arxiv.org/abs/1904.01355) on keras and Tensorflow. 
The project is based on [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet) 3 | and [tianzhi0549/FCOS](https://github.com/tianzhi0549/FCOS). 4 | Thanks for their hard work. 5 | ## Test 6 | 1. I trained on Pascal VOC2012 trainval.txt + Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. There are 14041 images for training and 2510 images for validation. 7 | 2. The best evaluation results on VOC2007 test are (score_threshold=0.05): 8 | 9 | | backbone | mAP50 | 10 | | ---- | ---- | 11 | | resnet50 | 0.6892 | 12 | | resnet101 | 0.7352 | 13 | 14 | 3. Pretrained model is here. [baidu netdisk](https://pan.baidu.com/s/1Gq3CGPltUumd3JwaCagbGg) extract code: yr8k 15 | 4. `python3 inference.py` to test your image by specifying image path and model path there. 16 | 17 | ![image1](test/005360.jpg) 18 | ![image2](test/2012_000949.jpg) 19 | ![image3](test/2010_003345.jpg) 20 | 21 | 22 | ## Train 23 | ### build dataset (Pascal VOC, other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)) 24 | * Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012. 25 | * Append VOC2007 train.txt to VOC2012 trainval.txt. 26 | * Overwrite VOC2012 val.txt by VOC2007 val.txt. 27 | ### train 28 | * `python3 train.py --backbone resnet50 --gpu 0 pascal datasets/VOC2012` to start training. 29 | ## Evaluate 30 | * `python3 utils/eval.py` to evaluate by specifying model path there. 31 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/generators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/generators/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/generators/test_generator.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from generators.voc_generator import PascalVocGenerator 4 | from utils.transform import random_transform_generator 5 | from utils.image import random_visual_effect_generator 6 | from utils.image import preprocess_image 7 | 8 | 9 | def show_annotations(): 10 | generator = PascalVocGenerator(data_dir='datasets/voc_trainval/VOC0712', set_name='val') 11 | for image_group, annotation_group, targets in generator: 12 | locations = targets[0] 13 | batch_regr_targets = targets[1] 14 | batch_cls_targets = targets[2] 15 | batch_centerness_targets = targets[3] 16 | for image, annotation, regr_targets, cls_targets, centerness_targets in zip(image_group, annotation_group, 17 | batch_regr_targets, 18 | batch_cls_targets, 19 | batch_centerness_targets): 20 | gt_boxes = annotation['bboxes'] 21 | for gt_box in gt_boxes: 22 | gt_xmin, gt_ymin, gt_xmax, gt_ymax = gt_box 23 | cv2.rectangle(image, (int(gt_xmin), int(gt_ymin)), (int(gt_xmax), int(gt_ymax)), (0, 255, 0), 2) 24 | pos_indices = np.where(centerness_targets[:, 1] == 1)[0] 25 | for pos_index in pos_indices: 26 | cx, cy = locations[pos_index] 
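# (FCOS) each row of regr_targets is the (left, top, right, bottom) distance from this location to the sides of its ground-truth box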
27 | l, t, r, b, *_ = regr_targets[pos_index] 28 | xmin = cx - l 29 | ymin = cy - t 30 | xmax = cx + r 31 | ymax = cy + b 32 | class_id = np.argmax(cls_targets[pos_index]) 33 | centerness = centerness_targets[pos_index][0] 34 | # cv2.putText(image, '{:.2f}'.format(centerness), (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 2.0, (255, 0, 255), 2) 35 | cv2.putText(image, str(class_id), (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 2.0, (0, 0, 0), 3) 36 | cv2.circle(image, (round(cx), round(cy)), 5, (255, 0, 0), -1) 37 | cv2.rectangle(image, (round(xmin), round(ymin)), (round(xmax), round(ymax)), (0, 0, 255), 2) 38 | cv2.namedWindow('image', cv2.WINDOW_NORMAL) 39 | cv2.imshow('image', image) 40 | cv2.waitKey(0) 41 | 42 | 43 | def verify_no_negative_regr(): 44 | transform_generator = random_transform_generator( 45 | min_rotation=-0.1, 46 | max_rotation=0.1, 47 | min_translation=(-0.1, -0.1), 48 | max_translation=(0.1, 0.1), 49 | min_shear=-0.1, 50 | max_shear=0.1, 51 | min_scaling=(0.9, 0.9), 52 | max_scaling=(1.1, 1.1), 53 | flip_x_chance=0.5, 54 | flip_y_chance=0.5, 55 | ) 56 | visual_effect_generator = random_visual_effect_generator( 57 | contrast_range=(0.9, 1.1), 58 | brightness_range=(-.1, .1), 59 | hue_range=(-0.05, 0.05), 60 | saturation_range=(0.95, 1.05) 61 | ) 62 | common_args = { 63 | 'batch_size': 1, 64 | 'image_min_side': 800, 65 | 'image_max_side': 1333, 66 | 'preprocess_image': preprocess_image, 67 | } 68 | generator = PascalVocGenerator( 69 | 'datasets/voc_trainval/VOC0712', 70 | 'trainval', 71 | transform_generator=transform_generator, 72 | visual_effect_generator=visual_effect_generator, 73 | skip_difficult=True, 74 | **common_args 75 | ) 76 | i = 0 77 | for image_group, targets in generator: 78 | i += 1 79 | if i > 20000: 80 | break 81 | 82 | 83 | verify_no_negative_regr() 84 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/inference.py: -------------------------------------------------------------------------------- 1 | # import keras 2 | import keras 3 | import models 4 | from utils.image import read_image_bgr, preprocess_image, resize_image 5 | from utils.visualization import draw_box, draw_caption 6 | from utils.colors import label_color 7 | 8 | # import miscellaneous modules 9 | import cv2 10 | import os 11 | import os.path as osp 12 | import numpy as np 13 | import time 14 | import glob 15 | 16 | # set tf backend to allow memory to grow, instead of claiming everything 17 | import tensorflow as tf 18 | from utils.anchors import guess_shapes, compute_locations 19 | 20 | 21 | def get_session(): 22 | config = tf.ConfigProto() 23 | config.gpu_options.allow_growth = True 24 | return tf.Session(config=config) 25 | 26 | 27 | # use this environment flag to change which GPU to use 28 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 29 | 30 | # set the modified tf session as backend in keras 31 | keras.backend.tensorflow_backend.set_session(get_session()) 32 | # adjust this to point to your downloaded/trained model 33 | # models can be downloaded here: https://github.com/fizyr/keras-retinanet/releases 34 | model_path = '/home/adam/workspace/github/xuannianz/carrot/fcos/snapshots/2019-08-25/resnet101_pascal_07_0.7352.h5' 35 | 36 | # load retinanet model 37 | model = models.load_model(model_path, backbone_name='resnet101') 38 | 39 | # if the model is not converted to an inference model, use the line below 40 | # see: https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model 41 | model = 
models.convert_model(model) 42 | 43 | # load label to names mapping for visualization purposes 44 | voc_classes = { 45 | 'aeroplane': 0, 46 | 'bicycle': 1, 47 | 'bird': 2, 48 | 'boat': 3, 49 | 'bottle': 4, 50 | 'bus': 5, 51 | 'car': 6, 52 | 'cat': 7, 53 | 'chair': 8, 54 | 'cow': 9, 55 | 'diningtable': 10, 56 | 'dog': 11, 57 | 'horse': 12, 58 | 'motorbike': 13, 59 | 'person': 14, 60 | 'pottedplant': 15, 61 | 'sheep': 16, 62 | 'sofa': 17, 63 | 'train': 18, 64 | 'tvmonitor': 19 65 | } 66 | labels_to_names = {} 67 | for key, value in voc_classes.items(): 68 | labels_to_names[value] = key 69 | # load image 70 | image_paths = glob.glob('datasets/VOC0712/JPEGImages/*.jpg') 71 | for image_path in image_paths: 72 | image = read_image_bgr(image_path) 73 | 74 | # copy to draw on 75 | draw = image.copy() 76 | 77 | # preprocess image for network 78 | image = preprocess_image(image) 79 | image, scale = resize_image(image) 80 | 81 | feature_shapes = guess_shapes(image.shape) 82 | print(feature_shapes) 83 | locations = compute_locations(feature_shapes) 84 | for location in locations: 85 | print(location.shape) 86 | # process image 87 | start = time.time() 88 | # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0)) 89 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 90 | print("processing time: ", time.time() - start) 91 | 92 | # correct for image scale 93 | boxes /= scale 94 | labels_to_locations = {} 95 | # visualize detections 96 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 97 | # scores are sorted so we can break 98 | if score < 0.5: 99 | break 100 | start_x = int(box[0]) 101 | start_y = int(box[1]) 102 | end_x = int(box[2]) 103 | end_y = int(box[3]) 104 | color = label_color(label) 105 | 106 | b = box.astype(int) 107 | draw_box(draw, b, color=color) 108 | 109 | caption = "{} {:.3f}".format(labels_to_names[label], score) 110 | draw_caption(draw, b, caption) 111 | 112 | cv2.namedWindow('image', cv2.WINDOW_NORMAL) 113 | cv2.imshow('image', draw) 114 | key = cv2.waitKey(0) 115 | if int(key) == 121: 116 | image_fname = osp.split(image_path)[-1] 117 | cv2.imwrite('test/{}'.format(image_fname), draw) 118 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/initializers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | 19 | import numpy as np 20 | import math 21 | 22 | 23 | class PriorProbability(keras.initializers.Initializer): 24 | """ Apply a prior probability to the weights. 
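The bias is initialised to -log((1 - probability) / probability), so at the start of training every location predicts foreground with roughly the given prior probability (the RetinaNet classification-head initialisation).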
25 | """ 26 | 27 | def __init__(self, probability=0.01): 28 | self.probability = probability 29 | 30 | def get_config(self): 31 | return { 32 | 'probability': self.probability 33 | } 34 | 35 | def __call__(self, shape, dtype=None): 36 | # set bias to -log((1 - p)/p) for foreground 37 | result = np.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability) 38 | 39 | return result 40 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import tensorflow as tf 18 | import keras.backend as K 19 | 20 | 21 | def focal(alpha=0.25, gamma=2.0): 22 | """ 23 | Create a functor for computing the focal loss. 24 | 25 | Args 26 | alpha: Scale the focal weight with alpha. 27 | gamma: Take the power of the focal weight with gamma. 28 | 29 | Returns 30 | A functor that computes the focal loss using the alpha and gamma. 31 | """ 32 | 33 | def _focal(y_true, y_pred): 34 | """ 35 | Compute the focal loss given the target tensor and the predicted tensor. 36 | 37 | As defined in https://arxiv.org/abs/1708.02002 38 | 39 | Args 40 | y_true: Tensor of target data from the generator with shape (B, N, num_classes). 41 | y_pred: Tensor of predicted data from the network with shape (B, N, num_classes). 42 | 43 | Returns 44 | The focal loss of y_pred w.r.t. y_true. 
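Computed as FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t), where p_t is y_pred for positive labels and 1 - y_pred otherwise, normalised by the number of positive locations.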
45 | """ 46 | # compute the focal loss 47 | location_state = y_true[:, :, -1] 48 | labels = y_true[:, :, :-1] 49 | alpha_factor = K.ones_like(labels) * alpha 50 | alpha_factor = tf.where(K.equal(labels, 1), alpha_factor, 1 - alpha_factor) 51 | # (1 - 0.99) ** 2 = 1e-4, (1 - 0.9) ** 2 = 1e-2 52 | focal_weight = tf.where(K.equal(labels, 1), 1 - y_pred, y_pred) 53 | focal_weight = alpha_factor * focal_weight ** gamma 54 | cls_loss = focal_weight * K.binary_crossentropy(labels, y_pred) 55 | 56 | # compute the normalizer: the number of positive anchors 57 | normalizer = tf.where(K.equal(location_state, 1)) 58 | normalizer = K.cast(K.shape(normalizer)[0], K.floatx()) 59 | normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer) 60 | 61 | return K.sum(cls_loss) / normalizer 62 | 63 | return _focal 64 | 65 | 66 | def iou(): 67 | def iou_(y_true, y_pred): 68 | location_state = y_true[:, :, -1] 69 | indices = tf.where(K.equal(location_state, 1)) 70 | if tf.size(indices) == 0: 71 | return tf.constant(0.0) 72 | y_regr_pred = tf.gather_nd(y_pred, indices) 73 | y_true = tf.gather_nd(y_true, indices) 74 | y_regr_true = y_true[:, :4] 75 | y_centerness_true = y_true[:, 4] 76 | 77 | # (num_pos, ) 78 | pred_left = y_regr_pred[:, 0] 79 | pred_top = y_regr_pred[:, 1] 80 | pred_right = y_regr_pred[:, 2] 81 | pred_bottom = y_regr_pred[:, 3] 82 | 83 | # (num_pos, ) 84 | target_left = y_regr_true[:, 0] 85 | target_top = y_regr_true[:, 1] 86 | target_right = y_regr_true[:, 2] 87 | target_bottom = y_regr_true[:, 3] 88 | 89 | target_area = (target_left + target_right) * (target_top + target_bottom) 90 | pred_area = (pred_left + pred_right) * (pred_top + pred_bottom) 91 | w_intersect = tf.minimum(pred_left, target_left) + tf.minimum(pred_right, target_right) 92 | h_intersect = tf.minimum(pred_bottom, target_bottom) + tf.minimum(pred_top, target_top) 93 | 94 | area_intersect = w_intersect * h_intersect 95 | area_union = target_area + pred_area - area_intersect 96 | 97 | # (num_pos, ) 98 | losses = -tf.log((area_intersect + 1.0) / (area_union + 1.0)) 99 | losses = tf.reduce_sum(losses * y_centerness_true) / (tf.reduce_sum(y_centerness_true) + 1e-8) 100 | return losses 101 | 102 | return iou_ 103 | 104 | 105 | def bce(): 106 | def bce_(y_true, y_pred): 107 | location_state = y_true[:, :, -1] 108 | indices = tf.where(K.equal(location_state, 1)) 109 | if tf.size(indices) == 0: 110 | return tf.constant(0.0) 111 | y_centerness_pred = tf.gather_nd(y_pred, indices) 112 | y_true = tf.gather_nd(y_true, indices) 113 | y_centerness_true = y_true[:, 0:1] 114 | loss = K.switch(tf.size(y_centerness_true) > 0, 115 | K.binary_crossentropy(target=y_centerness_true, output=y_centerness_pred), 116 | tf.constant(0.0)) 117 | loss = K.mean(loss) 118 | return loss 119 | 120 | return bce_ 121 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | sys.path.append("../") 4 | from .. import layers 5 | from .. import losses 6 | from .. import initializers 7 | 8 | 9 | class Backbone(object): 10 | """ This class stores additional information on backbones. 
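Concrete subclasses (ResNetBackbone, MobileNetBackbone, VGGBackbone, DenseNetBackbone) override retinanet(), download_imagenet(), validate() and preprocess_image().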
11 | """ 12 | 13 | def __init__(self, backbone): 14 | # a dictionary mapping custom layer names to the correct classes 15 | self.custom_objects = { 16 | 'UpsampleLike': layers.UpsampleLike, 17 | 'PriorProbability': initializers.PriorProbability, 18 | 'RegressBoxes': layers.RegressBoxes, 19 | 'FilterDetections': layers.FilterDetections, 20 | 'Anchors': layers.Anchors, 21 | 'ClipBoxes': layers.ClipBoxes, 22 | '_focal': losses.focal(), 23 | 'bce_': losses.bce(), 24 | 'iou_': losses.iou(), 25 | } 26 | 27 | self.backbone = backbone 28 | self.validate() 29 | 30 | def retinanet(self, *args, **kwargs): 31 | """ 32 | Returns a retinanet model using the correct backbone. 33 | """ 34 | raise NotImplementedError('retinanet method not implemented.') 35 | 36 | def download_imagenet(self): 37 | """ 38 | Downloads ImageNet weights and returns path to weights file. 39 | """ 40 | raise NotImplementedError('download_imagenet method not implemented.') 41 | 42 | def validate(self): 43 | """ 44 | Checks whether the backbone string is correct. 45 | """ 46 | raise NotImplementedError('validate method not implemented.') 47 | 48 | def preprocess_image(self, inputs): 49 | """ 50 | Takes as input an image and prepares it for being passed through the network. 51 | Having this function in Backbone allows other backbones to define a specific preprocessing step. 52 | """ 53 | raise NotImplementedError('preprocess_image method not implemented.') 54 | 55 | 56 | def backbone(backbone_name): 57 | """ Returns a backbone object for the given backbone. 58 | """ 59 | if 'resnet' in backbone_name: 60 | from .resnet import ResNetBackbone as b 61 | elif 'mobilenet' in backbone_name: 62 | from .mobilenet import MobileNetBackbone as b 63 | elif 'vgg' in backbone_name: 64 | from .vgg import VGGBackbone as b 65 | elif 'densenet' in backbone_name: 66 | from .densenet import DenseNetBackbone as b 67 | else: 68 | raise NotImplementedError('Backbone class for \'{}\' not implemented.'.format(backbone)) 69 | 70 | return b(backbone_name) 71 | 72 | 73 | def load_model(filepath, backbone_name='resnet50'): 74 | """ Loads a retinanet model using the correct custom objects. 75 | 76 | Args 77 | filepath: one of the following: 78 | - string, path to the saved model, or 79 | - h5py.File object from which to load the model 80 | backbone_name : Backbone with which the model was trained. 81 | 82 | Returns 83 | A keras.models.Model object. 84 | 85 | Raises 86 | ImportError: if h5py is not available. 87 | ValueError: In case of an invalid savefile. 88 | """ 89 | import keras.models 90 | return keras.models.load_model(filepath, custom_objects=backbone(backbone_name).custom_objects) 91 | 92 | 93 | def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None): 94 | """ Converts a training model to an inference model. 95 | 96 | Args 97 | model : A retinanet training model. 98 | nms : Boolean, whether to add NMS filtering to the converted model. 99 | class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only. 100 | anchor_params : Anchor parameters object. If omitted, default values are used. 101 | 102 | Returns 103 | A keras.models.Model object. 104 | 105 | Raises 106 | ImportError: if h5py is not available. 107 | ValueError: In case of an invalid savefile. 
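Typical flow (as in inference.py): load a training snapshot with load_model(...) and wrap it with convert_model(...) to obtain the boxes/scores/labels outputs.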
108 | """ 109 | from .retinanet import retinanet_bbox 110 | return retinanet_bbox(model=model, nms=nms, class_specific_filter=class_specific_filter, 111 | anchor_params=anchor_params) 112 | 113 | 114 | def assert_training_model(model): 115 | """ 116 | Assert that the model is a training model. 117 | """ 118 | # in __build_model_pyramid the Concatenate layer is given the model name, 'regression' or 'classification' 119 | assert (all(output in model.output_names for output in ['regression', 'classification'])), \ 120 | "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format( 121 | model.output_names) 122 | 123 | 124 | def check_training_model(model): 125 | """ 126 | Check that model is a training model and exit otherwise. 127 | """ 128 | try: 129 | assert_training_model(model) 130 | except AssertionError as e: 131 | print(e, file=sys.stderr) 132 | sys.exit(1) 133 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/models/densenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2018 vidosits (https://github.com/vidosits/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | from keras.applications import densenet 19 | from keras.utils import get_file 20 | 21 | from . import retinanet 22 | from . import Backbone 23 | from utils.image import preprocess_image 24 | 25 | 26 | allowed_backbones = { 27 | 'densenet121': ([6, 12, 24, 16], densenet.DenseNet121), 28 | 'densenet169': ([6, 12, 32, 32], densenet.DenseNet169), 29 | 'densenet201': ([6, 12, 48, 32], densenet.DenseNet201), 30 | } 31 | 32 | 33 | class DenseNetBackbone(Backbone): 34 | """ Describes backbone information and provides utility functions. 35 | """ 36 | 37 | def retinanet(self, *args, **kwargs): 38 | """ Returns a retinanet model using the correct backbone. 39 | """ 40 | return densenet_retinanet(*args, backbone=self.backbone, **kwargs) 41 | 42 | def download_imagenet(self): 43 | """ Download pre-trained weights for the specified backbone name. 44 | This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop 45 | where backbone is the densenet + number of layers (e.g. densenet121). 46 | For more info check the explanation from the keras densenet script itself: 47 | https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py 48 | """ 49 | origin = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/' 50 | file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5' 51 | 52 | # load weights 53 | if keras.backend.image_data_format() == 'channels_first': 54 | raise ValueError('Weights for "channels_first" format are not available.') 55 | 56 | weights_url = origin + file_name.format(self.backbone) 57 | return get_file(file_name.format(self.backbone), weights_url, cache_subdir='models') 58 | 59 | def validate(self): 60 | """ Checks whether the backbone string is correct. 
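The part of the name before the first '_' must be one of the allowed_backbones keys: 'densenet121', 'densenet169' or 'densenet201'.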
61 | """ 62 | backbone = self.backbone.split('_')[0] 63 | 64 | if backbone not in allowed_backbones: 65 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones.keys())) 66 | 67 | def preprocess_image(self, inputs): 68 | """ Takes as input an image and prepares it for being passed through the network. 69 | """ 70 | return preprocess_image(inputs, mode='tf') 71 | 72 | 73 | def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs): 74 | """ Constructs a retinanet model using a densenet backbone. 75 | 76 | Args 77 | num_classes: Number of classes to predict. 78 | backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')). 79 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 80 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 81 | 82 | Returns 83 | RetinaNet model with a DenseNet backbone. 84 | """ 85 | # choose default input 86 | if inputs is None: 87 | inputs = keras.layers.Input((None, None, 3)) 88 | 89 | blocks, creator = allowed_backbones[backbone] 90 | model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None) 91 | 92 | # get last conv layer from the end of each dense block 93 | layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)] 94 | 95 | # create the densenet backbone 96 | model = keras.models.Model(inputs=inputs, outputs=layer_outputs[1:], name=model.name) 97 | 98 | # invoke modifier if given 99 | if modifier: 100 | model = modifier(model) 101 | 102 | # create the full model 103 | model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=model.outputs, **kwargs) 104 | 105 | return model 106 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras 18 | from keras.applications import mobilenet 19 | from keras.utils import get_file 20 | from utils.image import preprocess_image 21 | 22 | from . import retinanet 23 | from . import Backbone 24 | 25 | 26 | class MobileNetBackbone(Backbone): 27 | """ Describes backbone information and provides utility functions. 28 | """ 29 | 30 | allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224'] 31 | 32 | def retinanet(self, *args, **kwargs): 33 | """ Returns a retinanet model using the correct backbone. 34 | """ 35 | return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs) 36 | 37 | def download_imagenet(self): 38 | """ Download pre-trained weights for the specified backbone name. 
39 | This name is in the format mobilenet{rows}_{alpha} where rows is the 40 | imagenet shape dimension and 'alpha' controls the width of the network. 41 | For more info check the explanation from the keras mobilenet script itself. 42 | """ 43 | 44 | alpha = float(self.backbone.split('_')[1]) 45 | rows = int(self.backbone.split('_')[0].replace('mobilenet', '')) 46 | 47 | # load weights 48 | if keras.backend.image_data_format() == 'channels_first': 49 | raise ValueError('Weights for "channels_last" format ' 50 | 'are not available.') 51 | if alpha == 1.0: 52 | alpha_text = '1_0' 53 | elif alpha == 0.75: 54 | alpha_text = '7_5' 55 | elif alpha == 0.50: 56 | alpha_text = '5_0' 57 | else: 58 | alpha_text = '2_5' 59 | 60 | model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows) 61 | weights_url = mobilenet.mobilenet.BASE_WEIGHT_PATH + model_name 62 | weights_path = get_file(model_name, weights_url, cache_subdir='models') 63 | 64 | return weights_path 65 | 66 | def validate(self): 67 | """ Checks whether the backbone string is correct. 68 | """ 69 | backbone = self.backbone.split('_')[0] 70 | 71 | if backbone not in MobileNetBackbone.allowed_backbones: 72 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones)) 73 | 74 | def preprocess_image(self, inputs): 75 | """ Takes as input an image and prepares it for being passed through the network. 76 | """ 77 | return preprocess_image(inputs, mode='tf') 78 | 79 | 80 | def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs): 81 | """ Constructs a retinanet model using a mobilenet backbone. 82 | 83 | Args 84 | num_classes: Number of classes to predict. 85 | backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')). 86 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 87 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 88 | 89 | Returns 90 | RetinaNet model with a MobileNet backbone. 91 | """ 92 | alpha = float(backbone.split('_')[1]) 93 | 94 | # choose default input 95 | if inputs is None: 96 | inputs = keras.layers.Input((None, None, 3)) 97 | 98 | backbone = mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None) 99 | 100 | # create the full model 101 | layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu'] 102 | layer_outputs = [backbone.get_layer(name).output for name in layer_names] 103 | backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name) 104 | 105 | # invoke modifier if given 106 | if modifier: 107 | backbone = modifier(backbone) 108 | 109 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone.outputs, **kwargs) 110 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/models/vgg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 cgratie (https://github.com/cgratie/) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | import keras 19 | from keras.utils import get_file 20 | 21 | from . import retinanet 22 | from . import Backbone 23 | from utils.image import preprocess_image 24 | 25 | 26 | class VGGBackbone(Backbone): 27 | """ Describes backbone information and provides utility functions. 28 | """ 29 | 30 | def retinanet(self, *args, **kwargs): 31 | """ Returns a retinanet model using the correct backbone. 32 | """ 33 | return vgg_retinanet(*args, backbone=self.backbone, **kwargs) 34 | 35 | def download_imagenet(self): 36 | """ Downloads ImageNet weights and returns path to weights file. 37 | Weights can be downloaded at https://github.com/fizyr/keras-models/releases . 38 | """ 39 | if self.backbone == 'vgg16': 40 | resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP 41 | checksum = '6d6bbae143d832006294945121d1f1fc' 42 | elif self.backbone == 'vgg19': 43 | resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP 44 | checksum = '253f8cb515780f3b799900260a226db6' 45 | else: 46 | raise ValueError("Backbone '{}' not recognized.".format(self.backbone)) 47 | 48 | return get_file( 49 | '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone), 50 | resource, 51 | cache_subdir='models', 52 | file_hash=checksum 53 | ) 54 | 55 | def validate(self): 56 | """ Checks whether the backbone string is correct. 57 | """ 58 | allowed_backbones = ['vgg16', 'vgg19'] 59 | 60 | if self.backbone not in allowed_backbones: 61 | raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones)) 62 | 63 | def preprocess_image(self, inputs): 64 | """ Takes as input an image and prepares it for being passed through the network. 65 | """ 66 | return preprocess_image(inputs, mode='caffe') 67 | 68 | 69 | def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs): 70 | """ Constructs a retinanet model using a vgg backbone. 71 | 72 | Args 73 | num_classes: Number of classes to predict. 74 | backbone: Which backbone to use (one of ('vgg16', 'vgg19')). 75 | inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). 76 | modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). 77 | 78 | Returns 79 | RetinaNet model with a VGG backbone. 
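The pyramid features are taken from the 'block3_pool', 'block4_pool' and 'block5_pool' outputs of the VGG backbone.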
80 | """ 81 | # choose default input 82 | if inputs is None: 83 | inputs = keras.layers.Input(shape=(None, None, 3)) 84 | 85 | # create the vgg backbone 86 | if backbone == 'vgg16': 87 | vgg = keras.applications.VGG16(input_tensor=inputs, include_top=False, weights=None) 88 | elif backbone == 'vgg19': 89 | vgg = keras.applications.VGG19(input_tensor=inputs, include_top=False, weights=None) 90 | else: 91 | raise ValueError("Backbone '{}' not recognized.".format(backbone)) 92 | 93 | if modifier: 94 | vgg = modifier(vgg) 95 | 96 | # create the full model 97 | layer_names = ["block3_pool", "block4_pool", "block5_pool"] 98 | layer_outputs = [vgg.get_layer(name).output for name in layer_names] 99 | return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=layer_outputs, **kwargs) 100 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.2.5 2 | keras-resnet==0.2.0 3 | opencv-contrib-python==3.4.2.17 4 | opencv-python==3.4.2.17 5 | Pillow==6.2.0 6 | tensorflow-gpu==1.15.2 7 | progressbar2 8 | git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI 9 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools.extension import Extension 3 | from distutils.command.build_ext import build_ext as DistUtilsBuildExt 4 | 5 | 6 | class BuildExtension(setuptools.Command): 7 | description = DistUtilsBuildExt.description 8 | user_options = DistUtilsBuildExt.user_options 9 | boolean_options = DistUtilsBuildExt.boolean_options 10 | help_options = DistUtilsBuildExt.help_options 11 | 12 | def __init__(self, *args, **kwargs): 13 | from setuptools.command.build_ext import build_ext as SetupToolsBuildExt 14 | 15 | # Bypass __setattr__ to avoid infinite recursion. 
16 | self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs) 17 | 18 | def __getattr__(self, name): 19 | return getattr(self._command, name) 20 | 21 | def __setattr__(self, name, value): 22 | setattr(self._command, name, value) 23 | 24 | def initialize_options(self, *args, **kwargs): 25 | return self._command.initialize_options(*args, **kwargs) 26 | 27 | def finalize_options(self, *args, **kwargs): 28 | ret = self._command.finalize_options(*args, **kwargs) 29 | import numpy 30 | self.include_dirs.append(numpy.get_include()) 31 | return ret 32 | 33 | def run(self, *args, **kwargs): 34 | return self._command.run(*args, **kwargs) 35 | 36 | 37 | extensions = [ 38 | Extension( 39 | 'utils.compute_overlap', 40 | ['utils/compute_overlap.pyx'] 41 | ), 42 | ] 43 | 44 | setuptools.setup( 45 | cmdclass={'build_ext': BuildExtension}, 46 | packages=setuptools.find_packages(), 47 | ext_modules=extensions, 48 | setup_requires=["cython>=0.28", "numpy>=1.14.0"] 49 | ) 50 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/test/005360.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/test/005360.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/test/2010_003345.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/test/2010_003345.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/test/2012_000949.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/test/2012_000949.jpg -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/utils/__init__.py -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/coco_eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | from pycocotools.cocoeval import COCOeval 18 | 19 | import keras 20 | import numpy as np 21 | import json 22 | 23 | import progressbar 24 | assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead." 25 | 26 | 27 | def evaluate_coco(generator, model, threshold=0.05): 28 | """ Use the pycocotools to evaluate a COCO model on a dataset. 29 | 30 | Args 31 | generator : The generator for generating the evaluation data. 32 | model : The model to evaluate. 33 | threshold : The score threshold to use. 34 | """ 35 | # start collecting results 36 | results = [] 37 | image_ids = [] 38 | for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '): 39 | image = generator.load_image(index) 40 | image = generator.preprocess_image(image) 41 | image, scale = generator.resize_image(image) 42 | 43 | if keras.backend.image_data_format() == 'channels_first': 44 | image = image.transpose((2, 0, 1)) 45 | 46 | # run network 47 | boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0)) 48 | 49 | # correct boxes for image scale 50 | boxes /= scale 51 | 52 | # change to (x, y, w, h) (MS COCO standard) 53 | boxes[:, :, 2] -= boxes[:, :, 0] 54 | boxes[:, :, 3] -= boxes[:, :, 1] 55 | 56 | # compute predicted labels and scores 57 | for box, score, label in zip(boxes[0], scores[0], labels[0]): 58 | # scores are sorted, so we can break 59 | if score < threshold: 60 | break 61 | 62 | # append detection for each positively labeled class 63 | image_result = { 64 | 'image_id' : generator.image_ids[index], 65 | 'category_id' : generator.label_to_coco_label(label), 66 | 'score' : float(score), 67 | 'bbox' : box.tolist(), 68 | } 69 | 70 | # append detection to results 71 | results.append(image_result) 72 | 73 | # append image to list of processed images 74 | image_ids.append(generator.image_ids[index]) 75 | 76 | if not len(results): 77 | return 78 | 79 | # write output 80 | json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4) 81 | json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4) 82 | 83 | # load results in COCO evaluation tool 84 | coco_true = generator.coco 85 | coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name)) 86 | 87 | # run COCO evaluation 88 | coco_eval = COCOeval(coco_true, coco_pred, 'bbox') 89 | coco_eval.params.imgIds = image_ids 90 | coco_eval.evaluate() 91 | coco_eval.accumulate() 92 | coco_eval.summarize() 93 | return coco_eval.stats 94 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/colors.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | 4 | def label_color(label): 5 | """ Return a color from a set of predefined colors. Contains 80 colors in total. 6 | 7 | Args 8 | label: The label to get the color for. 9 | 10 | Returns 11 | A list of three values representing a RGB color. 12 | 13 | If no color is defined for a certain label, the color green is returned and a warning is printed. 
14 | """ 15 | if label < len(colors): 16 | return colors[label] 17 | else: 18 | warnings.warn('Label {} has no color, returning default.'.format(label)) 19 | return (0, 255, 0) 20 | 21 | 22 | """ 23 | Generated using: 24 | 25 | ``` 26 | colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] 27 | shuffle(colors) 28 | pprint(colors) 29 | ``` 30 | """ 31 | colors = [ 32 | [31 , 0 , 255] , 33 | [0 , 159 , 255] , 34 | [255 , 95 , 0] , 35 | [255 , 19 , 0] , 36 | [255 , 0 , 0] , 37 | [255 , 38 , 0] , 38 | [0 , 255 , 25] , 39 | [255 , 0 , 133] , 40 | [255 , 172 , 0] , 41 | [108 , 0 , 255] , 42 | [0 , 82 , 255] , 43 | [0 , 255 , 6] , 44 | [255 , 0 , 152] , 45 | [223 , 0 , 255] , 46 | [12 , 0 , 255] , 47 | [0 , 255 , 178] , 48 | [108 , 255 , 0] , 49 | [184 , 0 , 255] , 50 | [255 , 0 , 76] , 51 | [146 , 255 , 0] , 52 | [51 , 0 , 255] , 53 | [0 , 197 , 255] , 54 | [255 , 248 , 0] , 55 | [255 , 0 , 19] , 56 | [255 , 0 , 38] , 57 | [89 , 255 , 0] , 58 | [127 , 255 , 0] , 59 | [255 , 153 , 0] , 60 | [0 , 255 , 255] , 61 | [0 , 255 , 216] , 62 | [0 , 255 , 121] , 63 | [255 , 0 , 248] , 64 | [70 , 0 , 255] , 65 | [0 , 255 , 159] , 66 | [0 , 216 , 255] , 67 | [0 , 6 , 255] , 68 | [0 , 63 , 255] , 69 | [31 , 255 , 0] , 70 | [255 , 57 , 0] , 71 | [255 , 0 , 210] , 72 | [0 , 255 , 102] , 73 | [242 , 255 , 0] , 74 | [255 , 191 , 0] , 75 | [0 , 255 , 63] , 76 | [255 , 0 , 95] , 77 | [146 , 0 , 255] , 78 | [184 , 255 , 0] , 79 | [255 , 114 , 0] , 80 | [0 , 255 , 235] , 81 | [255 , 229 , 0] , 82 | [0 , 178 , 255] , 83 | [255 , 0 , 114] , 84 | [255 , 0 , 57] , 85 | [0 , 140 , 255] , 86 | [0 , 121 , 255] , 87 | [12 , 255 , 0] , 88 | [255 , 210 , 0] , 89 | [0 , 255 , 44] , 90 | [165 , 255 , 0] , 91 | [0 , 25 , 255] , 92 | [0 , 255 , 140] , 93 | [0 , 101 , 255] , 94 | [0 , 255 , 82] , 95 | [223 , 255 , 0] , 96 | [242 , 0 , 255] , 97 | [89 , 0 , 255] , 98 | [165 , 0 , 255] , 99 | [70 , 255 , 0] , 100 | [255 , 0 , 172] , 101 | [255 , 76 , 0] , 102 | [203 , 255 , 0] , 103 | [204 , 0 , 255] , 104 | [255 , 0 , 229] , 105 | [255 , 133 , 0] , 106 | [127 , 0 , 255] , 107 | [0 , 235 , 255] , 108 | [0 , 255 , 197] , 109 | [255 , 0 , 191] , 110 | [0 , 44 , 255] , 111 | [50 , 255 , 0] 112 | ] 113 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/TestTimeAugmentation/kerasfcos/utils/compute_overlap.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/compute_overlap.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | def compute_overlap( 14 | np.ndarray[double, ndim=2] boxes, 15 | np.ndarray[double, ndim=2] query_boxes 16 | ): 17 | """ 18 | Args 19 | a: (N, 4) ndarray of float 20 | b: (K, 4) ndarray of float 21 | 22 | Returns 23 | overlaps: (N, K) ndarray of overlap between boxes and 
query_boxes 24 | """ 25 | cdef unsigned int N = boxes.shape[0] 26 | cdef unsigned int K = query_boxes.shape[0] 27 | cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64) 28 | cdef double iw, ih, box_area 29 | cdef double ua 30 | cdef unsigned int k, n 31 | for k in range(K): 32 | box_area = ( 33 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 34 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 35 | ) 36 | for n in range(N): 37 | iw = ( 38 | min(boxes[n, 2], query_boxes[k, 2]) - 39 | max(boxes[n, 0], query_boxes[k, 0]) + 1 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3], query_boxes[k, 3]) - 44 | max(boxes[n, 1], query_boxes[k, 1]) + 1 45 | ) 46 | if ih > 0: 47 | ua = np.float64( 48 | (boxes[n, 2] - boxes[n, 0] + 1) * 49 | (boxes[n, 3] - boxes[n, 1] + 1) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import configparser 18 | import numpy as np 19 | import keras 20 | from utils.anchors import AnchorParameters 21 | 22 | 23 | def read_config_file(config_path): 24 | config = configparser.ConfigParser() 25 | 26 | with open(config_path, 'r') as file: 27 | config.read_file(file) 28 | 29 | assert 'anchor_parameters' in config, \ 30 | "Malformed config file. Verify that it contains the anchor_parameters section." 31 | 32 | config_keys = set(config['anchor_parameters']) 33 | default_keys = set(AnchorParameters.default.__dict__.keys()) 34 | 35 | assert config_keys <= default_keys, \ 36 | "Malformed config file. These keys are not valid: {}".format(config_keys - default_keys) 37 | 38 | return config 39 | 40 | 41 | def parse_anchor_parameters(config): 42 | ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx()) 43 | scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx()) 44 | sizes = list(map(int, config['anchor_parameters']['sizes'].split(' '))) 45 | strides = list(map(int, config['anchor_parameters']['strides'].split(' '))) 46 | 47 | return AnchorParameters(sizes, strides, ratios, scales) 48 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/keras_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from __future__ import print_function 18 | 19 | import keras 20 | import sys 21 | 22 | minimum_keras_version = 2, 2, 4 23 | 24 | 25 | def keras_version(): 26 | """ 27 | Get the Keras version. 28 | 29 | Returns 30 | tuple of (major, minor, patch). 如 (2, 2, 4) 31 | """ 32 | return tuple(map(int, keras.__version__.split('.'))) 33 | 34 | 35 | def keras_version_ok(): 36 | """ 37 | Check if the current Keras version is higher than the minimum version. 38 | """ 39 | return keras_version() >= minimum_keras_version 40 | 41 | 42 | def assert_keras_version(): 43 | """ 44 | Assert that the Keras version is up to date. 45 | """ 46 | detected = keras.__version__ 47 | required = '.'.join(map(str, minimum_keras_version)) 48 | assert(keras_version() >= minimum_keras_version), 'You are using keras version {}. The minimum required version is {}.'.format(detected, required) 49 | 50 | 51 | def check_keras_version(): 52 | """ 53 | Check that the Keras version is up to date. If it isn't, print an error message and exit the script. 54 | """ 55 | try: 56 | assert_keras_version() 57 | except AssertionError as e: 58 | print(e, file=sys.stderr) 59 | sys.exit(1) 60 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | 18 | def freeze(model): 19 | """ 20 | Set all layers in a model to non-trainable. 21 | 22 | The weights for these layers will not be updated during training. 23 | 24 | This function modifies the given model in-place, 25 | but it also returns the modified model to allow easy chaining with other functions. 26 | """ 27 | for layer in model.layers: 28 | layer.trainable = False 29 | return model 30 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import cv2 18 | import numpy as np 19 | 20 | from .colors import label_color 21 | 22 | 23 | def draw_box(image, box, color, thickness=2): 24 | """ Draws a box on an image with a given color. 25 | 26 | # Arguments 27 | image : The image to draw on. 28 | box : A list of 4 elements (x1, y1, x2, y2). 29 | color : The color of the box. 30 | thickness : The thickness of the lines to draw a box with. 31 | """ 32 | b = np.array(box).astype(int) 33 | cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA) 34 | 35 | 36 | def draw_caption(image, box, caption): 37 | """ Draws a caption above the box in an image. 38 | 39 | # Arguments 40 | image : The image to draw on. 41 | box : A list of 4 elements (x1, y1, x2, y2). 42 | caption : String containing the text to draw. 43 | """ 44 | b = np.array(box).astype(int) 45 | ret, baseline = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) 46 | cv2.rectangle(image, (b[0], b[3] - ret[1] - baseline), (b[0] + ret[0], b[3]), (255, 255, 255), -1) 47 | cv2.putText(image, caption, (b[0], b[3] - baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1) 48 | 49 | 50 | def draw_boxes(image, boxes, color, thickness=2): 51 | """ Draws boxes on an image with a given color. 52 | 53 | # Arguments 54 | image : The image to draw on. 55 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 56 | color : The color of the boxes. 57 | thickness : The thickness of the lines to draw boxes with. 58 | """ 59 | for b in boxes: 60 | draw_box(image, b, color, thickness=thickness) 61 | 62 | 63 | def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5): 64 | """ Draws detections in an image. 65 | 66 | # Arguments 67 | image : The image to draw on. 68 | boxes : A [N, 4] matrix (x1, y1, x2, y2). 69 | scores : A list of N classification scores. 70 | labels : A list of N labels. 71 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 72 | label_to_name : (optional) Functor for mapping a label to a name. 73 | score_threshold : Threshold used for determining what detections to draw. 74 | """ 75 | selection = np.where(scores > score_threshold)[0] 76 | 77 | for i in selection: 78 | c = color if color is not None else label_color(labels[i]) 79 | draw_box(image, boxes[i, :], color=c) 80 | 81 | # draw labels 82 | caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) 83 | draw_caption(image, boxes[i, :], caption) 84 | 85 | 86 | def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): 87 | """ Draws annotations in an image. 88 | 89 | # Arguments 90 | image : The image to draw on. 91 | annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). 92 | color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. 93 | label_to_name : (optional) Functor for mapping a label to a name. 
94 | """ 95 | if isinstance(annotations, np.ndarray): 96 | annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} 97 | 98 | assert('bboxes' in annotations) 99 | assert('labels' in annotations) 100 | assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) 101 | 102 | for i in range(annotations['bboxes'].shape[0]): 103 | label = annotations['labels'][i] 104 | c = color if color is not None else label_color(label) 105 | caption = '{}'.format(label_to_name(label) if label_to_name else label) 106 | draw_caption(image, annotations['bboxes'][i], caption) 107 | draw_box(image, annotations['bboxes'][i], color=c) 108 | -------------------------------------------------------------------------------- /TestTimeAugmentation/kerasfcos/utils_graph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2017-2018 Fizyr (https://fizyr.com) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | import keras.backend as K 18 | import tensorflow as tf 19 | 20 | 21 | def bbox_transform_inv(boxes, deltas, mean=None, std=None): 22 | """ 23 | Applies deltas (usually regression results) to boxes (usually anchors). 24 | 25 | Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed. 26 | The mean and std are the mean and std as applied in the generator. They are unnormalized in this function and then applied to the boxes. 27 | 28 | Args 29 | boxes: np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2). 30 | deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height. 31 | mean: The mean value used when computing deltas (defaults to [0, 0, 0, 0]). 32 | std: The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]). 33 | 34 | Returns 35 | A np.array of the same shape as boxes, but with deltas applied to each box. 36 | The mean and std are used during training to normalize the regression values (networks love normalization). 37 | """ 38 | if mean is None: 39 | mean = [0, 0, 0, 0] 40 | if std is None: 41 | std = [0.2, 0.2, 0.2, 0.2] 42 | 43 | width = boxes[:, :, 2] - boxes[:, :, 0] 44 | height = boxes[:, :, 3] - boxes[:, :, 1] 45 | 46 | x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width 47 | y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height 48 | x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width 49 | y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height 50 | 51 | pred_boxes = K.stack([x1, y1, x2, y2], axis=2) 52 | 53 | return pred_boxes 54 | 55 | 56 | def shift(shape, stride, anchors): 57 | """ 58 | Produce shifted anchors based on shape of the map and stride size. 59 | 60 | Args 61 | shape: Shape to shift the anchors over. (h,w) 62 | stride: Stride to shift the anchors with over the shape. 63 | anchors: The anchors to apply at each location. 
64 | 65 | Returns 66 | shifted_anchors: (fh * fw * num_anchors, 4) 67 | """ 68 | shift_x = (K.arange(0, shape[1], dtype=K.floatx()) + K.constant(0.5, dtype=K.floatx())) * stride 69 | shift_y = (K.arange(0, shape[0], dtype=K.floatx()) + K.constant(0.5, dtype=K.floatx())) * stride 70 | shift_x, shift_y = tf.meshgrid(shift_x, shift_y) 71 | shift_x = K.reshape(shift_x, [-1]) 72 | shift_y = K.reshape(shift_y, [-1]) 73 | 74 | # (4, fh * fw) 75 | shifts = K.stack([ 76 | shift_x, 77 | shift_y, 78 | shift_x, 79 | shift_y 80 | ], axis=0) 81 | # (fh * fw, 4) 82 | shifts = K.transpose(shifts) 83 | number_anchors = K.shape(anchors)[0] 84 | 85 | # number of base points = fh * fw 86 | k = K.shape(shifts)[0] 87 | 88 | # (k=fh*fw, num_anchors, 4) 89 | shifted_anchors = K.reshape(anchors, [1, number_anchors, 4]) + K.cast(K.reshape(shifts, [k, 1, 4]), K.floatx()) 90 | # (k * num_anchors, 4) 91 | shifted_anchors = K.reshape(shifted_anchors, [k * number_anchors, 4]) 92 | 93 | return shifted_anchors 94 | 95 | 96 | def resize_images(images, size, method='bilinear', align_corners=False): 97 | """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_images . 98 | 99 | Args 100 | method: The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area'). 101 | """ 102 | methods = { 103 | 'bilinear': tf.image.ResizeMethod.BILINEAR, 104 | 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, 105 | 'bicubic': tf.image.ResizeMethod.BICUBIC, 106 | 'area': tf.image.ResizeMethod.AREA, 107 | } 108 | return tf.image.resize_images(images, size, methods[method], align_corners) 109 | -------------------------------------------------------------------------------- /TestTimeAugmentation/mainModel.py: -------------------------------------------------------------------------------- 1 | import testTimeAugmentation 2 | import function 3 | import os 4 | import shutil 5 | import argparse 6 | import ensembleOptions 7 | from imutils import paths 8 | notebook = True 9 | def models(listaModels,pathImg,option): 10 | # 1. First we create the folder where we will store the resulting images and create as many folders as we have models 11 | 12 | os.mkdir(pathImg+'/../salida') 13 | for model in listaModels: 14 | os.mkdir(pathImg+'/../salida/'+os.path.splitext(os.path.basename(model.pathPesos))[0]) 15 | 16 | # 2. We create a list with the folders we have created 17 | listDirOut = [] 18 | for filename in os.listdir(pathImg+'/../salida'): 19 | if os.path.isdir(pathImg+'/../salida/'+filename) == True: 20 | listDirOut.append(pathImg+'/../salida/'+filename) 21 | 22 | 23 | # 3. we copy the images from the initial folder to each of the created folders 24 | for dire in listDirOut: 25 | for fich in os.listdir(pathImg): 26 | shutil.copy(pathImg+'/'+fich, dire+'/') 27 | 28 | 29 | # 4. Generate xml 30 | for model in listaModels: 31 | #If the model matches the name of the folder, we will predict it is only folder 32 | for dir in os.listdir(pathImg+'/../salida/'): 33 | if (os.path.splitext(os.path.basename(pathImg+'/../salida/'+model.pathPesos))[0]) == dir: 34 | #Then we list the files in that folder 35 | images = os.listdir(pathImg+'/../salida/'+dir) 36 | model.predict(pathImg+'/../salida/'+dir, pathImg+'/../salida/'+dir)#,confidence) 37 | 38 | 39 | 40 | # 5. 
We perform the ensemble method 41 | for dirOut in os.listdir(pathImg+'/../salida/'): 42 | #for file in list(paths.list_files(pathImg+'/../salida/'+dirOut, validExts=(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"))): 43 | for file in list(os.scandir(pathImg+'/../salida/'+dirOut)): 44 | if file.name.endswith('.jpg') or file.name.endswith('.jpeg') or file.name.endswith('.png') or file.name.endswith('.tif') or file.name.endswith('.tiff') or file.name.endswith('.bmp'): 45 | os.remove(file) 46 | 47 | ensembleOptions.ensembleOptions(pathImg+'/../salida/', option) 48 | for xml in os.scandir(pathImg + '/../salida/output/'): 49 | shutil.copy(pathImg + '/../salida/output/' + xml.name, pathImg + '/') 50 | if notebook is False: 51 | shutil.rmtree(pathImg+'/../salida/') 52 | 53 | 54 | if __name__== "__main__": 55 | #Enter the path of the folder that will contain the images 56 | ap = argparse.ArgumentParser() 57 | ap.add_argument("-d", "--dataset", required=True, help="path to the dataset of images") 58 | ap.add_argument("-o", "--option", default='consensus', help="option to the ensemble: affirmative, consensus or unanimous") 59 | notebook = False 60 | args = vars(ap.parse_args()) 61 | pathImg= args["dataset"] 62 | 63 | option = args["option"] 64 | 65 | #fichs = os.listdir(pathImg) 66 | 67 | imgFolder = pathImg 68 | #the user define configurations fichs 69 | 70 | yoloDarknet = testTimeAugmentation.DarknetYoloPred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/AlvaroPrueba1_600train_65000.weights', '../peso/vocEstomas.names','../peso/yolov3Estomas.cfg',0.7) 71 | ssdResnet = testTimeAugmentation.MXnetSSD512Pred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/ssd_512_resnet50_v1_voc-9c8b225a.params', '../peso/classesMXnet.txt',0.7) 72 | fasterResnet = testTimeAugmentation.MXnetFasterRCNNPred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/faster_rcnn_resnet50_v1b_voc-447328d8.params', '../peso/classesMXnet.txt',0.7) 73 | yoloResnet = testTimeAugmentation.MXnetYoloPred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/yolo3_darknet53_voc-f5ece5ce.params', '../peso/classesMXnet.txt',0.7) 74 | retinaResnet50 = testTimeAugmentation.RetinaNetResnet50Pred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/resnet50_coco_best_v2.1.0.h5', '../peso/coco.csv',0.7) 75 | maskRcnn = testTimeAugmentation.MaskRCNNPred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/mask_rcnn_coco.h5', '../peso/coco.names',0.7) 76 | 77 | listaModels = [retinaResnet50, maskRcnn,yoloResnet,yoloDarknet,fasterResnet,ssdResnet] 78 | 79 | models(listaModels,pathImg,option) 80 | -------------------------------------------------------------------------------- /TestTimeAugmentation/mainTTA.py: -------------------------------------------------------------------------------- 1 | import testTimeAugmentation 2 | import function 3 | import os 4 | import shutil 5 | import sys 6 | import argparse 7 | import ensembleOptions 8 | from imutils import paths 9 | notebook = True 10 | def tta(model,myTechniques,pathImg,option): 11 | fichs = os.listdir(pathImg) 12 | # 1. Create tmp folder 13 | os.mkdir(pathImg+'/tmp') 14 | # move imgs to tmp 15 | for fich in fichs: 16 | shutil.copy(pathImg+'/'+fich, pathImg+'/tmp') 17 | imgFolder = pathImg 18 | os.mkdir(pathImg+'/../salida') 19 | # 3. 
Classification 20 | 21 | for technique in myTechniques: 22 | function.clasification(imgFolder,technique) 23 | # we get all the folders we have created 24 | 25 | listDirOut = [] 26 | for filename in os.scandir(pathImg+'/../salida'): 27 | if os.path.isdir(pathImg+'/../salida/'+filename.name) == True: 28 | listDirOut.append(pathImg+'/../salida/'+filename.name) 29 | 30 | 31 | for dir in listDirOut: 32 | for img in os.scandir(dir+'/tmp'): 33 | img1 = img.name[(img.name).find("_")+1:] 34 | img2 = img1[img1.find("_")+1:] 35 | shutil.move(dir+'/tmp/'+img.name, dir+'/'+img2) 36 | os.rmdir(dir+'/tmp') 37 | 38 | # 4. Generate xml 39 | for dir in listDirOut: 40 | model.predict(dir, dir) 41 | 42 | # 5. Detection 43 | for dir in listDirOut: 44 | tec = dir.split("/") 45 | function.detection(dir, tec[len(tec)-1]) 46 | 47 | for dir in listDirOut: 48 | for img in os.listdir(dir): 49 | if os.path.isdir(dir+'/'+img)== False: 50 | os.remove(dir+'/'+img) 51 | for img in os.listdir(dir+'/tmp'): 52 | img1 = img[img.find("_") + 1:] 53 | img2 = img1[img1.find("_") + 1:] 54 | shutil.move(dir+'/tmp/'+img, dir+'/'+img2) 55 | os.rmdir(dir+'/tmp') 56 | 57 | # 6. Ensemble 58 | for dirOut in os.listdir(pathImg+'/../salida/'): 59 | for file in list(paths.list_files(pathImg+'/../salida/'+dirOut, validExts=(".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"))): 60 | os.remove(file) 61 | 62 | ensembleOptions.ensembleOptions(pathImg+'/../salida/', option) 63 | for xml in os.scandir(pathImg + '/../salida/output/'): 64 | shutil.copy(pathImg + '/../salida/output/' + xml.name, pathImg + '/') 65 | if notebook is False: 66 | shutil.rmtree(pathImg+'/../salida/') 67 | shutil.rmtree(pathImg + '/tmp') 68 | 69 | if __name__== "__main__": 70 | #Enter the path of the folder that will contain the images 71 | ap = argparse.ArgumentParser() 72 | ap.add_argument("-d", "--dataset", required=True, help="path to the dataset of images") 73 | ap.add_argument("-o", "--option", default='consensus', help="option to the ensemble: affirmative, consensus or unanimous") 74 | notebook = False 75 | args = vars(ap.parse_args()) 76 | pathImg = args["dataset"] 77 | 78 | option = args["option"] 79 | imgFolder = pathImg 80 | # the user define configurations fichs 81 | yoloDarknet = testTimeAugmentation.DarknetYoloPred('/home/ancasag/Codigo/General/ensembleObjectDetection/peso/yolov3.weights', '/home/ancasag/Codigo/General/ensembleObjectDetection/peso/coco.names','/home/ancasag/Codigo/General/ensembleObjectDetection/peso/yolov3.cfg', 0.7) 82 | # ssdResnet = testTimeAugmentation.MXnetSSD512Pred('weights/ssd_512_resnet50_v1_voc-9c8b225a.params', 'weights/classesMXnet.txt',0.7) 83 | # fasterResnet = testTimeAugmentation.MXnetFasterRCNNPred('weights/Desktop/peso/faster_rcnn_resnet50_v1b_voc-447328d8.params', 'weights/classesMXnet.txt',0.7) 84 | # yoloResnet = testTimeAugmentation.MXnetYoloPred('weights/Desktop/peso/yolo3_darknet53_voc-f5ece5ce.params', 'weights/classesMXnet.txt',0.7) 85 | # retinaResnet50 = testTimeAugmentation.RetinaNetResnet50Pred('weights/resnet50_coco_best_v2.1.0.h5', 'weights/coco.csv',0.7) 86 | # maskRcnn = testTimeAugmentation.MaskRCNNPred('weights/mask_rcnn_coco.h5', 'weights/coco.names',0.7) 87 | 88 | myTechniques = ["histo", "hflip", "none"] 89 | tta(yoloDarknet, myTechniques, pathImg, option) -------------------------------------------------------------------------------- /TestTimeAugmentation/predict_batch.py: -------------------------------------------------------------------------------- 1 | # USAGE 2 | # python predict_batch.py --input 
logos/images --output output
3 | 
4 | # import the necessary packages
5 | import numpy as np
6 | import mxnet as mx
7 | from mxnet import autograd, gluon
8 | import gluoncv as gcv
9 | import xml.etree.ElementTree as ET
10 | from xml.dom import minidom
11 | from imutils import paths
12 | import numpy as np
13 | import argparse
14 | import cv2
15 | import os
16 | 
17 | # TODO:
18 | # Allow option for --input to be a .txt file OR a directory. Check if
19 | # file, and if so, presume keras-retinanet set of images + labels
20 | #confidence=0.25
21 | 
22 | def mainDataset(dataset,output,name,weights,fichClass,confidence):
23 |     classes=[]
24 |     f = open(fichClass)
25 |     for linea in f:
26 |         classes.append(str((linea.rstrip("\n")).strip()))
27 |     f.close()
28 |     net = gcv.model_zoo.get_model(name, classes=classes, pretrained_base=False)
29 |     net.load_parameters(weights)
30 |     imagePaths = list(os.scandir(dataset))
31 |     # loop over the input image paths
32 |     for (i, imagePath) in enumerate(imagePaths):
33 |         # load the input image (in BGR order), clone it, and preprocess it
34 |         image = cv2.imread(dataset+'/'+imagePath.name)
35 |         (hI, wI, d) = image.shape
36 |         # detect objects in the input image and correct for the image scale
37 |         x, image = gcv.data.transforms.presets.ssd.load_test(dataset+'/'+imagePath.name,min(wI,hI))
38 |         cid, score, bbox = net(x)
39 |         boxes1 = []
40 |         for (box, sc, cl) in zip(bbox[0], score[0], cid[0]):  # per-detection values, renamed so they do not shadow score/cid from net(x)
41 |             if sc < confidence:
42 |                 continue
43 |             boxes1.append((box, sc, net.classes[cl[0].asnumpy()[0].astype('int')]))
44 | 
45 |         # parse the filename from the input image path, construct the
46 |         # path to the output XML file, and write the predicted annotations to disk
47 |         #filename = imagePath.name.split(os.path.sep)[-1]
48 |         ext = os.path.splitext(imagePath)
49 |         file = open(ext[0] + ".xml", "w")
50 |         file.write(generateXML(ext[0], imagePath.name, wI, hI, d, boxes1))  # generateXML expects (filename, outputPath, w, h, d, boxes)
51 |         file.close()
52 | 
53 | def prettify(elem):
54 |     """Return a pretty-printed XML string for the Element.
55 | """ 56 | rough_string = ET.tostring(elem, 'utf-8') 57 | reparsed = minidom.parseString(rough_string) 58 | return reparsed.toprettyxml(indent=" ") 59 | 60 | def generateXML(filename,outputPath,w,h,d,boxes): 61 | top = ET.Element('annotation') 62 | childFolder = ET.SubElement(top, 'folder') 63 | childFolder.text = 'images' 64 | childFilename = ET.SubElement(top, 'filename') 65 | childFilename.text = filename[0:filename.rfind(".")] 66 | childPath = ET.SubElement(top, 'path') 67 | childPath.text = outputPath + "/" + filename 68 | childSource = ET.SubElement(top, 'source') 69 | childDatabase = ET.SubElement(childSource, 'database') 70 | childDatabase.text = 'Unknown' 71 | childSize = ET.SubElement(top, 'size') 72 | childWidth = ET.SubElement(childSize, 'width') 73 | childWidth.text = str(w) 74 | childHeight = ET.SubElement(childSize, 'height') 75 | childHeight.text = str(h) 76 | childDepth = ET.SubElement(childSize, 'depth') 77 | childDepth.text = str(d) 78 | childSegmented = ET.SubElement(top, 'segmented') 79 | childSegmented.text = str(0) 80 | for (box,score, label) in boxes: 81 | box = box.astype("int") 82 | (x,y,xmax,ymax) = box 83 | childObject = ET.SubElement(top, 'object') 84 | childName = ET.SubElement(childObject, 'name') 85 | childName.text = label 86 | childScore = ET.SubElement(childObject, 'confidence') 87 | childScore.text = str(score.asscalar()) 88 | childPose = ET.SubElement(childObject, 'pose') 89 | childPose.text = 'Unspecified' 90 | childTruncated = ET.SubElement(childObject, 'truncated') 91 | childTruncated.text = '0' 92 | childDifficult = ET.SubElement(childObject, 'difficult') 93 | childDifficult.text = '0' 94 | childBndBox = ET.SubElement(childObject, 'bndbox') 95 | childXmin = ET.SubElement(childBndBox, 'xmin') 96 | childXmin.text = str(max(x.asscalar(),1)) 97 | childYmin = ET.SubElement(childBndBox, 'ymin') 98 | childYmin.text = str(max(y.asscalar(),1)) 99 | childXmax = ET.SubElement(childBndBox, 'xmax') 100 | childXmax.text = str(min(xmax.asscalar(),w-1)) 101 | childYmax = ET.SubElement(childBndBox, 'ymax') 102 | childYmax.text = str(min(ymax.asscalar(),h-1)) 103 | return prettify(top) 104 | 105 | 106 | -------------------------------------------------------------------------------- /TestTimeAugmentation/predict_batch_FCOS.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import kerasfcos.models 3 | from kerasfcos.utils.image import read_image_bgr, preprocess_image, resize_image 4 | from kerasfcos.utils.visualization import draw_box, draw_caption 5 | from kerasfcos.utils.colors import label_color 6 | import xml.etree.ElementTree as ET 7 | from xml.dom import minidom 8 | from imutils import paths 9 | 10 | # import miscellaneous modules 11 | import cv2 12 | import os 13 | import os.path as osp 14 | import numpy as np 15 | import time 16 | import glob 17 | 18 | # set tf backend to allow memory to grow, instead of claiming everything 19 | import tensorflow as tf 20 | from kerasfcos.utils.anchors import guess_shapes, compute_locations 21 | 22 | 23 | def get_session(): 24 | config = tf.ConfigProto() 25 | config.gpu_options.allow_growth = True 26 | return tf.Session(config=config) 27 | 28 | #confidence=0.5 29 | 30 | def prettify(elem): 31 | """Return a pretty-printed XML string for the Element. 
32 |     """
33 |     rough_string = ET.tostring(elem, 'utf-8')
34 |     reparsed = minidom.parseString(rough_string)
35 |     return reparsed.toprettyxml(indent=" ")
36 | 
37 | 
38 | def generateXML(filename,outputPath,w,h,d,boxes,classes):
39 |     top = ET.Element('annotation')
40 |     childFolder = ET.SubElement(top, 'folder')
41 |     childFolder.text = 'images'
42 |     childFilename = ET.SubElement(top, 'filename')
43 |     childFilename.text = filename[0:filename.rfind(".")]
44 |     childPath = ET.SubElement(top, 'path')
45 |     childPath.text = outputPath + "/" + filename
46 |     childSource = ET.SubElement(top, 'source')
47 |     childDatabase = ET.SubElement(childSource, 'database')
48 |     childDatabase.text = 'Unknown'
49 |     childSize = ET.SubElement(top, 'size')
50 |     childWidth = ET.SubElement(childSize, 'width')
51 |     childWidth.text = str(w)
52 |     childHeight = ET.SubElement(childSize, 'height')
53 |     childHeight.text = str(h)
54 |     childDepth = ET.SubElement(childSize, 'depth')
55 |     childDepth.text = str(d)
56 |     childSegmented = ET.SubElement(top, 'segmented')
57 |     childSegmented.text = str(0)
58 |     for (box,score) in boxes:
59 |         category = classes[box[0]]
60 |         box = box[1].astype("int")
61 |         (x,y,xmax,ymax) = box
62 |         childObject = ET.SubElement(top, 'object')
63 |         childName = ET.SubElement(childObject, 'name')
64 |         childName.text = category
65 |         childScore = ET.SubElement(childObject, 'confidence')
66 |         childScore.text = str(score)
67 |         childPose = ET.SubElement(childObject, 'pose')
68 |         childPose.text = 'Unspecified'
69 |         childTruncated = ET.SubElement(childObject, 'truncated')
70 |         childTruncated.text = '0'
71 |         childDifficult = ET.SubElement(childObject, 'difficult')
72 |         childDifficult.text = '0'
73 |         childBndBox = ET.SubElement(childObject, 'bndbox')
74 |         childXmin = ET.SubElement(childBndBox, 'xmin')
75 |         childXmin.text = str(x)
76 |         childYmin = ET.SubElement(childBndBox, 'ymin')
77 |         childYmin.text = str(y)
78 |         childXmax = ET.SubElement(childBndBox, 'xmax')
79 |         childXmax.text = str(xmax)
80 |         childYmax = ET.SubElement(childBndBox, 'ymax')
81 |         childYmax.text = str(ymax)
82 |     return prettify(top)
83 | 
84 | def mainDataset(dataset,output, weights, fichClass,confidence):
85 |     f = open(fichClass)
86 |     LABELS = f.read().strip().split("\n")
87 |     LABELS = [label.split(',')[0] for label in LABELS]
88 |     f.close()
89 | 
90 |     weighted_bifpn = False
91 |     keras.backend.tensorflow_backend.set_session(get_session())
92 |     model_path = weights
93 |     model = kerasfcos.models.load_model(model_path, backbone_name='resnet50')
94 |     model = kerasfcos.models.convert_model(model)
95 | 
96 | 
97 |     imagePaths = list(os.scandir(dataset))
98 |     # loop over the input image paths
99 |     for (i, imagePath) in enumerate(imagePaths):
100 |         image = read_image_bgr(dataset+'/'+imagePath.name)
101 |         h, w, d = image.shape  # keep the original image size before preprocessing/resizing
102 |         image = preprocess_image(image)
103 |         image, scale = resize_image(image)
104 |         boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
105 |         boxes /= scale  # boxes are now expressed in original image coordinates
106 |         boxes1 = []
107 |         for (box, score, label) in zip(boxes[0], scores[0], labels[0]):
108 |             if score < confidence:
109 |                 continue
110 |             boxes1.append(([label,box],score))
111 |         ext = os.path.splitext(imagePath)
112 |         file = open(ext[0] + ".xml", "w")
113 |         file.write(generateXML(ext[0], imagePath.name, w, h, d, boxes1, LABELS))
114 |         file.close()
115 | 
-------------------------------------------------------------------------------- /TestTimeAugmentation/predict_batch_FSAF.py: --------------------------------------------------------------------------------
1 | import
keras 2 | import FSAF.models 3 | from FSAF.utils.image import read_image_bgr, preprocess_image, resize_image 4 | from FSAF.utils.visualization import draw_box, draw_caption 5 | from FSAF.utils.colors import label_color 6 | import xml.etree.ElementTree as ET 7 | from xml.dom import minidom 8 | from imutils import paths 9 | 10 | from FSAF.models.resnet import resnet_fsaf 11 | from FSAF.models.retinanet import fsaf_bbox 12 | 13 | # import miscellaneous modules 14 | import cv2 15 | import os 16 | import os.path as osp 17 | import numpy as np 18 | import time 19 | import glob 20 | 21 | # set tf backend to allow memory to grow, instead of claiming everything 22 | import tensorflow as tf 23 | 24 | 25 | def get_session(): 26 | config = tf.ConfigProto() 27 | config.gpu_options.allow_growth = True 28 | return tf.Session(config=config) 29 | 30 | #confidence=0.5 31 | 32 | def prettify(elem): 33 | """Return a pretty-printed XML string for the Element. 34 | """ 35 | rough_string = ET.tostring(elem, 'utf-8') 36 | reparsed = minidom.parseString(rough_string) 37 | return reparsed.toprettyxml(indent=" ") 38 | 39 | 40 | def generateXML(filename,outputPath,w,h,d,boxes,classes): 41 | top = ET.Element('annotation') 42 | childFolder = ET.SubElement(top, 'folder') 43 | childFolder.text = 'images' 44 | childFilename = ET.SubElement(top, 'filename') 45 | childFilename.text = filename[0:filename.rfind(".")] 46 | childPath = ET.SubElement(top, 'path') 47 | childPath.text = outputPath + "/" + filename 48 | childSource = ET.SubElement(top, 'source') 49 | childDatabase = ET.SubElement(childSource, 'database') 50 | childDatabase.text = 'Unknown' 51 | childSize = ET.SubElement(top, 'size') 52 | childWidth = ET.SubElement(childSize, 'width') 53 | childWidth.text = str(w) 54 | childHeight = ET.SubElement(childSize, 'height') 55 | childHeight.text = str(h) 56 | childDepth = ET.SubElement(childSize, 'depth') 57 | childDepth.text = str(d) 58 | childSegmented = ET.SubElement(top, 'segmented') 59 | childSegmented.text = str(0) 60 | for (box,score) in boxes: 61 | category = classes[box[0]] 62 | box = box[1].astype("int") 63 | (x,y,xmax,ymax) = box 64 | childObject = ET.SubElement(top, 'object') 65 | childName = ET.SubElement(childObject, 'name') 66 | childName.text = category 67 | childScore = ET.SubElement(childObject, 'confidence') 68 | childScore.text = str(score) 69 | childPose = ET.SubElement(childObject, 'pose') 70 | childPose.text = 'Unspecified' 71 | childTruncated = ET.SubElement(childObject, 'truncated') 72 | childTruncated.text = '0' 73 | childDifficult = ET.SubElement(childObject, 'difficult') 74 | childDifficult.text = '0' 75 | childBndBox = ET.SubElement(childObject, 'bndbox') 76 | childXmin = ET.SubElement(childBndBox, 'xmin') 77 | childXmin.text = str(x) 78 | childYmin = ET.SubElement(childBndBox, 'ymin') 79 | childYmin.text = str(y) 80 | childXmax = ET.SubElement(childBndBox, 'xmax') 81 | childXmax.text = str(xmax) 82 | childYmax = ET.SubElement(childBndBox, 'ymax') 83 | childYmax.text = str(ymax) 84 | return prettify(top) 85 | 86 | def mainDataset(dataset,output,weights,fichClass,confidence,backbone='resnet50'): 87 | f = open(fichClass) 88 | LABELS = f.read().strip().split("\n") 89 | LABELS = [label.split(',')[0] for label in LABELS] 90 | f.close() 91 | 92 | fsaf = resnet_fsaf(num_classes=len(LABELS), backbone=backbone) 93 | model = fsaf_bbox(fsaf) 94 | model.load_weights(weights, by_name=True) 95 | 96 | 97 | imagePaths = list(os.scandir(dataset)) 98 | # loop over the input image paths 99 | for (i, image_path) 
in enumerate(imagePaths):
100 |         image = read_image_bgr(dataset+'/'+image_path.name)
101 |         h, w, d = image.shape  # keep the original image size before preprocessing/resizing
102 |         image = preprocess_image(image)
103 |         image, scale = resize_image(image)
104 |         boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
105 |         boxes /= scale  # boxes are now expressed in original image coordinates
106 |         boxes1 = []
107 |         for (box, score, label) in zip(boxes[0], scores[0], labels[0]):
108 |             if score < confidence:
109 |                 continue
110 |             boxes1.append(([label,box],score))
111 |         ext = os.path.splitext(image_path)
112 |         file = open(ext[0] + ".xml", "w")
113 |         file.write(generateXML(ext[0], image_path.name, w, h, d, boxes1, LABELS))
114 |         file.close()
115 | 
-------------------------------------------------------------------------------- /TestTimeAugmentation/techniques.py: --------------------------------------------------------------------------------
1 | from clodsa.techniques.techniqueFactory import createTechnique
2 | 
3 | techniques={
4 | "avgBlur": (createTechnique("average_blurring", {"kernel" : 5}), createTechnique("none", {})),
5 | "bilaBlur": (createTechnique("bilateral_blurring", {"diameter" : 11, "sigmaColor": 21, "sigmaSpace":7}), createTechnique("none", {})),  # bilateral filter parameters (diameter/sigmaColor/sigmaSpace)
6 | "blur": (createTechnique("blurring", {"ksize" : 5}), createTechnique("none", {})),
7 | "chanHsv": (createTechnique("change_to_hsv",{}), createTechnique("none", {})),
8 | "chanLab": (createTechnique("change_to_lab",{}), createTechnique("none", {})),  # convert to the CIELAB colour space, mirroring "chanHsv"
9 | "crop": (createTechnique("crop",{"percentage":0.8,"startFrom": "TOPLEFT"}), createTechnique("none", {})),
10 | "dropOut": (createTechnique("dropout",{"percentage":0.05}), createTechnique("none", {})),
11 | "elastic": (createTechnique("elastic",{"alpha":5,"sigma":0.05}), createTechnique("none", {})),
12 | "histo": (createTechnique("equalize_histogram",{}), createTechnique("none", {})),
13 | "vflip": (createTechnique("flip", {"flip": 0}), createTechnique("flip", {"flip": 0})),
14 | "hflip": (createTechnique("flip", {"flip": 1}), createTechnique("flip", {"flip": 1})),
15 | "hvflip": (createTechnique("flip", {"flip": -1}), createTechnique("flip", {"flip": -1})),
16 | "gamma": (createTechnique("gamma",{"gamma":1.5}), createTechnique("none", {})),
17 | "blurGau": (createTechnique("gaussian_blur", {"kernel" : 5}), createTechnique("none", {})),
18 | "avgNoise": (createTechnique("gaussian_noise", {"mean":0, "sigma":10}), createTechnique("none", {})),
19 | "invert": (createTechnique("invert",{}), createTechnique("none", {})),
20 | "medianblur": (createTechnique("median_blur", {"kernel" : 5}), createTechnique("none", {})),
21 | "none": (createTechnique("none", {}), createTechnique("none", {})),
22 | "raiseBlue": (createTechnique("raise_blue", {"power" : 0.9}), createTechnique("none", {})),
23 | "raiseGreen": (createTechnique("raise_green", {"power" : 0.9}), createTechnique("none", {})),
24 | "raiseHue": (createTechnique("raise_hue", {"power" : 0.9}), createTechnique("none", {})),
25 | "raiseRed": (createTechnique("raise_red", {"power" : 0.9}), createTechnique("none", {})),
26 | "raiseSatu": (createTechnique("raise_saturation", {"power" : 0.9}), createTechnique("none", {})),
27 | "raiseValue": (createTechnique("raise_value", {"power" : 0.9}), createTechnique("none", {})),
28 | "resize": (createTechnique("resize", {"percentage" : 0.9,"method":"INTER_NEAREST"}), createTechnique("none", {})),
29 | "rotation10": (createTechnique("rotate", {"angle": 10}), createTechnique("rotate", {"angle": -10})),
30 | "rotation90": (createTechnique("rotate", {"angle": 90}), createTechnique("rotate", {"angle": 
-90})), 31 | "rotation180": (createTechnique("rotate", {"angle": 180}), createTechnique("rotate", {"angle": -180})), 32 | "rotation270": (createTechnique("rotate", {"angle": 270}), createTechnique("rotate", {"angle": -270})), 33 | "saltPeper": (createTechnique("salt_and_pepper", {"low" : 0,"up":25}), createTechnique("none", {})), 34 | "sharpen": (createTechnique("sharpen", {}), createTechnique("none", {})), 35 | "shiftChannel": (createTechnique("shift_channel", {"shift":0.2}), createTechnique("none", {})), 36 | "shearing": (createTechnique("shearing", {"a":0.5}), createTechnique("none", {})), 37 | "translation": (createTechnique("translation", {"x":10,"y":10}), createTechnique("none", {})) 38 | } -------------------------------------------------------------------------------- /TestTimeAugmentation/testTimeAugmentation.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from abc import ABC 3 | 4 | #abstract class 5 | class IPredictor(ABC): 6 | #constructor 7 | def __init__(self, weightPath): 8 | self.pathPesos = weightPath 9 | 10 | @abc.abstractmethod 11 | def predict(self,imgPath): 12 | pass 13 | 14 | #heritage 15 | class DarknetYoloPred(IPredictor): 16 | 17 | def __init__(self,weightPath,fichNames, fichCfg, conf): 18 | IPredictor.__init__(self, weightPath) 19 | self.fichNames = fichNames 20 | self.fichCfg = fichCfg 21 | self.conf = conf 22 | 23 | def predict(self, imgPath, output): 24 | import detect 25 | detect.mainDataset(imgPath, output, self.pathPesos, self.fichNames, self.fichCfg, self.conf) 26 | 27 | 28 | class MXnetYoloPred(IPredictor): 29 | 30 | def __init__(self,weightPath,classes, conf): 31 | IPredictor.__init__(self, weightPath) 32 | self.classes=classes 33 | self.conf = conf 34 | 35 | def predict(self, imgPath, output): 36 | import predict_batch 37 | predict_batch.mainDataset(imgPath, output,'yolo3_darknet53_custom', self.pathPesos, self.classes, self.conf) 38 | 39 | class MXnetSSD512Pred(IPredictor): 40 | 41 | def __init__(self,weightPath,classes, conf): 42 | IPredictor.__init__(self, weightPath) 43 | self.classes=classes 44 | self.conf = conf 45 | 46 | def predict(self, imgPath, output): 47 | import predict_batch 48 | predict_batch.mainDataset(imgPath, output,'ssd_512_resnet50_v1_custom',self.pathPesos, self.classes, self.conf) 49 | 50 | class MXnetFasterRCNNPred(IPredictor): 51 | 52 | def __init__(self,weightPath,classes, conf): 53 | IPredictor.__init__(self, weightPath) 54 | self.classes=classes 55 | self.conf = conf 56 | 57 | def predict(self, imgPath, output): 58 | import predict_batch 59 | predict_batch.mainDataset(imgPath, output,'faster_rcnn_resnet50_v1b_custom', self.pathPesos, self.classes, self.conf) 60 | 61 | class RetinaNetResnet50Pred(IPredictor): 62 | 63 | def __init__(self,weightPath,classes, conf): 64 | IPredictor.__init__(self, weightPath) 65 | self.classes=classes 66 | self.conf = conf 67 | 68 | def predict(self, imgPath, output): 69 | import predict_batch_retinanet 70 | predict_batch_retinanet.mainDataset(imgPath, output,'resnet50_v1', self.pathPesos, self.classes, self.conf) 71 | 72 | class MaskRCNNPred(IPredictor): 73 | 74 | def __init__(self,weightPath,classes, conf): 75 | IPredictor.__init__(self, weightPath) 76 | self.classes=classes 77 | self.conf = conf 78 | 79 | def predict(self, imgPath, output): 80 | import predict_batch_rcnn 81 | predict_batch_rcnn.mainDataset(imgPath, output, self.pathPesos, self.classes, self.conf) 82 | 83 | 84 | class Efficient(IPredictor): 85 | 86 | def __init__(self, 
weightPath, classes, conf): 87 | IPredictor.__init__(self, weightPath) 88 | self.classes = classes 89 | self.conf = conf 90 | 91 | def predict(self, imgPath, output): 92 | import predict_batch_efficient 93 | predict_batch_efficient.mainDataset(imgPath, output, self.pathPesos, self.classes, self.conf) 94 | 95 | class FSAF(IPredictor): 96 | 97 | def __init__(self, weightPath, classes, conf): 98 | IPredictor.__init__(self, weightPath) 99 | self.classes = classes 100 | self.conf = conf 101 | 102 | def predict(self, imgPath, output): 103 | import predict_batch_FSAF 104 | predict_batch_FSAF.mainDataset(imgPath, output, self.pathPesos, self.classes, self.conf) 105 | 106 | class FCOS(IPredictor): 107 | 108 | def __init__(self, weightPath, classes, conf): 109 | IPredictor.__init__(self, weightPath) 110 | self.classes = classes 111 | self.conf = conf 112 | 113 | def predict(self, imgPath, output): 114 | import predict_batch_FCOS 115 | predict_batch_FCOS.mainDataset(imgPath, output, self.pathPesos, self.classes, self.conf) 116 | -------------------------------------------------------------------------------- /images/diagramaClases.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/images/diagramaClases.jpg -------------------------------------------------------------------------------- /images/ensemble.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/images/ensemble.jpg -------------------------------------------------------------------------------- /images/images.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /images/testTimeAugm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ancasag/ensembleObjectDetection/2c3be846caf31eafab8b5660a3f62a6d88578c03/images/testTimeAugm.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clodsa==1.2.35 2 | future==0.17.1 3 | gluoncv==0.5.0 4 | imutils==0.5.3 5 | keras==2.3.1 6 | keras_retinanet==0.5.1 7 | lxml==4.4.2 8 | mrcnn==0.2 9 | mxnet==1.5.1.post0 10 | numpy==1.17.4 11 | opencv-python==3.4.2.16 12 | pandas==0.24.1 13 | scikit-image==0.16.2 14 | tensorflow==1.15.2 15 | tqdm==4.40.2 16 | --------------------------------------------------------------------------------
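
For orientation, the following is a minimal sketch, not a file of the repository, of how the ensembling entry point defined in TestTimeAugmentation/mainModel.py is meant to be driven, following the pattern of its __main__ block. The weight files, class files and the image folder below are placeholder paths that have to be replaced with your own; the detector wrappers come from testTimeAugmentation.py and the voting options from ensembleOptions.py.

    import testTimeAugmentation
    from mainModel import models

    # Placeholder paths: point these at your own weights, class files and image folder.
    yoloDarknet = testTimeAugmentation.DarknetYoloPred('weights/yolov3.weights', 'weights/coco.names', 'weights/yolov3.cfg', 0.7)
    retinaResnet50 = testTimeAugmentation.RetinaNetResnet50Pred('weights/resnet50_coco_best_v2.1.0.h5', 'weights/coco.csv', 0.7)

    # Each detector annotates a copy of the image folder with Pascal VOC XML files;
    # the ensemble then merges those files with the chosen voting strategy
    # ('affirmative', 'consensus' or 'unanimous') and copies the merged XML next to the images.
    models([yoloDarknet, retinaResnet50], 'datasets/images', 'consensus')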
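
Likewise, a minimal sketch of the test-time augmentation entry point defined in TestTimeAugmentation/mainTTA.py, again with placeholder paths. The technique names are keys of the dictionary in TestTimeAugmentation/techniques.py, and any of the IPredictor subclasses from testTimeAugmentation.py can be passed as the model.

    import testTimeAugmentation
    from mainTTA import tta

    # Placeholder paths, as above.
    yoloDarknet = testTimeAugmentation.DarknetYoloPred('weights/yolov3.weights', 'weights/coco.names', 'weights/yolov3.cfg', 0.7)

    # Equalize the histogram, flip horizontally and keep an unmodified copy, detect on
    # each augmented variant, undo the transformation on the resulting boxes, and
    # merge the detections by consensus voting.
    tta(yoloDarknet, ["histo", "hflip", "none"], 'datasets/images', 'consensus')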