├── LICENSE ├── README.md ├── angle_nms ├── angle_soft_nms.py ├── librbox.cpp ├── librbox.so └── make_angle_nms.sh ├── cocoapi_ro ├── LuaAPI │ ├── CocoApi.lua │ ├── MaskApi.lua │ ├── cocoDemo.lua │ ├── env.lua │ ├── init.lua │ └── rocks │ │ └── coco-scm-1.rockspec ├── MatlabAPI │ ├── CocoApi.m │ ├── CocoEval.m │ ├── CocoUtils.m │ ├── MaskApi.m │ ├── cocoDemo.m │ ├── evalDemo.m │ ├── gason.m │ └── private │ │ ├── gasonMex.cpp │ │ ├── gasonMex.mexa64 │ │ ├── gasonMex.mexmaci64 │ │ └── getPrmDflt.m ├── PythonAPI │ ├── Makefile │ ├── pycocoDemo.ipynb │ ├── pycocoEvalDemo.ipynb │ ├── pycocotools_ro │ │ ├── __init__.py │ │ ├── _mask.c │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── cocoeval.py │ │ └── mask.py │ └── setup.py ├── README.txt ├── common │ ├── gason.cpp │ ├── gason.h │ ├── maskApi.c │ └── maskApi.h └── license.txt ├── images ├── drn.png └── sku110k_r.png ├── rotate_augment.py └── rotation conv layer ├── rotation_conv_utils.py └── test_rcl.py /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. 
If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. 
If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 |
2 | # DRN and SKU110K-R
3 | #### Xingjia Pan, Yuqiang Ren, Kekai Sheng, Weiming Dong, Haolei Yuan, Xiaowei Guo, Chongyang Ma, Changsheng Xu
4 |
5 | ### Work in progress.
6 |
7 | Dynamic Refinement Network for Oriented and Densely Packed Object Detection [[Paper Link]](https://arxiv.org/abs/2005.09973)
8 |
9 |
10 |
11 | Figure 1. Overall framework of our Dynamic Refinement Network. The backbone network is followed by two modules, i.e., the feature selection module (FSM) and the dynamic refinement heads (DRHs). FSM selects the most suitable features by adaptively adjusting receptive fields. The DRHs dynamically refine the predictions in an object-aware manner.
12 |
13 | ## SKU110K-R
14 |
15 |
16 | Figure 2. Some sample images from SKU110K. The images in the top row are annotated with horizontal bounding boxes, while the images in the bottom row are annotated with oriented bounding boxes.
17 |
18 | To use SKU110K-R:
19 |
20 | 0. Download the original SKU110K data set from [website](https://github.com/eg4000/SKU110K_CVPR19) and extract the images
21 |
22 | 1. Generate SKU110K-R using our rotation augmentation script
23 |
24 | ```
25 | python rotate_augment.py path/to/images
26 | ```
27 |
28 | 2. Download the annotations for SKU110K-R from [website](https://drive.google.com/file/d/1_5JsVc_A5vWm-d-JXMJdX0Lx5FIlgAXJ/view?usp=sharing)
29 |
30 | The annotations are in COCO format.
31 |
32 | ## Evaluation tools
33 | ### cocoapi_ro
34 | We provide a variant of cocoapi for evaluating rotated bounding boxes.
35 |
36 | 0. Install cocoapi_ro (similar to cocoapi)
37 |
38 | ```
39 | cd PythonAPI
40 | make
41 | ```
42 |
43 | 1. Replace pycocotools with pycocotools_ro
44 |
45 | **FROM**
46 | ```
47 | import pycocotools.coco as coco
48 | from pycocotools.cocoeval import COCOeval
49 | ```
50 | **TO**
51 | ```
52 | import pycocotools_ro.coco as coco
53 | from pycocotools_ro.cocoeval import COCOeval
54 | ```
55 |
56 | 2. Update the evaluation code (a minimal end-to-end sketch is shown after this list).
57 |
58 | **FROM**
59 | ```
60 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
61 | ```
62 | **TO**
63 | ```
64 | coco_eval = COCOeval(self.coco, coco_dets, "rbbox")
65 | coco_eval.params.maxDets = [1, 10, 300]
66 | ```
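
For reference, here is a minimal end-to-end evaluation sketch. It is a hedged example rather than code from this repository: it assumes pycocotools_ro keeps the standard pycocotools interface (`COCO`, `loadRes`, `evaluate`/`accumulate`/`summarize`), and the two JSON file names are placeholders for your own ground-truth and result files.

```
import pycocotools_ro.coco as coco
from pycocotools_ro.cocoeval import COCOeval

# Placeholder paths -- point these at your SKU110K-R annotation and result files.
coco_gt = coco.COCO('path/to/sku110k_r_annotations.json')
coco_dets = coco_gt.loadRes('path/to/detections.json')

coco_eval = COCOeval(coco_gt, coco_dets, "rbbox")   # "rbbox" = rotated-box IoU
coco_eval.params.maxDets = [1, 10, 300]             # densely packed scenes need a large maxDets
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
```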
67 | ### angle_nms
68 | We provide **angle_nms** for NMS of rotated bounding boxes in post-processing.
69 |
70 | ```
71 | from angle_nms.angle_soft_nms import angle_soft_nms
72 | # Example
73 | result_after_nms = angle_soft_nms(all_dets, Nt=0.5, method=1, threshold=0.05)
74 | # all_dets: detection results
75 | # Nt: IoU threshold
76 | # method: 1, linear soft-NMS; 2, Gaussian soft-NMS; other, standard NMS
77 | # threshold: the minimum confidence value to retain a detection bbox
78 | ```
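
To make the expected input concrete, here is a hedged usage sketch. The column layout (cx, cy, w, h, angle in radians, score per row) follows the comments inside `angle_nms/angle_soft_nms.py`; the numeric values are made up for illustration, and `librbox.so` is assumed to have been built already (see `make_angle_nms.sh`).

```
import numpy as np
from angle_nms.angle_soft_nms import angle_soft_nms

# Each row: [cx, cy, w, h, angle (radians), score]; values are illustrative only.
all_dets = np.array([
    [100.0, 100.0, 60.0, 20.0, 0.10, 0.95],
    [102.0,  98.0, 58.0, 22.0, 0.12, 0.80],
    [300.0, 240.0, 40.0, 40.0, 1.20, 0.60],
])

# method=1 -> linear soft-NMS: scores of boxes that overlap a higher-scoring box
# are decayed in place; the angle column is converted to degrees internally and
# back to radians before the array is returned. Pass a copy to keep the input intact.
result_after_nms = angle_soft_nms(all_dets.copy(), Nt=0.5, method=1, threshold=0.05)
print(result_after_nms)
```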
79 | ## Rotation Conv Layer
80 |
81 | 1. To use the rotation conv layer, you need to install DCNv2 first:
82 |
83 | ```
84 | # git clone -b pytorch_1.0.0 https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch.git
85 | # mv Deformable-Convolution-V2-PyTorch DCNv2
86 | cd DCNv2
87 | ./make.sh
88 | ```
89 | 2. Then modify the DCNv2 import paths in **rotation_conv_utils.py** so that they point to your DCNv2 checkout, e.g.:
90 |
91 | ```
92 | from DCNv2.modules.modulated_deform_conv import ModulatedDeformConv
93 | from DCNv2.functions.modulated_deform_conv_func import ModulatedDeformConvFunction
94 |
95 | ```
96 |
97 | 3. We provide a simple example of using the rotation conv layer in **test_rcl.py**.
98 |
99 |
100 | ## Citation
101 |
102 | If you find this project useful for your research, please use the following BibTeX entry.
103 | ```
104 | @inproceedings{pan2020dynamic,
105 |   title={Dynamic Refinement Network for Oriented and Densely Packed Object Detection},
106 |   author={Xingjia Pan and Yuqiang Ren and Kekai Sheng and Weiming Dong and Haolei Yuan and Xiaowei Guo and Chongyang Ma and Changsheng Xu},
107 |   booktitle={CVPR},
108 |   pages={1--8},
109 |   year={2020}
110 | }
111 | ```
112 | ## Contacts
113 | If you have any questions about our work, please do not hesitate to contact us by email.
114 | Xingjia Pan: xingjia.pan@nlpr.ia.ac.cn
115 | Yuqiang Ren: condiren@tencent.com
116 |
-------------------------------------------------------------------------------- /angle_nms/angle_soft_nms.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | # written by allenjbqin
9 | # 2019.05.15
10 | import os
11 | import numpy as np
12 | import ctypes
13 | from ctypes import *
14 |
15 | so_file_path = os.path.join( os.path.abspath(os.path.dirname(__file__)), 'librbox.so')
16 | so = ctypes.cdll.LoadLibrary
17 | librbox = so(so_file_path)
18 |
19 | overlap = librbox.Overlap
20 | overlap.argtypes = (POINTER(c_double), POINTER(c_double))
21 | overlap.restype = c_double
22 |
23 |
24 | def py_cpu_nms(dets, thresh):
25 |     """Pure Python NMS baseline."""
26 |     x1 = dets[:, 0]
27 |     y1 = dets[:, 1]
28 |     x2 = dets[:, 2]
29 |     y2 = dets[:, 3]
30 |     scores = dets[:, 4]
31 |
32 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
33 |     order = scores.argsort()[::-1]
34 |
35 |     keep = []
36 |     while order.size > 0:
37 |         i = order[0]
38 |         keep.append(i)
39 |         xx1 = np.maximum(x1[i], x1[order[1:]])
40 |         yy1 = np.maximum(y1[i], y1[order[1:]])
41 |         xx2 = np.minimum(x2[i], x2[order[1:]])
42 |         yy2 = np.minimum(y2[i], y2[order[1:]])
43 |
44 |         w = np.maximum(0.0, xx2 - xx1 + 1)
45 |         h = np.maximum(0.0, yy2 - yy1 + 1)
46 |         inter = w * h
47 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
48 |
49 |         inds = np.where(ovr <= thresh)[0]
50 |         order = order[inds + 1]
51 |
52 |     return keep
53 |
54 | def angle_soft_nms(all_dets, sigma=0.5, Nt=0.1, threshold=0.001, method=0):
55 |     """Pure Python soft-NMS for rotated boxes."""
56 |     # dets = np.concatenate((all_dets[:, 0:4], all_dets[:, -1:]), axis=1)
57 |     # scores = all_dets[:, 4]
58 |     # cx,cy,w,h,angle,score
59 |     # all_dets[:,4] = (all_dets[:,4]-0.5)*180.0
60 |     all_dets[:,4] = all_dets[:,4] / np.pi *180.0
61 |     boxes = all_dets
62 |     # scores = all_dets[:, -1]
63 |     N = all_dets.shape[0]
64 |     if N > 0 and len(boxes[0]) == 7:
65 |         is_scale = True
66 |     else:
67 |         is_scale = False
68 |
69 |     # # ## a simple example
70 |     # a = np.array([10,10,10,20,0])
71 |     # b = np.array([10,10,10,20,0.75])
72 |     # cd_a = (c_double * 5)()
73 |     # cd_b = (c_double * 5)()
74 |     # cd_a[0] = c_double(a[0])
75 |     # cd_a[1] = c_double(a[1])
76 |     # cd_a[2] = c_double(a[2])
77 |     # cd_a[3] = c_double(a[3])
78 |     # cd_a[4] = c_double(a[4])
79 |     # cd_b[0] = c_double(b[0])
80 |     # cd_b[1] = c_double(b[1])
81 |     # cd_b[2] = c_double(b[2])
82 |     # cd_b[3] = c_double(b[3])
83 |     # cd_b[4] = c_double(b[4])
84 |     # ov = overlap(cd_a, cd_b)
85 |     for i in range(N):
86 |         maxscore = boxes[i, 5]
87 |         maxpos = i
88 |         # store the i-th bbox in temporary variables
89 |         tcx = boxes[i, 0]
90 |         tcy = boxes[i, 1]
91 |         tw = boxes[i, 2]
92 |         th = boxes[i, 3]
93 |         tangle = boxes[i, 4]
94 |         ts= boxes[i, 5]
95 |         if is_scale:
96 |             scale= boxes[i, 6]
97 |
98 |         pos = i + 1
99 |         # get max box
100 |         while pos < N:
101 |             if maxscore < boxes[pos, 5]:
102 |                 maxscore = boxes[pos, 5]
103 |                 maxpos = pos
104 |             pos = pos + 1
105 |
106 |         # add max box as a detection
107 |         boxes[i, 0] = boxes[maxpos, 0]
108 |         boxes[i, 1] = boxes[maxpos, 1]
109 |         boxes[i, 2] = boxes[maxpos, 2]
110 |         boxes[i, 3] = boxes[maxpos, 3]
111 |         boxes[i, 4] = boxes[maxpos, 4]
112 |         boxes[i, 5] = boxes[maxpos, 5]
113 |         if is_scale:
114 |             boxes[i, 6] = boxes[maxpos, 6]
115 |
116 |         # swap ith box with position of max box
117 |         boxes[maxpos, 0] = tcx
118 |         boxes[maxpos, 1] = tcy
119 |         boxes[maxpos, 2] = tw
120 |         boxes[maxpos, 3] = th
121 |         boxes[maxpos, 4] = tangle
122 |         boxes[maxpos, 5] = ts
123 |         if is_scale:
124 |             boxes[maxpos, 6] = scale
125 |
126 |         # slot i now holds the max-score box; (optionally) re-store the i-th bbox in temp
127 |         # tcx = boxes[i, 0]
128 |         # tcy = boxes[i, 1]
129 |         # tw = boxes[i, 2]
130 |         # th = boxes[i, 3]
131 |         # tangle = boxes[i, 4]
132 |         # ts= boxes[i, 5]
133 |
134 |         box1 = (c_double * 5)()
135 |         box2 = (c_double * 5)()
136 |         box1[0] = c_double(boxes[i, 0])
137 |         box1[1] = c_double(boxes[i, 1])
138 |         box1[2] = c_double(boxes[i, 2])
139 |         box1[3] = c_double(boxes[i, 3])
140 |         box1[4] = c_double(boxes[i, 4])
141 |
142 |         pos = i + 1
143 |         # NMS iterations, note that N changes if detection boxes fall below threshold
144 |         while pos < N:
145 |             box2[0] = c_double(boxes[pos, 0])
146 |             box2[1] = c_double(boxes[pos, 1])
147 |             box2[2] = c_double(boxes[pos, 2])
148 |             box2[3] = c_double(boxes[pos, 3])
149 |             box2[4] = c_double(boxes[pos, 4])
150 |
151 |             ov = overlap(box1, box2)
152 |             if ov > 0:
153 |                 if method == 1:  # linear
154 |                     if ov > Nt:
155 |                         weight = 1 - ov
156 |                     else:
157 |                         weight = 1
158 |                 elif method == 2:  # gaussian
159 |                     weight = np.exp(-(ov * ov) / sigma)
160 |                 else:  # original NMS
161 |                     if ov > Nt:
162 |                         weight = 0
163 |                     else:
164 |                         weight = 1
165 |
166 |                 boxes[pos, 5] = weight * boxes[pos, 5]
167 |
168 |                 # if box score falls below threshold, discard the box by swapping with last box
169 |                 # update N
170 |                 if boxes[pos, 5] < threshold:
171 |                     boxes[pos, 0] = boxes[N - 1, 0]
172 |                     boxes[pos, 1] = boxes[N - 1, 1]
173 |                     boxes[pos, 2] = boxes[N - 1, 2]
174 |                     boxes[pos, 3] = boxes[N - 1, 3]
175 |                     boxes[pos, 4] = boxes[N - 1, 4]
176 |                     boxes[pos, 5] = boxes[N - 1, 5]
177 |                     if is_scale:
178 |                         boxes[pos, 6] = boxes[N - 1, 6]
179 |                     N = N - 1
180 |                     pos = pos - 1
181 |             pos = pos + 1
182 |     keep = [i for i in range(N)]
183 |     # boxes[:, 4] = (boxes[:, 4] / 180.0) + 0.5
184 |     boxes[:, 4] = boxes[:, 4] / 180.0 * np.pi
185 |     return boxes
186 |
187 | def angle_soft_nms_new(all_dets, sigma=0.5, Nt=0.5, threshold=0.03, method=0, all_cls=False, cls_decay=1.5):
188 |     """Pure Python Soft-NMS baseline.
189 | author: Xingjia Pan 190 | date: 2019/11/4 191 | all_dets: cx,cy,w,h,angle,score for one row 192 | """ 193 | all_dets[:,4] = all_dets[:,4] / np.pi *180.0 194 | N = all_dets.shape[0] 195 | for i in range(N): 196 | order = np.argsort(-all_dets[:, 5]) 197 | all_dets = all_dets[order, :] 198 | ## calc distance of center point 199 | if i == N-1: 200 | continue 201 | dist_score = np.linalg.norm(all_dets[i,:2]-all_dets[i+1:, :2],axis=1) 202 | min_side = np.min(all_dets[i,2:4])+1e-8 203 | div_factor = 1./10 if min_side>96 else 1./7 204 | dist_score = dist_score/(div_factor * min_side) 205 | dist_score = np.clip(dist_score, 0.0, 1.0) 206 | dist_score = dist_score**2 207 | box1 = (c_double * 5)() 208 | box1[0] = c_double(all_dets[i, 0]) 209 | box1[1] = c_double(all_dets[i, 1]) 210 | box1[2] = c_double(all_dets[i, 2]) 211 | box1[3] = c_double(all_dets[i, 3]) 212 | box1[4] = c_double(all_dets[i, 4]) 213 | j = i + 1 214 | # NMS iterations, note that N changes if detection boxes fall below threshold 215 | while j < N: 216 | box2 = (c_double * 5)() 217 | box2[0] = c_double(all_dets[j, 0]) 218 | box2[1] = c_double(all_dets[j, 1]) 219 | box2[2] = c_double(all_dets[j, 2]) 220 | box2[3] = c_double(all_dets[j, 3]) 221 | box2[4] = c_double(all_dets[j, 4]) 222 | ov = overlap(box1, box2) 223 | weight = 1.0 224 | if ov > 0: 225 | if method == 1: # linear 226 | if ov > Nt: 227 | weight = 1 - ov 228 | else: 229 | weight = 1 230 | elif method == 2: # gaussian 231 | weight = np.exp(-(ov * ov) / sigma) 232 | else: # original NMS 233 | if ov > Nt: 234 | weight = 0 235 | else: 236 | weight = 1 237 | if all_cls: 238 | if all_dets[i,6] != all_dets[j, 6]: 239 | dist_score[j-i-1] *= cls_decay 240 | dist_score[j-i-1] = np.minimum(dist_score[j-i-1], 1.0) 241 | weight *= dist_score[j-i-1] 242 | all_dets[j, 5] = weight * all_dets[j, 5] 243 | j = j + 1 244 | keep = all_dets[:,5] > threshold 245 | all_dets[:, 4] = all_dets[:, 4] / 180.0 * np.pi 246 | return all_dets[keep,:] 247 | 248 | 249 | def py_yt_nms(dets, thresh): 250 | x1 = dets[:, 0] 251 | y1 = dets[:, 1] 252 | 253 | x2 = dets[:, 2] 254 | y2 = dets[:, 3] 255 | scores = dets[:, 4] 256 | angle = 180.0 * (dets[:, 5] - 0.5) 257 | 258 | # sort by confidence 259 | order = scores.argsort()[::-1] 260 | 261 | # list of keep box 262 | keep = [] 263 | while order.size > 0: 264 | i = order[0] 265 | keep.append(i) 266 | 267 | cur_box = [x1[i], y1[i], x2[i], y2[i], angle[i]] 268 | other_boxes = [] 269 | for each_box in range(1, len(order)): 270 | each_other_box = [x1[order[each_box]], y1[order[each_box]], x2[order[each_box]], y2[order[each_box]],angle[order[each_box]]] 271 | other_boxes.append(each_other_box) 272 | iou_result_list = [] 273 | box1 = (c_double * 5)() 274 | box2 = (c_double * 5)() 275 | box1[0] = c_double(cur_box[0]) 276 | box1[1] = c_double(cur_box[1]) 277 | box1[2] = c_double(cur_box[2]) 278 | box1[3] = c_double(cur_box[3]) 279 | box1[4] = c_double(cur_box[4]) 280 | # call for cpp nms function 281 | for each_gt_box in other_boxes: 282 | box2[0] = c_double(each_gt_box[0]) 283 | box2[1] = c_double(each_gt_box[1]) 284 | box2[2] = c_double(each_gt_box[2]) 285 | box2[3] = c_double(each_gt_box[3]) 286 | box2[4] = c_double(each_gt_box[4]) 287 | 288 | # get return iou result 289 | each_iou = overlap(box1, box2) 290 | iou_result_list.append(each_iou) 291 | 292 | ovr = iou_result_list 293 | ovr = np.array(ovr) 294 | 295 | inds = np.where(ovr <= thresh)[0] 296 | order = order[inds + 1] 297 | 298 | return keep 299 | 300 | 301 | if __name__ == '__main__': 302 | a = 
np.array([(0, 0, 30 / 1000, 10 / 1000, .9, 0), 303 | (0, 0, 30 / 1000, 10 / 1000, .98, 0.25)]) # , (-5, -5, 5, 5, .98, 45), (-5, -5, 6, 6, .99, 30)]) 304 | # print(py_cpu_nms(a, 0.45)) 305 | # print(py_poly_nms(a, 0.45)) 306 | # print(Polygon(a).area) 307 | -------------------------------------------------------------------------------- /angle_nms/librbox.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | struct Line 7 | { 8 | int crossnum;//0:ignore; -1:all inner point; 2:two crossing point; 1:one crossing point 9 | int p1;//index of the start point 10 | int p2;//index of the end point 11 | int d[2][2];//the index of the start point after division 12 | double length;//the length after division 13 | }; 14 | 15 | void OverlapSub (double *rbox1, double *rbox2, double *area) 16 | { 17 | double xcenter1 = rbox1[0]; 18 | double ycenter1 = rbox1[1]; 19 | double width1 = rbox1[2]; 20 | double height1 = rbox1[3]; 21 | double angle1 = rbox1[4]; 22 | double xcenter2 = rbox2[0]; 23 | double ycenter2 = rbox2[1]; 24 | double width2 = rbox2[2]; 25 | double height2 = rbox2[3]; 26 | double angle2 = rbox2[4]; 27 | //for(int i=0;i<5;i++) cout< height1? width1 : height1; 48 | double max_width_height2 = width2 > height2? width2 : height2; 49 | if (sqrt(xcenterd * xcenterd + ycenterd * ycenterd) > 50 | (max_width_height1 + max_width_height2) * 1.414214/2) 51 | { 52 | area[0] = 0; 53 | //fout< (hw1 + hw2) || fabs(ycenterd) > (hh1 + hh2)) 60 | { 61 | area[0] = 0; 62 | //fout< (xcenterd - hw2)? -hw1 : (xcenterd - hw2); 69 | double x_max_inter = hw1 < (xcenterd + hw2)? hw1 : (xcenterd + hw2); 70 | double y_min_inter = -hh1 > (ycenterd - hh2)? -hh1 : (ycenterd - hh2); 71 | double y_max_inter = hh1 < (ycenterd + hh2)? hh1 : (ycenterd + hh2); 72 | const double inter_width = x_max_inter - x_min_inter; 73 | const double inter_height = y_max_inter - y_min_inter; 74 | const double inter_size = inter_width * inter_height; 75 | area[0] = inter_size; 76 | area[0] = area[0] / (width1 * height1 + width2 * height2 - area[0]); 77 | //LOG(INFO)<<"AREA = "< (xcenterd - hh2)? -hw1 : (xcenterd - hh2); 85 | double x_max_inter = hw1 < (xcenterd + hh2)? hw1 : (xcenterd + hh2); 86 | double y_min_inter = -hh1 > (ycenterd - hw2)? -hh1 : (ycenterd - hw2); 87 | double y_max_inter = hh1 < (ycenterd + hw2)? 
hh1 : (ycenterd + hw2); 88 | const double inter_width = x_max_inter - x_min_inter; 89 | const double inter_height = y_max_inter - y_min_inter; 90 | const double inter_size = inter_width * inter_height; 91 | area[0] = inter_size; 92 | area[0] = area[0] / (width1 * height1 + width2 * height2 - area[0]); 93 | //fout< -hw1; 132 | inner_side2[i][2] = point2y[i] > -hh1; 133 | inner_side2[i][3] = point2x[i] < hw1; 134 | inner2[i] = inner_side2[i][0] & inner_side2[i][1] & inner_side2[i][2] & inner_side2[i][3]; 135 | if (inner2[i]) { pcenter_x += point2x[i]; pcenter_y += point2y[i]; count++;} 136 | } 137 | 138 | //similar operating for rbox1: angled -> -angled, xcenterd -> -xcenterd, ycenterd -> -ycenterd 139 | // point10: (w/2, h/2) 140 | double xcenterd_hat = - xcenterd * cos_angled - ycenterd * sin_angled; 141 | double ycenterd_hat = xcenterd * sin_angled - ycenterd * cos_angled; 142 | double point1x[4], point1y[4]; 143 | 144 | point1x[0] = xcenterd_hat + cos_angled_hw1 + sin_angled_hh1; 145 | point1y[0] = ycenterd_hat - sin_angled_hw1 + cos_angled_hh1; 146 | // point21: (-w/2, h/2) 147 | point1x[1] = xcenterd_hat - cos_angled_hw1 + sin_angled_hh1; 148 | point1y[1] = ycenterd_hat + sin_angled_hw1 + cos_angled_hh1; 149 | // point22: (-w/2, -h/2) 150 | point1x[2] = xcenterd_hat - cos_angled_hw1 - sin_angled_hh1; 151 | point1y[2] = ycenterd_hat + sin_angled_hw1 - cos_angled_hh1; 152 | // point23: (w/2, -h/2) 153 | point1x[3] = xcenterd_hat + cos_angled_hw1 - sin_angled_hh1; 154 | point1y[3] = ycenterd_hat - sin_angled_hw1 - cos_angled_hh1; 155 | 156 | // determine the inner point 157 | // determine the inner point 158 | bool inner_side1[4][4], inner1[4]; 159 | for(int i = 0; i < 4; i++) 160 | { 161 | inner_side1[i][0] = point1y[i] < hh2; 162 | inner_side1[i][1] = point1x[i] > -hw2; 163 | inner_side1[i][2] = point1y[i] > -hh2; 164 | inner_side1[i][3] = point1x[i] < hw2; 165 | inner1[i] = inner_side1[i][0] & inner_side1[i][1] & inner_side1[i][2] & inner_side1[i][3]; 166 | } 167 | point1x[0] = hw1; 168 | point1y[0] = hh1; 169 | // point21: (-w/2, h/2) 170 | point1x[1] = -hw1; 171 | point1y[1] = hh1; 172 | // point22: (-w/2, -h/2) 173 | point1x[2] = -hw1; 174 | point1y[2] = -hh1; 175 | // point23: (w/2, -h/2) 176 | point1x[3] = hw1; 177 | point1y[3] = -hh1; 178 | if (inner1[0]) { pcenter_x += hw1; pcenter_y += hh1; count++;} 179 | if (inner1[1]) { pcenter_x -= hw1; pcenter_y += hh1; count++;} 180 | if (inner1[2]) { pcenter_x -= hw1; pcenter_y -= hh1; count++;} 181 | if (inner1[3]) { pcenter_x += hw1; pcenter_y -= hh1; count++;} 182 | //find cross_points 183 | Line line1[4], line2[4]; 184 | line1[0].p1 = 0; line1[0].p2 = 1; 185 | line1[1].p1 = 1; line1[1].p2 = 2; 186 | line1[2].p1 = 2; line1[2].p2 = 3; 187 | line1[3].p1 = 3; line1[3].p2 = 0; 188 | line2[0].p1 = 0; line2[0].p2 = 1; 189 | line2[1].p1 = 1; line2[1].p2 = 2; 190 | line2[2].p1 = 2; line2[2].p2 = 3; 191 | line2[3].p1 = 3; line2[3].p2 = 0; 192 | double pointc_x[4][4], pointc_y[4][4]; 193 | for (int i = 0; i < 4; i++) 194 | { 195 | int index1 = line1[i].p1; 196 | int index2 = line1[i].p2; 197 | line1[i].crossnum = 0; 198 | if (inner1[index1] && inner1[index2]) 199 | { 200 | if (i == 0 || i == 2) line1[i].length = width1; 201 | else line1[i].length = height1; 202 | line1[i].crossnum = -1; 203 | continue; 204 | } 205 | if (inner1[index1]) 206 | { 207 | line1[i].crossnum ++; 208 | line1[i].d[0][0] = index1; 209 | line1[i].d[0][1] = -1; 210 | continue; 211 | } 212 | if (inner1[index2]) 213 | { 214 | line1[i].crossnum ++; 215 | line1[i].d[0][0] = 
index2; 216 | line1[i].d[0][1] = -1; 217 | continue; 218 | } 219 | } 220 | for (int i = 0; i < 4; i++) 221 | { 222 | int index1 = line2[i].p1; 223 | double x1 = point2x[index1]; 224 | double y1 = point2y[index1]; 225 | int index2 = line2[i].p2; 226 | double x2 = point2x[index2]; 227 | double y2 = point2y[index2]; 228 | line2[i].crossnum = 0; 229 | if (inner2[index1] && inner2[index2]) 230 | { 231 | if (i == 0 || i == 2) line2[i].length = width2; 232 | else line2[i].length = height1; 233 | line2[i].crossnum = -1; 234 | continue; 235 | } 236 | if (inner2[index1]) 237 | { 238 | line2[i].crossnum ++; 239 | line2[i].d[0][0] = index1; 240 | line2[i].d[0][1] = -1; 241 | } 242 | else if (inner2[index2]) 243 | { 244 | line2[i].crossnum ++; 245 | line2[i].d[0][0] = index2; 246 | line2[i].d[0][1] = -1; 247 | } 248 | double tmp1 = (y1*x2 - y2*x1) / (y1 - y2); 249 | double tmp2 = (x1 - x2) / (y1 - y2); 250 | //cout<<"tmp"<<" "< 0) 307 | { 308 | if (line1[i].d[0][1] == -1) 309 | { 310 | if (i==0 || i==2) 311 | line1[i].length = fabs(point1x[line1[i].d[0][0]] - pointc_x[line1[i].d[1][0]][line1[i].d[1][1]]); 312 | else 313 | line1[i].length = fabs(point1y[line1[i].d[0][0]] - pointc_y[line1[i].d[1][0]][line1[i].d[1][1]]); 314 | } 315 | else 316 | { 317 | if (i==0 || i==2) 318 | line1[i].length = fabs(pointc_x[line1[i].d[0][0]][line1[i].d[0][1]] - pointc_x[line1[i].d[1][0]][line1[i].d[1][1]]); 319 | else 320 | line1[i].length = fabs(pointc_y[line1[i].d[0][0]][line1[i].d[0][1]] - pointc_y[line1[i].d[1][0]][line1[i].d[1][1]]); 321 | } 322 | } 323 | if (line2[i].crossnum >0) 324 | { 325 | if (line2[i].d[0][1] == -1) 326 | line2[i].length = fabs(point2x[line2[i].d[0][0]] - pointc_x[line2[i].d[1][0]][line2[i].d[1][1]]); 327 | else 328 | line2[i].length = fabs(pointc_x[line2[i].d[0][0]][line2[i].d[0][1]] - pointc_x[line2[i].d[1][0]][line2[i].d[1][1]]); 329 | if(i == 0 || i == 2) line2[i].length *= width2 / fabs(point2x[line2[i].p1] - point2x[line2[i].p2]); 330 | else line2[i].length *= height2 / fabs(point2x[line2[i].p1] - point2x[line2[i].p2]); 331 | } 332 | } 333 | 334 | double dis1[4], dis2[4]; 335 | dis1[0] = fabs(pcenter_y - hh1); 336 | dis1[1] = fabs(pcenter_x + hw1); 337 | dis1[2] = fabs(pcenter_y + hh1); 338 | dis1[3] = fabs(pcenter_x - hw1); 339 | dis2[0] = fabs(pcenter_y_hat - hh2); 340 | dis2[1] = fabs(pcenter_x_hat + hw2); 341 | dis2[2] = fabs(pcenter_y_hat + hh2); 342 | dis2[3] = fabs(pcenter_x_hat - hw2); 343 | for (int i=0; i < 4; i++) 344 | { 345 | //cout<<"line1["< 1) rbox_angle = 1; 377 | if (rbox_angle < -1) rbox_angle = -1; 378 | preds[5*i+4] = asin(rbox_angle) * 180 / 3.141593 + prior_angle; 379 | } 380 | } 381 | 382 | struct node 383 | { 384 | double value; 385 | int index; 386 | }; 387 | 388 | bool cmp(struct node a, struct node b) 389 | { 390 | if(a.value > b.value) return true; 391 | else return false; 392 | } 393 | 394 | void NMS_sub(double *preds, int *indices, double *scores, int *n, double threshold) 395 | { 396 | int count=0; 397 | node *scorenodes = new node[n[0]]; 398 | for(int i = 0; i < n[0]; i++) 399 | { 400 | scorenodes[i].index = i; 401 | scorenodes[i].value = scores[i]; 402 | } 403 | sort(scorenodes, scorenodes + n[0], cmp); 404 | for(int i = 0; i < n[0]; i++) 405 | { 406 | //cout< 0.2) continue; 553 | indices[count] = ind; 554 | count ++; 555 | } 556 | n[0] = count; 557 | delete []scorenodes; 558 | delete []arealist; 559 | } 560 | 561 | extern "C" 562 | { 563 | #include 564 | double Overlap(double *rbox1, double *rbox2) 565 | { 566 | double area[1]; 567 | int i; 568 | 
OverlapSub(rbox1, rbox2, area); 569 | // printf("%s\n",area[0] ); 570 | return area[0]; 571 | // return 2; 572 | } 573 | 574 | void DecodeAndNMS(double *preds, double *priors, int *indices, double *scores, int *n, double threshold) 575 | { 576 | int i; 577 | Decode(preds, priors, n[0]); 578 | //for(i=0;i0) 59 | local isStr = torch.type(T[1])=='string' 60 | assert(isStr or torch.isTensor(T[1])) 61 | local c=function(s) return torch.CharTensor(torch.CharStorage():string(s)) end 62 | if isStr then local S=T; T={}; for i=1,n do T[i]=c(S[i]) end end 63 | local ms, idx = torch.LongTensor(n), torch.LongTensor(n+1) 64 | for i=1,n do ms[i]=T[i]:numel() end 65 | idx[1]=1; idx:narrow(1,2,n):copy(ms); idx=idx:cumsum() 66 | local type = string.sub(torch.type(T[1]),7,-1) 67 | local data = torch[type](idx[n+1]-1) 68 | if isStr then type='string' end 69 | for i=1,n do if ms[i]>0 then data:sub(idx[i],idx[i+1]-1):copy(T[i]) end end 70 | if ms:eq(ms[1]):all() and ms[1]>0 then data=data:view(n,ms[1]); idx=nil end 71 | self.data, self.idx, self.type = data, idx, type 72 | end 73 | 74 | function TensorTable:__index__( i ) 75 | if torch.type(i)~='number' then return false end 76 | local d, idx, type = self.data, self.idx, self.type 77 | if idx and idx[i]==idx[i+1] then 78 | if type=='string' then d='' else d=torch[type]() end 79 | else 80 | if idx then d=d:sub(idx[i],idx[i+1]-1) else d=d[i] end 81 | if type=='string' then d=d:clone():storage():string() end 82 | end 83 | return d, true 84 | end 85 | 86 | -------------------------------------------------------------------------------- 87 | 88 | --[[ CocoSeg is an efficient data structure for storing COCO segmentations. ]] 89 | 90 | function CocoSeg:__init( segs ) 91 | local polys, pIdx, sizes, rles, p, isStr = {}, {}, {}, {}, 0, 0 92 | for i,seg in pairs(segs) do if seg.size then isStr=seg.counts break end end 93 | isStr = torch.type(isStr)=='string' 94 | for i,seg in pairs(segs) do 95 | pIdx[i], sizes[i] = {}, {} 96 | if seg.size then 97 | sizes[i],rles[i] = seg.size,seg.counts 98 | else 99 | if isStr then rles[i]='' else rles[i]={} end 100 | for j=1,#seg do p=p+1; pIdx[i][j],polys[p] = p,seg[j] end 101 | end 102 | pIdx[i],sizes[i] = torch.LongTensor(pIdx[i]),torch.IntTensor(sizes[i]) 103 | if not isStr then rles[i]=torch.IntTensor(rles[i]) end 104 | end 105 | for i=1,p do polys[i]=torch.DoubleTensor(polys[i]) end 106 | self.polys, self.pIdx = coco.TensorTable(polys), coco.TensorTable(pIdx) 107 | self.sizes, self.rles = coco.TensorTable(sizes), coco.TensorTable(rles) 108 | end 109 | 110 | function CocoSeg:__index__( i ) 111 | if torch.type(i)~='number' then return false end 112 | if self.sizes[i]:numel()>0 then 113 | return {size=self.sizes[i],counts=self.rles[i]}, true 114 | else 115 | local ids, polys = self.pIdx[i], {} 116 | for i=1,ids:numel() do polys[i]=self.polys[ids[i]] end 117 | return polys, true 118 | end 119 | end 120 | 121 | -------------------------------------------------------------------------------- 122 | 123 | --[[ CocoApi is the API to the COCO dataset, see main comment for details. ]] 124 | 125 | function CocoApi:__init( annFile ) 126 | assert( string.sub(annFile,-4,-1)=='json' and paths.filep(annFile) ) 127 | local torchFile = string.sub(annFile,1,-6) .. 
'.t7' 128 | if not paths.filep(torchFile) then self:__convert(annFile,torchFile) end 129 | local data = torch.load(torchFile) 130 | self.data, self.inds = data, {} 131 | for k,v in pairs({images='img',categories='cat',annotations='ann'}) do 132 | local M = {}; self.inds[v..'IdsMap']=M 133 | if data[k] then for i=1,data[k].id:size(1) do M[data[k].id[i]]=i end end 134 | end 135 | end 136 | 137 | function CocoApi:__convert( annFile, torchFile ) 138 | print('convert: '..annFile..' --> .t7 [please be patient]') 139 | local tic = torch.tic() 140 | -- load data and decode json 141 | local data = torch.CharStorage(annFile):string() 142 | data = json.decode(data); collectgarbage() 143 | -- transpose and flatten each field in the coco data struct 144 | local convert = {images=true, categories=true, annotations=true} 145 | for field, d in pairs(data) do if convert[field] then 146 | print('converting: '..field) 147 | local n, out = #d, {} 148 | if n==0 then d,n={d},1 end 149 | for k,v in pairs(d[1]) do 150 | local t, isReg = torch.type(v), true 151 | for i=1,n do isReg=isReg and torch.type(d[i][k])==t end 152 | if t=='number' and isReg then 153 | out[k] = torch.DoubleTensor(n) 154 | for i=1,n do out[k][i]=d[i][k] end 155 | elseif t=='string' and isReg then 156 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 157 | out[k] = coco.TensorTable(out[k]) 158 | elseif t=='table' and isReg and torch.type(v[1])=='number' then 159 | out[k]={}; for i=1,n do out[k][i]=torch.DoubleTensor(d[i][k]) end 160 | out[k] = coco.TensorTable(out[k]) 161 | if not out[k].idx then out[k]=out[k].data end 162 | else 163 | out[k]={}; for i=1,n do out[k][i]=d[i][k] end 164 | if k=='segmentation' then out[k] = coco.CocoSeg(out[k]) end 165 | end 166 | collectgarbage() 167 | end 168 | if out.id then out.idx=torch.range(1,out.id:size(1)) end 169 | data[field] = out 170 | collectgarbage() 171 | end end 172 | -- create mapping from cat/img index to anns indices for that cat/img 173 | print('convert: building indices') 174 | local makeMap = function( type, type_id ) 175 | if not data[type] or not data.annotations then return nil end 176 | local invmap, n = {}, data[type].id:size(1) 177 | for i=1,n do invmap[data[type].id[i]]=i end 178 | local map = {}; for i=1,n do map[i]={} end 179 | data.annotations[type_id..'x'] = data.annotations[type_id]:clone() 180 | for i=1,data.annotations.id:size(1) do 181 | local id = invmap[data.annotations[type_id][i]] 182 | data.annotations[type_id..'x'][i] = id 183 | table.insert(map[id],data.annotations.id[i]) 184 | end 185 | for i=1,n do map[i]=torch.LongTensor(map[i]) end 186 | return coco.TensorTable(map) 187 | end 188 | data.annIdsPerImg = makeMap('images','image_id') 189 | data.annIdsPerCat = makeMap('categories','category_id') 190 | -- save to disk 191 | torch.save( torchFile, data ) 192 | print(('convert: complete [%.2f s]'):format(torch.toc(tic))) 193 | end 194 | 195 | function CocoApi:getAnnIds( filters ) 196 | if not filters then filters = {} end 197 | if filters.imgId then 198 | return self.data.annIdsPerImg[self.inds.imgIdsMap[filters.imgId]] or {} 199 | elseif filters.catId then 200 | return self.data.annIdsPerCat[self.inds.catIdsMap[filters.catId]] or {} 201 | else 202 | return self.data.annotations.id 203 | end 204 | end 205 | 206 | function CocoApi:getCatIds() 207 | return self.data.categories.id 208 | end 209 | 210 | function CocoApi:getImgIds() 211 | return self.data.images.id 212 | end 213 | 214 | function CocoApi:loadAnns( ids ) 215 | return 
self:__load(self.data.annotations,self.inds.annIdsMap,ids) 216 | end 217 | 218 | function CocoApi:loadCats( ids ) 219 | return self:__load(self.data.categories,self.inds.catIdsMap,ids) 220 | end 221 | 222 | function CocoApi:loadImgs( ids ) 223 | return self:__load(self.data.images,self.inds.imgIdsMap,ids) 224 | end 225 | 226 | function CocoApi:showAnns( img, anns ) 227 | local n, h, w = #anns, img:size(2), img:size(3) 228 | local MaskApi, clrs = coco.MaskApi, torch.rand(n,3)*.6+.4 229 | local O = img:clone():contiguous():float() 230 | if n==0 then anns,n={anns},1 end 231 | if anns[1].keypoints then for i=1,n do if anns[i].iscrowd==0 then 232 | local sk, kp, j, k = self:loadCats(anns[i].category_id)[1].skeleton 233 | kp=anns[i].keypoints; k=kp:size(1); j=torch.range(1,k,3):long(); k=k/3; 234 | local x,y,v = kp:index(1,j), kp:index(1,j+1), kp:index(1,j+2) 235 | for _,s in pairs(sk) do if v[s[1]]>0 and v[s[2]]>0 then 236 | MaskApi.drawLine(O,x[s[1]],y[s[1]],x[s[2]],y[s[2]],.75,clrs[i]) 237 | end end 238 | for j=1,k do if v[j]==1 then MaskApi.drawCirc(O,x[j],y[j],4,{0,0,0}) end end 239 | for j=1,k do if v[j]>0 then MaskApi.drawCirc(O,x[j],y[j],3,clrs[i]) end end 240 | end end end 241 | if anns[1].segmentation or anns[1].bbox then 242 | local Rs, alpha = {}, anns[1].keypoints and .25 or .4 243 | for i=1,n do 244 | Rs[i]=anns[i].segmentation 245 | if Rs[i] and #Rs[i]>0 then Rs[i]=MaskApi.frPoly(Rs[i],h,w) end 246 | if not Rs[i] then Rs[i]=MaskApi.frBbox(anns[i].bbox,h,w)[1] end 247 | end 248 | MaskApi.drawMasks(O,MaskApi.decode(Rs),nil,alpha,clrs) 249 | end 250 | return O 251 | end 252 | 253 | function CocoApi:__load( data, map, ids ) 254 | if not torch.isTensor(ids) then ids=torch.LongTensor({ids}) end 255 | local out, idx = {}, nil 256 | for i=1,ids:numel() do 257 | out[i], idx = {}, map[ids[i]] 258 | for k,v in pairs(data) do out[i][k]=v[idx] end 259 | end 260 | return out 261 | end 262 | -------------------------------------------------------------------------------- /cocoapi_ro/LuaAPI/MaskApi.lua: -------------------------------------------------------------------------------- 1 | --[[---------------------------------------------------------------------------- 2 | 3 | Interface for manipulating masks stored in RLE format. 4 | 5 | For an overview of RLE please see http://mscoco.org/dataset/#download. 6 | Additionally, more detailed information can be found in the Matlab MaskApi.m: 7 | https://github.com/pdollar/coco/blob/master/MatlabAPI/MaskApi.m 8 | 9 | The following API functions are defined: 10 | encode - Encode binary masks using RLE. 11 | decode - Decode binary masks encoded via RLE. 12 | merge - Compute union or intersection of encoded masks. 13 | iou - Compute intersection over union between masks. 14 | nms - Compute non-maximum suppression between ordered masks. 15 | area - Compute area of encoded masks. 16 | toBbox - Get bounding boxes surrounding encoded masks. 17 | frBbox - Convert bounding boxes to encoded masks. 18 | frPoly - Convert polygon to encoded mask. 19 | drawCirc - Draw circle into image (alters input). 20 | drawLine - Draw line into image (alters input). 21 | drawMasks - Draw masks into image (alters input). 
22 | 23 | Usage: 24 | Rs = MaskApi.encode( masks ) 25 | masks = MaskApi.decode( Rs ) 26 | R = MaskApi.merge( Rs, [intersect=false] ) 27 | o = MaskApi.iou( dt, gt, [iscrowd=false] ) 28 | keep = MaskApi.nms( dt, thr ) 29 | a = MaskApi.area( Rs ) 30 | bbs = MaskApi.toBbox( Rs ) 31 | Rs = MaskApi.frBbox( bbs, h, w ) 32 | R = MaskApi.frPoly( poly, h, w ) 33 | MaskApi.drawCirc( img, x, y, rad, clr ) 34 | MaskApi.drawLine( img, x0, y0, x1, y1, rad, clr ) 35 | MaskApi.drawMasks( img, masks, [maxn=n], [alpha=.4], [clrs] ) 36 | For detailed usage information please see cocoDemo.lua. 37 | 38 | In the API the following formats are used: 39 | R,Rs - [table] Run-length encoding of binary mask(s) 40 | masks - [nxhxw] Binary mask(s) 41 | bbs - [nx4] Bounding box(es) stored as [x y w h] 42 | poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 43 | dt,gt - May be either bounding boxes or encoded masks 44 | Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 45 | 46 | Common Objects in COntext (COCO) Toolbox. version 3.0 47 | Data, paper, and tutorials available at: http://mscoco.org/ 48 | Code written by Pedro O. Pinheiro and Piotr Dollar, 2016. 49 | Licensed under the Simplified BSD License [see coco/license.txt] 50 | 51 | ------------------------------------------------------------------------------]] 52 | 53 | local ffi = require 'ffi' 54 | local coco = require 'coco.env' 55 | 56 | coco.MaskApi = {} 57 | local MaskApi = coco.MaskApi 58 | 59 | coco.libmaskapi = ffi.load(package.searchpath('libmaskapi',package.cpath)) 60 | local libmaskapi = coco.libmaskapi 61 | 62 | -------------------------------------------------------------------------------- 63 | 64 | MaskApi.encode = function( masks ) 65 | local n, h, w = masks:size(1), masks:size(2), masks:size(3) 66 | masks = masks:type('torch.ByteTensor'):transpose(2,3) 67 | local data = masks:contiguous():data() 68 | local Qs = MaskApi._rlesInit(n) 69 | libmaskapi.rleEncode(Qs[0],data,h,w,n) 70 | return MaskApi._rlesToLua(Qs,n) 71 | end 72 | 73 | MaskApi.decode = function( Rs ) 74 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 75 | local masks = torch.ByteTensor(n,w,h):zero():contiguous() 76 | libmaskapi.rleDecode(Qs,masks:data(),n) 77 | MaskApi._rlesFree(Qs,n) 78 | return masks:transpose(2,3) 79 | end 80 | 81 | MaskApi.merge = function( Rs, intersect ) 82 | intersect = intersect or 0 83 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 84 | local Q = MaskApi._rlesInit(1) 85 | libmaskapi.rleMerge(Qs,Q,n,intersect) 86 | MaskApi._rlesFree(Qs,n) 87 | return MaskApi._rlesToLua(Q,1)[1] 88 | end 89 | 90 | MaskApi.iou = function( dt, gt, iscrowd ) 91 | if not iscrowd then iscrowd = NULL else 92 | iscrowd = iscrowd:type('torch.ByteTensor'):contiguous():data() 93 | end 94 | if torch.isTensor(gt) and torch.isTensor(dt) then 95 | local nDt, k = dt:size(1), dt:size(2); assert(k==4) 96 | local nGt, k = gt:size(1), gt:size(2); assert(k==4) 97 | local dDt = dt:type('torch.DoubleTensor'):contiguous():data() 98 | local dGt = gt:type('torch.DoubleTensor'):contiguous():data() 99 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 100 | libmaskapi.bbIou(dDt,dGt,nDt,nGt,iscrowd,o:data()) 101 | return o:transpose(1,2) 102 | else 103 | local qDt, nDt = MaskApi._rlesFrLua(dt) 104 | local qGt, nGt = MaskApi._rlesFrLua(gt) 105 | local o = torch.DoubleTensor(nGt,nDt):contiguous() 106 | libmaskapi.rleIou(qDt,qGt,nDt,nGt,iscrowd,o:data()) 107 | MaskApi._rlesFree(qDt,nDt); MaskApi._rlesFree(qGt,nGt) 108 | return o:transpose(1,2) 109 | end 110 | end 111 | 112 | MaskApi.nms 
= function( dt, thr ) 113 | if torch.isTensor(dt) then 114 | local n, k = dt:size(1), dt:size(2); assert(k==4) 115 | local Q = dt:type('torch.DoubleTensor'):contiguous():data() 116 | local kp = torch.IntTensor(n):contiguous() 117 | libmaskapi.bbNms(Q,n,kp:data(),thr) 118 | return kp 119 | else 120 | local Q, n = MaskApi._rlesFrLua(dt) 121 | local kp = torch.IntTensor(n):contiguous() 122 | libmaskapi.rleNms(Q,n,kp:data(),thr) 123 | MaskApi._rlesFree(Q,n) 124 | return kp 125 | end 126 | end 127 | 128 | MaskApi.area = function( Rs ) 129 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 130 | local a = torch.IntTensor(n):contiguous() 131 | libmaskapi.rleArea(Qs,n,a:data()) 132 | MaskApi._rlesFree(Qs,n) 133 | return a 134 | end 135 | 136 | MaskApi.toBbox = function( Rs ) 137 | local Qs, n, h, w = MaskApi._rlesFrLua(Rs) 138 | local bb = torch.DoubleTensor(n,4):contiguous() 139 | libmaskapi.rleToBbox(Qs,bb:data(),n) 140 | MaskApi._rlesFree(Qs,n) 141 | return bb 142 | end 143 | 144 | MaskApi.frBbox = function( bbs, h, w ) 145 | if bbs:dim()==1 then bbs=bbs:view(1,bbs:size(1)) end 146 | local n, k = bbs:size(1), bbs:size(2); assert(k==4) 147 | local data = bbs:type('torch.DoubleTensor'):contiguous():data() 148 | local Qs = MaskApi._rlesInit(n) 149 | libmaskapi.rleFrBbox(Qs[0],data,h,w,n) 150 | return MaskApi._rlesToLua(Qs,n) 151 | end 152 | 153 | MaskApi.frPoly = function( poly, h, w ) 154 | local n = #poly 155 | local Qs, Q = MaskApi._rlesInit(n), MaskApi._rlesInit(1) 156 | for i,p in pairs(poly) do 157 | local xy = p:type('torch.DoubleTensor'):contiguous():data() 158 | libmaskapi.rleFrPoly(Qs[i-1],xy,p:size(1)/2,h,w) 159 | end 160 | libmaskapi.rleMerge(Qs,Q[0],n,0) 161 | MaskApi._rlesFree(Qs,n) 162 | return MaskApi._rlesToLua(Q,1)[1] 163 | end 164 | 165 | -------------------------------------------------------------------------------- 166 | 167 | MaskApi.drawCirc = function( img, x, y, rad, clr ) 168 | assert(img:isContiguous() and img:dim()==3) 169 | local k, h, w, data = img:size(1), img:size(2), img:size(3), img:data() 170 | for dx=-rad,rad do for dy=-rad,rad do 171 | local xi, yi = torch.round(x+dx), torch.round(y+dy) 172 | if dx*dx+dy*dy<=rad*rad and xi>=0 and yi>=0 and xi<w and yi<h then -------------------------------------------------------------------------------- /cocoapi_ro/LuaAPI/rocks/coco-scm-1.rockspec: -------------------------------------------------------------------------------- 16 | "lua >= 5.1", 17 | "torch >= 7.0", 18 | "lua-cjson" 19 | } 20 | 21 | build = { 22 | type = "builtin", 23 | modules = { 24 | ["coco.env"] = "LuaAPI/env.lua", 25 | ["coco.init"] = "LuaAPI/init.lua", 26 | ["coco.MaskApi"] = "LuaAPI/MaskApi.lua", 27 | ["coco.CocoApi"] = "LuaAPI/CocoApi.lua", 28 | libmaskapi = { 29 | sources = { "common/maskApi.c" }, 30 | incdirs = { "common/" } 31 | } 32 | } 33 | } 34 | 35 | -- luarocks make LuaAPI/rocks/coco-scm-1.rockspec 36 | -- https://github.com/pdollar/coco/raw/master/LuaAPI/rocks/coco-scm-1.rockspec 37 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/CocoApi.m: -------------------------------------------------------------------------------- 1 | classdef CocoApi 2 | % Interface for accessing the Microsoft COCO dataset. 3 | % 4 | % Microsoft COCO is a large image dataset designed for object detection, 5 | % segmentation, and caption generation. CocoApi.m is a Matlab API that 6 | % assists in loading, parsing and visualizing the annotations in COCO. 7 | % Please visit http://mscoco.org/ for more information on COCO, including 8 | % for the data, paper, and tutorials. The exact format of the annotations 9 | % is also described on the COCO website. For example usage of the CocoApi 10 | % please see cocoDemo.m.
In addition to this API, please download both 11 | % the COCO images and annotations in order to run the demo. 12 | % 13 | % An alternative to using the API is to load the annotations directly 14 | % into a Matlab struct. This can be achieved via: 15 | % data = gason(fileread(annFile)); 16 | % Using the API provides additional utility functions. Note that this API 17 | % supports both *instance* and *caption* annotations. In the case of 18 | % captions not all functions are defined (e.g. categories are undefined). 19 | % 20 | % The following API functions are defined: 21 | % CocoApi - Load COCO annotation file and prepare data structures. 22 | % getAnnIds - Get ann ids that satisfy given filter conditions. 23 | % getCatIds - Get cat ids that satisfy given filter conditions. 24 | % getImgIds - Get img ids that satisfy given filter conditions. 25 | % loadAnns - Load anns with the specified ids. 26 | % loadCats - Load cats with the specified ids. 27 | % loadImgs - Load imgs with the specified ids. 28 | % showAnns - Display the specified annotations. 29 | % loadRes - Load algorithm results and create API for accessing them. 30 | % Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 31 | % Help on each functions can be accessed by: "help CocoApi>function". 32 | % 33 | % See also CocoApi>CocoApi, CocoApi>getAnnIds, CocoApi>getCatIds, 34 | % CocoApi>getImgIds, CocoApi>loadAnns, CocoApi>loadCats, 35 | % CocoApi>loadImgs, CocoApi>showAnns, CocoApi>loadRes 36 | % 37 | % Microsoft COCO Toolbox. version 2.0 38 | % Data, paper, and tutorials available at: http://mscoco.org/ 39 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 40 | % Licensed under the Simplified BSD License [see coco/license.txt] 41 | 42 | properties 43 | data % COCO annotation data structure 44 | inds % data structures for fast indexing 45 | end 46 | 47 | methods 48 | function coco = CocoApi( annFile ) 49 | % Load COCO annotation file and prepare data structures. 50 | % 51 | % USAGE 52 | % coco = CocoApi( annFile ) 53 | % 54 | % INPUTS 55 | % annFile - COCO annotation filename 56 | % 57 | % OUTPUTS 58 | % coco - initialized coco object 59 | fprintf('Loading and preparing annotations... '); clk=clock; 60 | if(isstruct(annFile)), coco.data=annFile; else 61 | coco.data=gason(fileread(annFile)); end 62 | is.imgIds = [coco.data.images.id]'; 63 | is.imgIdsMap = makeMap(is.imgIds); 64 | if( isfield(coco.data,'annotations') ) 65 | ann=coco.data.annotations; o=[ann.image_id]; 66 | if(isfield(ann,'category_id')), o=o*1e10+[ann.category_id]; end 67 | [~,o]=sort(o); ann=ann(o); coco.data.annotations=ann; 68 | s={'category_id','area','iscrowd','id','image_id'}; 69 | t={'annCatIds','annAreas','annIscrowd','annIds','annImgIds'}; 70 | for f=1:5, if(isfield(ann,s{f})), is.(t{f})=[ann.(s{f})]'; end; end 71 | is.annIdsMap = makeMap(is.annIds); 72 | is.imgAnnIdsMap = makeMultiMap(is.imgIds,... 73 | is.imgIdsMap,is.annImgIds,is.annIds,0); 74 | end 75 | if( isfield(coco.data,'categories') ) 76 | is.catIds = [coco.data.categories.id]'; 77 | is.catIdsMap = makeMap(is.catIds); 78 | if(isfield(is,'annCatIds')), is.catImgIdsMap = makeMultiMap(... 79 | is.catIds,is.catIdsMap,is.annCatIds,is.annImgIds,1); end 80 | end 81 | coco.inds=is; fprintf('DONE (t=%0.2fs).\n',etime(clock,clk)); 82 | 83 | function map = makeMap( keys ) 84 | % Make map from key to integer id associated with key. 
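% A minimal illustration (hypothetical keys): makeMap([11 42 7]) returns a
% containers.Map with map(11)=1, map(42)=2 and map(7)=3; an empty key list
% yields an empty map.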
85 | if(isempty(keys)), map=containers.Map(); return; end 86 | map=containers.Map(keys,1:length(keys)); 87 | end 88 | 89 | function map = makeMultiMap( keys, keysMap, keysAll, valsAll, sqz ) 90 | % Make map from keys to set of vals associated with each key. 91 | js=values(keysMap,num2cell(keysAll)); js=[js{:}]; 92 | m=length(js); n=length(keys); k=zeros(1,n); 93 | for i=1:m, j=js(i); k(j)=k(j)+1; end; vs=zeros(n,max(k)); k(:)=0; 94 | for i=1:m, j=js(i); k(j)=k(j)+1; vs(j,k(j))=valsAll(i); end 95 | map = containers.Map('KeyType','double','ValueType','any'); 96 | if(sqz), for j=1:n, map(keys(j))=unique(vs(j,1:k(j))); end 97 | else for j=1:n, map(keys(j))=vs(j,1:k(j)); end; end 98 | end 99 | end 100 | 101 | function ids = getAnnIds( coco, varargin ) 102 | % Get ann ids that satisfy given filter conditions. 103 | % 104 | % USAGE 105 | % ids = coco.getAnnIds( params ) 106 | % 107 | % INPUTS 108 | % params - filtering parameters (struct or name/value pairs) 109 | % setting any filter to [] skips that filter 110 | % .imgIds - [] get anns for given imgs 111 | % .catIds - [] get anns for given cats 112 | % .areaRng - [] get anns for given area range (e.g. [0 inf]) 113 | % .iscrowd - [] get anns for given crowd label (0 or 1) 114 | % 115 | % OUTPUTS 116 | % ids - integer array of ann ids 117 | def = {'imgIds',[],'catIds',[],'areaRng',[],'iscrowd',[]}; 118 | [imgIds,catIds,ar,iscrowd] = getPrmDflt(varargin,def,1); 119 | if( length(imgIds)==1 ) 120 | t = coco.loadAnns(coco.inds.imgAnnIdsMap(imgIds)); 121 | if(~isempty(catIds)), t = t(ismember([t.category_id],catIds)); end 122 | if(~isempty(ar)), a=[t.area]; t = t(a>=ar(1) & a<=ar(2)); end 123 | if(~isempty(iscrowd)), t = t([t.iscrowd]==iscrowd); end 124 | ids = [t.id]; 125 | else 126 | ids=coco.inds.annIds; K = true(length(ids),1); t = coco.inds; 127 | if(~isempty(imgIds)), K = K & ismember(t.annImgIds,imgIds); end 128 | if(~isempty(catIds)), K = K & ismember(t.annCatIds,catIds); end 129 | if(~isempty(ar)), a=t.annAreas; K = K & a>=ar(1) & a<=ar(2); end 130 | if(~isempty(iscrowd)), K = K & t.annIscrowd==iscrowd; end 131 | ids=ids(K); 132 | end 133 | end 134 | 135 | function ids = getCatIds( coco, varargin ) 136 | % Get cat ids that satisfy given filter conditions. 137 | % 138 | % USAGE 139 | % ids = coco.getCatIds( params ) 140 | % 141 | % INPUTS 142 | % params - filtering parameters (struct or name/value pairs) 143 | % setting any filter to [] skips that filter 144 | % .catNms - [] get cats for given cat names 145 | % .supNms - [] get cats for given supercategory names 146 | % .catIds - [] get cats for given cat ids 147 | % 148 | % OUTPUTS 149 | % ids - integer array of cat ids 150 | if(~isfield(coco.data,'categories')), ids=[]; return; end 151 | def={'catNms',[],'supNms',[],'catIds',[]}; t=coco.data.categories; 152 | [catNms,supNms,catIds] = getPrmDflt(varargin,def,1); 153 | if(~isempty(catNms)), t = t(ismember({t.name},catNms)); end 154 | if(~isempty(supNms)), t = t(ismember({t.supercategory},supNms)); end 155 | if(~isempty(catIds)), t = t(ismember([t.id],catIds)); end 156 | ids = [t.id]; 157 | end 158 | 159 | function ids = getImgIds( coco, varargin ) 160 | % Get img ids that satisfy given filter conditions. 
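% Note that when several catIds are supplied the id sets are intersected, so
% only images containing *all* of the requested categories are returned. For
% example (illustrative): coco.getImgIds('catIds',coco.getCatIds('catNms',
% {'person','dog'})) returns only images showing both a person and a dog.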
161 | % 162 | % USAGE 163 | % ids = coco.getImgIds( params ) 164 | % 165 | % INPUTS 166 | % params - filtering parameters (struct or name/value pairs) 167 | % setting any filter to [] skips that filter 168 | % .imgIds - [] get imgs for given ids 169 | % .catIds - [] get imgs with all given cats 170 | % 171 | % OUTPUTS 172 | % ids - integer array of img ids 173 | def={'imgIds',[],'catIds',[]}; ids=coco.inds.imgIds; 174 | [imgIds,catIds] = getPrmDflt(varargin,def,1); 175 | if(~isempty(imgIds)), ids=intersect(ids,imgIds); end 176 | if(isempty(catIds)), return; end 177 | t=values(coco.inds.catImgIdsMap,num2cell(catIds)); 178 | for i=1:length(t), ids=intersect(ids,t{i}); end 179 | end 180 | 181 | function anns = loadAnns( coco, ids ) 182 | % Load anns with the specified ids. 183 | % 184 | % USAGE 185 | % anns = coco.loadAnns( ids ) 186 | % 187 | % INPUTS 188 | % ids - integer ids specifying anns 189 | % 190 | % OUTPUTS 191 | % anns - loaded ann objects 192 | ids = values(coco.inds.annIdsMap,num2cell(ids)); 193 | anns = coco.data.annotations([ids{:}]); 194 | end 195 | 196 | function cats = loadCats( coco, ids ) 197 | % Load cats with the specified ids. 198 | % 199 | % USAGE 200 | % cats = coco.loadCats( ids ) 201 | % 202 | % INPUTS 203 | % ids - integer ids specifying cats 204 | % 205 | % OUTPUTS 206 | % cats - loaded cat objects 207 | if(~isfield(coco.data,'categories')), cats=[]; return; end 208 | ids = values(coco.inds.catIdsMap,num2cell(ids)); 209 | cats = coco.data.categories([ids{:}]); 210 | end 211 | 212 | function imgs = loadImgs( coco, ids ) 213 | % Load imgs with the specified ids. 214 | % 215 | % USAGE 216 | % imgs = coco.loadImgs( ids ) 217 | % 218 | % INPUTS 219 | % ids - integer ids specifying imgs 220 | % 221 | % OUTPUTS 222 | % imgs - loaded img objects 223 | ids = values(coco.inds.imgIdsMap,num2cell(ids)); 224 | imgs = coco.data.images([ids{:}]); 225 | end 226 | 227 | function hs = showAnns( coco, anns ) 228 | % Display the specified annotations. 
229 | % 230 | % USAGE 231 | % hs = coco.showAnns( anns ) 232 | % 233 | % INPUTS 234 | % anns - annotations to display 235 | % 236 | % OUTPUTS 237 | % hs - handles to segment graphic objects 238 | n=length(anns); if(n==0), return; end 239 | r=.4:.2:1; [r,g,b]=ndgrid(r,r,r); cs=[r(:) g(:) b(:)]; 240 | cs=cs(randperm(size(cs,1)),:); cs=repmat(cs,100,1); 241 | if( isfield( anns,'keypoints') ) 242 | for i=1:n 243 | a=anns(i); if(isfield(a,'iscrowd') && a.iscrowd), continue; end 244 | seg={}; if(isfield(a,'segmentation')), seg=a.segmentation; end 245 | k=a.keypoints; x=k(1:3:end)+1; y=k(2:3:end)+1; v=k(3:3:end); 246 | k=coco.loadCats(a.category_id); k=k.skeleton; c=cs(i,:); hold on 247 | p={'FaceAlpha',.25,'LineWidth',2,'EdgeColor',c}; % polygon 248 | for j=seg, xy=j{1}+.5; fill(xy(1:2:end),xy(2:2:end),c,p{:}); end 249 | p={'Color',c,'LineWidth',3}; % skeleton 250 | for j=k, s=j{1}; if(all(v(s)>0)), line(x(s),y(s),p{:}); end; end 251 | p={'MarkerSize',8,'MarkerFaceColor',c,'MarkerEdgeColor'}; % pnts 252 | plot(x(v>0),y(v>0),'o',p{:},'k'); 253 | plot(x(v>1),y(v>1),'o',p{:},c); hold off; 254 | end 255 | elseif( any(isfield(anns,{'segmentation','bbox'})) ) 256 | if(~isfield(anns,'iscrowd')), [anns(:).iscrowd]=deal(0); end 257 | if(~isfield(anns,'segmentation')), S={anns.bbox}; %#ok 258 | for i=1:n, x=S{i}(1); w=S{i}(3); y=S{i}(2); h=S{i}(4); 259 | anns(i).segmentation={[x,y,x,y+h,x+w,y+h,x+w,y]}; end; end 260 | S={anns.segmentation}; hs=zeros(10000,1); k=0; hold on; 261 | pFill={'FaceAlpha',.4,'LineWidth',3}; 262 | for i=1:n 263 | if(anns(i).iscrowd), C=[.01 .65 .40]; else C=rand(1,3); end 264 | if(isstruct(S{i})), M=double(MaskApi.decode(S{i})); k=k+1; 265 | hs(k)=imagesc(cat(3,M*C(1),M*C(2),M*C(3)),'Alphadata',M*.5); 266 | else for j=1:length(S{i}), P=S{i}{j}+.5; k=k+1; 267 | hs(k)=fill(P(1:2:end),P(2:2:end),C,pFill{:}); end 268 | end 269 | end 270 | hs=hs(1:k); hold off; 271 | elseif( isfield(anns,'caption') ) 272 | S={anns.caption}; 273 | for i=1:n, S{i}=[int2str(i) ') ' S{i} '\newline']; end 274 | S=[S{:}]; title(S,'FontSize',12); 275 | end 276 | end 277 | 278 | function cocoRes = loadRes( coco, resFile ) 279 | % Load algorithm results and create API for accessing them. 280 | % 281 | % The API for accessing and viewing algorithm results is identical to 282 | % the CocoApi for the ground truth. The single difference is that the 283 | % ground truth results are replaced by the algorithm results. 284 | % 285 | % USAGE 286 | % cocoRes = coco.loadRes( resFile ) 287 | % 288 | % INPUTS 289 | % resFile - COCO results filename 290 | % 291 | % OUTPUTS 292 | % cocoRes - initialized results API 293 | fprintf('Loading and preparing results... 
'); clk=clock; 294 | cdata=coco.data; R=gason(fileread(resFile)); m=length(R); 295 | valid=ismember([R.image_id],[cdata.images.id]); 296 | if(~all(valid)), error('Results provided for invalid images.'); end 297 | t={'segmentation','bbox','keypoints','caption'}; t=t{isfield(R,t)}; 298 | if(strcmp(t,'caption')) 299 | for i=1:m, R(i).id=i; end; imgs=cdata.images; 300 | cdata.images=imgs(ismember([imgs.id],[R.image_id])); 301 | else 302 | assert(all(isfield(R,{'category_id','score',t}))); 303 | s=cat(1,R.(t)); if(strcmp(t,'bbox')), a=s(:,3).*s(:,4); end 304 | if(strcmp(t,'segmentation')), a=MaskApi.area(s); end 305 | if(strcmp(t,'keypoints')), x=s(:,1:3:end)'; y=s(:,2:3:end)'; 306 | a=(max(x)-min(x)).*(max(y)-min(y)); end 307 | for i=1:m, R(i).area=a(i); R(i).id=i; end 308 | end 309 | fprintf('DONE (t=%0.2fs).\n',etime(clock,clk)); 310 | cdata.annotations=R; cocoRes=CocoApi(cdata); 311 | end 312 | end 313 | 314 | end 315 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/CocoUtils.m: -------------------------------------------------------------------------------- 1 | classdef CocoUtils 2 | % Utility functions for testing and validation of COCO code. 3 | % 4 | % The following utility functions are defined: 5 | % convertPascalGt - Convert ground truth for PASCAL to COCO format. 6 | % convertImageNetGt - Convert ground truth for ImageNet to COCO format. 7 | % convertPascalDt - Convert detections on PASCAL to COCO format. 8 | % convertImageNetDt - Convert detections on ImageNet to COCO format. 9 | % validateOnPascal - Validate COCO eval code against PASCAL code. 10 | % validateOnImageNet - Validate COCO eval code against ImageNet code. 11 | % generateFakeDt - Generate fake detections from ground truth. 12 | % validateMaskApi - Validate MaskApi against Matlab functions. 13 | % gasonSplit - Split JSON file into multiple JSON files. 14 | % gasonMerge - Merge JSON files into single JSON file. 15 | % Help on each functions can be accessed by: "help CocoUtils>function". 16 | % 17 | % See also CocoApi MaskApi CocoEval CocoUtils>convertPascalGt 18 | % CocoUtils>convertImageNetGt CocoUtils>convertPascalDt 19 | % CocoUtils>convertImageNetDt CocoUtils>validateOnPascal 20 | % CocoUtils>validateOnImageNet CocoUtils>generateFakeDt 21 | % CocoUtils>validateMaskApi CocoUtils>gasonSplit CocoUtils>gasonMerge 22 | % 23 | % Microsoft COCO Toolbox. version 2.0 24 | % Data, paper, and tutorials available at: http://mscoco.org/ 25 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 26 | % Licensed under the Simplified BSD License [see coco/license.txt] 27 | 28 | methods( Static ) 29 | function convertPascalGt( dataDir, year, split, annFile ) 30 | % Convert ground truth for PASCAL to COCO format. 31 | % 32 | % USAGE 33 | % CocoUtils.convertPascalGt( dataDir, year, split, annFile ) 34 | % 35 | % INPUTS 36 | % dataDir - dir containing VOCdevkit/ 37 | % year - dataset year (e.g. '2007') 38 | % split - dataset split (e.g. 'val') 39 | % annFile - annotation file for writing results 40 | if(exist(annFile,'file')), return; end 41 | fprintf('Converting PASCAL VOC dataset... 
'); clk=tic; 42 | dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode'])); 43 | VOCinit; C=VOCopts.classes'; catsMap=containers.Map(C,1:length(C)); 44 | f=fopen([dev '/VOC' year '/ImageSets/Main/' split '.txt']); 45 | is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is); 46 | data=CocoUtils.initData(C,n); 47 | for i=1:n, nm=[is{i} '.jpg']; 48 | f=[dev '/VOC' year '/Annotations/' is{i} '.xml']; 49 | R=PASreadrecord(f); hw=R.imgsize([2 1]); O=R.objects; 50 | id=is{i}; id(id=='_')=[]; id=str2double(id); 51 | ignore=[O.difficult]; bbs=cat(1,O.bbox); 52 | t=catsMap.values({O.class}); catIds=[t{:}]; iscrowd=ignore*0; 53 | data=CocoUtils.addData(data,nm,id,hw,catIds,ignore,iscrowd,bbs); 54 | end 55 | f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f); 56 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 57 | end 58 | 59 | function convertImageNetGt( dataDir, year, split, annFile ) 60 | % Convert ground truth for ImageNet to COCO format. 61 | % 62 | % USAGE 63 | % CocoUtils.convertImageNetGt( dataDir, year, split, annFile ) 64 | % 65 | % INPUTS 66 | % dataDir - dir containing ILSVRC*/ folders 67 | % year - dataset year (e.g. '2013') 68 | % split - dataset split (e.g. 'val') 69 | % annFile - annotation file for writing results 70 | if(exist(annFile,'file')), return; end 71 | fprintf('Converting ImageNet dataset... '); clk=tic; 72 | dev=[dataDir '/ILSVRC' year '_devkit/']; 73 | addpath(genpath([dev '/evaluation/'])); 74 | t=[dev '/data/meta_det.mat']; 75 | t=load(t); synsets=t.synsets(1:200); catNms={synsets.name}; 76 | catsMap=containers.Map({synsets.WNID},1:length(catNms)); 77 | if(~strcmp(split,'val')), blacklist=cell(1,2); else 78 | f=[dev '/data/' 'ILSVRC' year '_det_validation_blacklist.txt']; 79 | f=fopen(f); blacklist=textscan(f,'%d %s'); fclose(f); 80 | t=catsMap.values(blacklist{2}); blacklist{2}=[t{:}]; 81 | end 82 | if(strcmp(split,'train')) 83 | dl=@(i) [dev '/data/det_lists/' split '_pos_' int2str(i) '.txt']; 84 | is=cell(1,200); for i=1:200, f=fopen(dl(i)); 85 | is{i}=textscan(f,'%s %*s'); is{i}=is{i}{1}; fclose(f); end 86 | is=unique(cat(1,is{:})); n=length(is); 87 | else 88 | f=fopen([dev '/data/det_lists/' split '.txt']); 89 | is=textscan(f,'%s %*s'); is=is{1}; fclose(f); n=length(is); 90 | end 91 | data=CocoUtils.initData(catNms,n); 92 | for i=1:n 93 | f=[dataDir '/ILSVRC' year '_DET_bbox_' split '/' is{i} '.xml']; 94 | R=VOCreadxml(f); R=R.annotation; nm=[is{i} '.JPEG']; 95 | hw=str2double({R.size.height R.size.width}); 96 | if(~isfield(R,'object')), catIds=[]; bbs=[]; else 97 | O=R.object; t=catsMap.values({O.name}); catIds=[t{:}]; 98 | b=[O.bndbox]; bbs=str2double({b.xmin; b.ymin; b.xmax; b.ymax})'; 99 | end 100 | j=blacklist{2}(blacklist{1}==i); m=numel(j); b=[0 0 hw(2) hw(1)]; 101 | catIds=[j catIds]; bbs=[repmat(b,m,1); bbs]; %#ok 102 | ignore=ismember(catIds,j); iscrowd=ignore*0; iscrowd(1:m)=1; 103 | data=CocoUtils.addData(data,nm,i,hw,catIds,ignore,iscrowd,bbs); 104 | end 105 | f=fopen(annFile,'w'); fwrite(f,gason(data)); fclose(f); 106 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 107 | end 108 | 109 | function convertPascalDt( srcFiles, tarFile ) 110 | % Convert detections on PASCAL to COCO format. 
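% Each source file is expected to be a PASCAL "comp3"-style text file with one
% detection per line (image id, score, x0, y0, x1, y1 with 1-indexed corners);
% the converter writes 0-indexed [x y w h] boxes, and the i-th file in srcFiles
% is assigned category_id i, so the files must be ordered by category.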
111 | % 112 | % USAGE 113 | % CocoUtils.convertPascalDt( srcFiles, tarFile ) 114 | % 115 | % INPUTS 116 | % srcFiles - source detection file(s) in PASCAL format 117 | % tarFile - target detection file in COCO format 118 | if(exist(tarFile,'file')), return; end; R=[]; 119 | for i=1:length(srcFiles), f=fopen(srcFiles{i},'r'); 120 | R1=textscan(f,'%d %f %f %f %f %f'); fclose(f); 121 | [~,~,x0,y0,x1,y1]=deal(R1{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1]; 122 | b(:,3:4)=max(b(:,3:4),1); b=mat2cell(b,ones(1,size(b,1)),4); 123 | R=[R; struct('image_id',num2cell(R1{1}),'bbox',b,... 124 | 'category_id',i,'score',num2cell(R1{2}))]; %#ok 125 | end 126 | f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f); 127 | end 128 | 129 | function convertImageNetDt( srcFile, tarFile ) 130 | % Convert detections on ImageNet to COCO format. 131 | % 132 | % USAGE 133 | % CocoUtils.convertImageNetDt( srcFile, tarFile ) 134 | % 135 | % INPUTS 136 | % srcFile - source detection file in ImageNet format 137 | % tarFile - target detection file in COCO format 138 | if(exist(tarFile,'file')), return; end; f=fopen(srcFile,'r'); 139 | R=textscan(f,'%d %d %f %f %f %f %f'); fclose(f); 140 | [~,~,~,x0,y0,x1,y1]=deal(R{:}); b=[x0-1 y0-1 x1-x0+1 y1-y0+1]; 141 | b(:,3:4)=max(b(:,3:4),1); bbox=mat2cell(b,ones(1,size(b,1)),4); 142 | R=struct('image_id',num2cell(R{1}),'bbox',bbox,... 143 | 'category_id',num2cell(R{2}),'score',num2cell(R{3})); 144 | f=fopen(tarFile,'w'); fwrite(f,gason(R)); fclose(f); 145 | end 146 | 147 | function validateOnPascal( dataDir ) 148 | % Validate COCO eval code against PASCAL code. 149 | % 150 | % USAGE 151 | % CocoUtils.validateOnPascal( dataDir ) 152 | % 153 | % INPUTS 154 | % dataDir - dir containing VOCdevkit/ 155 | split='val'; year='2007'; thrs=0:.001:1; T=length(thrs); 156 | dev=[dataDir '/VOCdevkit/']; addpath(genpath([dev '/VOCcode/'])); 157 | d=pwd; cd(dev); VOCinit; cd(d); O=VOCopts; O.testset=split; 158 | O.detrespath=[O.detrespath(1:end-10) split '_%s.txt']; 159 | catNms=O.classes; K=length(catNms); ap=zeros(K,1); 160 | for i=1:K, [R,P]=VOCevaldet(O,'comp3',catNms{i},0); R1=[R; inf]; 161 | P1=[P; 0]; for t=1:T, ap(i)=ap(i)+max(P1(R1>=thrs(t)))/T; end; end 162 | srcFile=[dev '/results/VOC' year '/Main/comp3_det_' split]; 163 | resFile=[srcFile '.json']; annFile=[dev '/VOC2007/' split '.json']; 164 | sfs=cell(1,K); for i=1:K, sfs{i}=[srcFile '_' catNms{i} '.txt']; end 165 | CocoUtils.convertPascalGt(dataDir,year,split,annFile); 166 | CocoUtils.convertPascalDt(sfs,resFile); 167 | D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R); 168 | p=E.params; p.recThrs=thrs; p.iouThrs=.5; p.areaRng=[0 inf]; 169 | p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate(); 170 | apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap); 171 | fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',... 172 | mean(deltas),median(deltas),max(deltas)) 173 | if(max(deltas)>1e-2), msg='FAILED'; else msg='PASSED'; end 174 | warning(['Eval code *' msg '* validation!']); 175 | end 176 | 177 | function validateOnImageNet( dataDir ) 178 | % Validate COCO eval code against ImageNet code. 179 | % 180 | % USAGE 181 | % CocoUtils.validateOnImageNet( dataDir ) 182 | % 183 | % INPUTS 184 | % dataDir - dir containing ILSVRC*/ folders 185 | warning(['Set pixelTolerance=0 in line 30 of eval_detection.m '... 
186 | '(and delete cache) otherwise AP will differ by >1e-4!']); 187 | year='2013'; dev=[dataDir '/ILSVRC' year '_devkit/']; 188 | fs = { [dev 'evaluation/demo.val.pred.det.txt'] 189 | [dataDir '/ILSVRC' year '_DET_bbox_val/'] 190 | [dev 'data/meta_det.mat'] 191 | [dev 'data/det_lists/val.txt'] 192 | [dev 'data/ILSVRC' year '_det_validation_blacklist.txt'] 193 | [dev 'data/ILSVRC' year '_det_validation_cache.mat'] }; 194 | addpath(genpath([dev 'evaluation/'])); 195 | ap=eval_detection(fs{:})'; 196 | resFile=[fs{1}(1:end-3) 'json']; 197 | annFile=[dev 'data/ILSVRC' year '_val.json']; 198 | CocoUtils.convertImageNetDt(fs{1},resFile); 199 | CocoUtils.convertImageNetGt(dataDir,year,'val',annFile) 200 | D=CocoApi(annFile); R=D.loadRes(resFile); E=CocoEval(D,R); 201 | p=E.params; p.recThrs=0:.0001:1; p.iouThrs=.5; p.areaRng=[0 inf]; 202 | p.useSegm=0; p.maxDets=inf; E.params=p; E.evaluate(); E.accumulate(); 203 | apCoco=squeeze(mean(E.eval.precision,2)); deltas=abs(apCoco-ap); 204 | fprintf('AP delta: mean=%.2e median=%.2e max=%.2e\n',... 205 | mean(deltas),median(deltas),max(deltas)) 206 | if(max(deltas)>1e-4), msg='FAILED'; else msg='PASSED'; end 207 | warning(['Eval code *' msg '* validation!']); 208 | end 209 | 210 | function generateFakeDt( coco, dtFile, varargin ) 211 | % Generate fake detections from ground truth. 212 | % 213 | % USAGE 214 | % CocoUtils.generateFakeDt( coco, dtFile, varargin ) 215 | % 216 | % INPUTS 217 | % coco - instance of CocoApi containing ground truth 218 | % dtFile - target file for writing detection results 219 | % params - parameters (struct or name/value pairs) 220 | % .n - [100] number images for which to generate dets 221 | % .fn - [.20] false negative rate (00; if(~any(v)), continue; end 251 | x=o(1:3:end); y=o(2:3:end); x(~v)=mean(x(v)); y(~v)=mean(y(v)); 252 | x=max(0,min(w-1,x+dx)); o(1:3:end)=x; o(2:3:end)=y; 253 | end 254 | k=k+1; R(k).image_id=imgIds(i); R(k).category_id=catId; 255 | R(k).(opts.type)=o; R(k).score=round(rand(rstream)*1000)/1000; 256 | end 257 | end 258 | R=R(1:k); f=fopen(dtFile,'w'); fwrite(f,gason(R)); fclose(f); 259 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 260 | end 261 | 262 | function validateMaskApi( coco ) 263 | % Validate MaskApi against Matlab functions. 264 | % 265 | % USAGE 266 | % CocoUtils.validateMaskApi( coco ) 267 | % 268 | % INPUTS 269 | % coco - instance of CocoApi containing ground truth 270 | S=coco.data.annotations; S=S(~[S.iscrowd]); S={S.segmentation}; 271 | h=1000; n=1000; Z=cell(1,n); A=Z; B=Z; M=Z; IB=zeros(1,n); 272 | fprintf('Running MaskApi implementations... '); clk=tic; 273 | for i=1:n, A{i}=MaskApi.frPoly(S{i},h,h); end 274 | Ia=MaskApi.iou(A{1},[A{:}]); 275 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 276 | fprintf('Running Matlab implementations... '); clk=tic; 277 | for i=1:n, M1=0; for j=1:length(S{i}), x=S{i}{j}+.5; 278 | M1=M1+poly2mask(x(1:2:end),x(2:2:end),h,h); end 279 | M{i}=uint8(M1>0); B{i}=MaskApi.encode(M{i}); 280 | IB(i)=sum(sum(M{1}&M{i}))/sum(sum(M{1}|M{i})); 281 | end 282 | fprintf('DONE (t=%0.2fs).\n',toc(clk)); 283 | if(isequal(A,B)&&isequal(Ia,IB)), 284 | msg='PASSED'; else msg='FAILED'; end 285 | warning(['MaskApi *' msg '* validation!']); 286 | end 287 | 288 | function gasonSplit( name, k ) 289 | % Split JSON file into multiple JSON files. 290 | % 291 | % Splits file 'name.json' into multiple files 'name-*.json'. Only 292 | % works for JSON arrays. Memory efficient. Inverted by gasonMerge(). 
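% For example (illustrative file name): if 'annos.json' holds a 9-element JSON
% array, CocoUtils.gasonSplit('annos',3) writes annos-000001.json through
% annos-000003.json with 3 elements each, and CocoUtils.gasonMerge('annos')
% reassembles the original file.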
293 | % 294 | % USAGE 295 | % CocoUtils.gasonSplit( name, k ) 296 | % 297 | % INPUTS 298 | % name - file containing JSON array (w/o '.json' ext) 299 | % k - number of files to split JSON into 300 | s=gasonMex('split',fileread([name '.json']),k); k=length(s); 301 | for i=1:k, f=fopen(sprintf('%s-%06i.json',name,i),'w'); 302 | fwrite(f,s{i}); fclose(f); end 303 | end 304 | 305 | function gasonMerge( name ) 306 | % Merge JSON files into single JSON file. 307 | % 308 | % Merge files 'name-*.json' into single file 'name.json'. Only works 309 | % for JSON arrays. Memory efficient. Inverted by gasonSplit(). 310 | % 311 | % USAGE 312 | % CocoUtils.gasonMerge( name ) 313 | % 314 | % INPUTS 315 | % name - files containing JSON arrays (w/o '.json' ext) 316 | s=dir([name '-*.json']); s=sort({s.name}); k=length(s); 317 | p=fileparts(name); for i=1:k, s{i}=fullfile(p,s{i}); end 318 | for i=1:k, s{i}=fileread(s{i}); end; s=gasonMex('merge',s); 319 | f=fopen([name '.json'],'w'); fwrite(f,s); fclose(f); 320 | end 321 | end 322 | 323 | methods( Static, Access=private ) 324 | function data = initData( catNms, n ) 325 | % Helper for convert() functions: init annotations. 326 | m=length(catNms); ms=num2cell(1:m); 327 | I = struct('file_name',0,'height',0,'width',0,'id',0); 328 | C = struct('supercategory','none','id',ms,'name',catNms); 329 | A = struct('segmentation',0,'area',0,'iscrowd',0,... 330 | 'image_id',0,'bbox',0,'category_id',0,'id',0,'ignore',0); 331 | I=repmat(I,1,n); A=repmat(A,1,n*20); 332 | data = struct('images',I,'type','instances',... 333 | 'annotations',A,'categories',C,'nImgs',0,'nAnns',0); 334 | end 335 | 336 | function data = addData( data,nm,id,hw,catIds,ignore,iscrowd,bbs ) 337 | % Helper for convert() functions: add annotations. 338 | data.nImgs=data.nImgs+1; 339 | data.images(data.nImgs)=struct('file_name',nm,... 340 | 'height',hw(1),'width',hw(2),'id',id); 341 | for j=1:length(catIds), data.nAnns=data.nAnns+1; k=data.nAnns; 342 | b=bbs(j,:); b=b-1; b(3:4)=b(3:4)-b(1:2)+1; 343 | x1=b(1); x2=b(1)+b(3); y1=b(2); y2=b(2)+b(4); 344 | S={{[x1 y1 x1 y2 x2 y2 x2 y1]}}; a=b(3)*b(4); 345 | data.annotations(k)=struct('segmentation',S,'area',a,... 346 | 'iscrowd',iscrowd(j),'image_id',id,'bbox',b,... 347 | 'category_id',catIds(j),'id',k,'ignore',ignore(j)); 348 | end 349 | if( data.nImgs == length(data.images) ) 350 | data.annotations=data.annotations(1:data.nAnns); 351 | data=rmfield(data,{'nImgs','nAnns'}); 352 | end 353 | end 354 | end 355 | 356 | end 357 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/MaskApi.m: -------------------------------------------------------------------------------- 1 | classdef MaskApi 2 | % Interface for manipulating masks stored in RLE format. 3 | % 4 | % RLE is a simple yet efficient format for storing binary masks. RLE 5 | % first divides a vector (or vectorized image) into a series of piecewise 6 | % constant regions and then for each piece simply stores the length of 7 | % that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 8 | % be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 9 | % (note that the odd counts are always the numbers of zeros). Instead of 10 | % storing the counts directly, additional compression is achieved with a 11 | % variable bitrate representation based on a common scheme called LEB128. 12 | % 13 | % Compression is greatest given large piecewise constant regions. 
14 | % Specifically, the size of the RLE is proportional to the number of 15 | % *boundaries* in M (or for an image the number of boundaries in the y 16 | % direction). Assuming fairly simple shapes, the RLE representation is 17 | % O(sqrt(n)) where n is number of pixels in the object. Hence space usage 18 | % is substantially lower, especially for large simple objects (large n). 19 | % 20 | % Many common operations on masks can be computed directly using the RLE 21 | % (without need for decoding). This includes computations such as area, 22 | % union, intersection, etc. All of these operations are linear in the 23 | % size of the RLE, in other words they are O(sqrt(n)) where n is the area 24 | % of the object. Computing these operations on the original mask is O(n). 25 | % Thus, using the RLE can result in substantial computational savings. 26 | % 27 | % The following API functions are defined: 28 | % encode - Encode binary masks using RLE. 29 | % decode - Decode binary masks encoded via RLE. 30 | % merge - Compute union or intersection of encoded masks. 31 | % iou - Compute intersection over union between masks. 32 | % nms - Compute non-maximum suppression between ordered masks. 33 | % area - Compute area of encoded masks. 34 | % toBbox - Get bounding boxes surrounding encoded masks. 35 | % frBbox - Convert bounding boxes to encoded masks. 36 | % frPoly - Convert polygon to encoded mask. 37 | % 38 | % Usage: 39 | % Rs = MaskApi.encode( masks ) 40 | % masks = MaskApi.decode( Rs ) 41 | % R = MaskApi.merge( Rs, [intersect=false] ) 42 | % o = MaskApi.iou( dt, gt, [iscrowd=false] ) 43 | % keep = MaskApi.nms( dt, thr ) 44 | % a = MaskApi.area( Rs ) 45 | % bbs = MaskApi.toBbox( Rs ) 46 | % Rs = MaskApi.frBbox( bbs, h, w ) 47 | % R = MaskApi.frPoly( poly, h, w ) 48 | % 49 | % In the API the following formats are used: 50 | % R,Rs - [struct] Run-length encoding of binary mask(s) 51 | % masks - [hxwxn] Binary mask(s) (must have type uint8) 52 | % bbs - [nx4] Bounding box(es) stored as [x y w h] 53 | % poly - Polygon stored as {[x1 y1 x2 y2...],[x1 y1 ...],...} 54 | % dt,gt - May be either bounding boxes or encoded masks 55 | % Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 56 | % 57 | % Finally, a note about the intersection over union (iou) computation. 58 | % The standard iou of a ground truth (gt) and detected (dt) object is 59 | % iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 60 | % For "crowd" regions, we use a modified criteria. If a gt object is 61 | % marked as "iscrowd", we allow a dt to match any subregion of the gt. 62 | % Choosing gt' in the crowd gt that best matches the dt can be done using 63 | % gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 64 | % iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 65 | % For crowd gt regions we use this modified criteria above for the iou. 66 | % 67 | % To compile use the following (some precompiled binaries are included): 68 | % mex('CFLAGS=\$CFLAGS -Wall -std=c99','-largeArrayDims',... 69 | % 'private/maskApiMex.c','../common/maskApi.c',... 70 | % '-I../common/','-outdir','private'); 71 | % Please do not contact us for help with compiling. 72 | % 73 | % Microsoft COCO Toolbox. version 2.0 74 | % Data, paper, and tutorials available at: http://mscoco.org/ 75 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
76 | % Licensed under the Simplified BSD License [see coco/license.txt] 77 | 78 | methods( Static ) 79 | function Rs = encode( masks ) 80 | Rs = maskApiMex( 'encode', masks ); 81 | end 82 | 83 | function masks = decode( Rs ) 84 | masks = maskApiMex( 'decode', Rs ); 85 | end 86 | 87 | function R = merge( Rs, varargin ) 88 | R = maskApiMex( 'merge', Rs, varargin{:} ); 89 | end 90 | 91 | function o = iou( dt, gt, varargin ) 92 | o = maskApiMex( 'iou', dt', gt', varargin{:} ); 93 | end 94 | 95 | function keep = nms( dt, thr ) 96 | keep = maskApiMex('nms',dt',thr); 97 | end 98 | 99 | function a = area( Rs ) 100 | a = maskApiMex( 'area', Rs ); 101 | end 102 | 103 | function bbs = toBbox( Rs ) 104 | bbs = maskApiMex( 'toBbox', Rs )'; 105 | end 106 | 107 | function Rs = frBbox( bbs, h, w ) 108 | Rs = maskApiMex( 'frBbox', bbs', h, w ); 109 | end 110 | 111 | function R = frPoly( poly, h, w ) 112 | R = maskApiMex( 'frPoly', poly, h , w ); 113 | end 114 | end 115 | 116 | end 117 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/cocoDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo for the CocoApi (see CocoApi.m) 2 | 3 | %% initialize COCO api (please specify dataType/annType below) 4 | annTypes = { 'instances', 'captions', 'person_keypoints' }; 5 | dataType='val2014'; annType=annTypes{1}; % specify dataType/annType 6 | annFile=sprintf('../annotations/%s_%s.json',annType,dataType); 7 | coco=CocoApi(annFile); 8 | 9 | %% display COCO categories and supercategories 10 | if( ~strcmp(annType,'captions') ) 11 | cats = coco.loadCats(coco.getCatIds()); 12 | nms={cats.name}; fprintf('COCO categories: '); 13 | fprintf('%s, ',nms{:}); fprintf('\n'); 14 | nms=unique({cats.supercategory}); fprintf('COCO supercategories: '); 15 | fprintf('%s, ',nms{:}); fprintf('\n'); 16 | end 17 | 18 | %% get all images containing given categories, select one at random 19 | catIds = coco.getCatIds('catNms',{'person','dog','skateboard'}); 20 | imgIds = coco.getImgIds('catIds',catIds); 21 | imgId = imgIds(randi(length(imgIds))); 22 | 23 | %% load and display image 24 | img = coco.loadImgs(imgId); 25 | I = imread(sprintf('../images/%s/%s',dataType,img.file_name)); 26 | figure(1); imagesc(I); axis('image'); set(gca,'XTick',[],'YTick',[]) 27 | 28 | %% load and display annotations 29 | annIds = coco.getAnnIds('imgIds',imgId,'catIds',catIds,'iscrowd',[]); 30 | anns = coco.loadAnns(annIds); coco.showAnns(anns); 31 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/evalDemo.m: -------------------------------------------------------------------------------- 1 | %% Demo demonstrating the algorithm result formats for COCO 2 | 3 | %% select results type for demo (either bbox or segm) 4 | type = {'segm','bbox','keypoints'}; type = type{1}; % specify type here 5 | fprintf('Running demo for *%s* results.\n\n',type); 6 | 7 | %% initialize COCO ground truth api 8 | dataDir='../'; prefix='instances'; dataType='val2014'; 9 | if(strcmp(type,'keypoints')), prefix='person_keypoints'; end 10 | annFile=sprintf('%s/annotations/%s_%s.json',dataDir,prefix,dataType); 11 | cocoGt=CocoApi(annFile); 12 | 13 | %% initialize COCO detections api 14 | resFile='%s/results/%s_%s_fake%s100_results.json'; 15 | resFile=sprintf(resFile,dataDir,prefix,dataType,type); 16 | cocoDt=cocoGt.loadRes(resFile); 17 | 18 | %% visialuze gt and dt side by side 19 | imgIds=sort(cocoGt.getImgIds()); imgIds=imgIds(1:100); 20 | 
imgId = imgIds(randi(100)); img = cocoGt.loadImgs(imgId); 21 | I = imread(sprintf('%s/images/val2014/%s',dataDir,img.file_name)); 22 | figure(1); subplot(1,2,1); imagesc(I); axis('image'); axis off; 23 | annIds = cocoGt.getAnnIds('imgIds',imgId); title('ground truth') 24 | anns = cocoGt.loadAnns(annIds); cocoGt.showAnns(anns); 25 | figure(1); subplot(1,2,2); imagesc(I); axis('image'); axis off; 26 | annIds = cocoDt.getAnnIds('imgIds',imgId); title('results') 27 | anns = cocoDt.loadAnns(annIds); cocoDt.showAnns(anns); 28 | 29 | %% load raw JSON and show exact format for results 30 | fprintf('results structure have the following format:\n'); 31 | res = gason(fileread(resFile)); disp(res) 32 | 33 | %% the following command can be used to save the results back to disk 34 | if(0), f=fopen(resFile,'w'); fwrite(f,gason(res)); fclose(f); end 35 | 36 | %% run COCO evaluation code (see CocoEval.m) 37 | cocoEval=CocoEval(cocoGt,cocoDt,type); 38 | cocoEval.params.imgIds=imgIds; 39 | cocoEval.evaluate(); 40 | cocoEval.accumulate(); 41 | cocoEval.summarize(); 42 | 43 | %% generate Derek Hoiem style analyis of false positives (slow) 44 | if(0), cocoEval.analyze(); end 45 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/gason.m: -------------------------------------------------------------------------------- 1 | function out = gason( in ) 2 | % Convert between JSON strings and corresponding JSON objects. 3 | % 4 | % This parser is based on Gason written and maintained by Ivan Vashchaev: 5 | % https://github.com/vivkin/gason 6 | % Gason is a "lightweight and fast JSON parser for C++". Please see the 7 | % above link for license information and additional details about Gason. 8 | % 9 | % Given a JSON string, gason calls the C++ parser and converts the output 10 | % into an appropriate Matlab structure. As the parsing is performed in mex 11 | % the resulting parser is blazingly fast. Large JSON structs (100MB+) take 12 | % only a few seconds to parse (compared to hours for pure Matlab parsers). 13 | % 14 | % Given a JSON object, gason calls the C++ encoder to convert the object 15 | % back into a JSON string representation. Nearly any Matlab struct, cell 16 | % array, or numeric array represent a valid JSON object. Note that gason() 17 | % can be used to go both from JSON string to JSON object and back. 18 | % 19 | % Gason requires C++11 to compile (for GCC this requires version 4.7 or 20 | % later). The following command compiles the parser (may require tweaking): 21 | % mex('CXXFLAGS=\$CXXFLAGS -std=c++11 -Wall','-largeArrayDims',... 22 | % 'private/gasonMex.cpp','../common/gason.cpp',... 23 | % '-I../common/','-outdir','private'); 24 | % Note the use of the "-std=c++11" flag. A number of precompiled binaries 25 | % are included, please do not contact us for help with compiling. If needed 26 | % you can specify a compiler by adding the option 'CXX="/usr/bin/g++"'. 27 | % 28 | % Note that by default JSON arrays that contain only numbers are stored as 29 | % regular Matlab arrays. Likewise, JSON arrays that contain only objects of 30 | % the same type are stored as Matlab struct arrays. This is much faster and 31 | % can use considerably less memory than always using Matlab cell arrays. 
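% For example (illustrative): gason('[1,2,3]') returns the numeric row vector
% [1 2 3], whereas a mixed array such as gason('[1,"a"]') comes back as the
% cell array {1,'a'}.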
32 | % 33 | % USAGE 34 | % object = gason( string ) 35 | % string = gason( object ) 36 | % 37 | % INPUTS/OUTPUTS 38 | % string - JSON string 39 | % object - JSON object 40 | % 41 | % EXAMPLE 42 | % o = struct('first',{'piotr','ty'},'last',{'dollar','lin'}) 43 | % s = gason( o ) % convert JSON object -> JSON string 44 | % p = gason( s ) % convert JSON string -> JSON object 45 | % 46 | % See also 47 | % 48 | % Microsoft COCO Toolbox. version 2.0 49 | % Data, paper, and tutorials available at: http://mscoco.org/ 50 | % Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 51 | % Licensed under the Simplified BSD License [see coco/license.txt] 52 | 53 | out = gasonMex( 'convert', in ); 54 | -------------------------------------------------------------------------------- /cocoapi_ro/MatlabAPI/private/gasonMex.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "gason.h" 8 | #include "mex.h" 9 | #include "string.h" 10 | #include "math.h" 11 | #include 12 | #include 13 | #include 14 | typedef std::ostringstream ostrm; 15 | typedef unsigned long siz; 16 | typedef unsigned short ushort; 17 | 18 | siz length( const JsonValue &a ) { 19 | // get number of elements in JSON_ARRAY or JSON_OBJECT 20 | siz k=0; auto n=a.toNode(); while(n) { k++; n=n->next; } return k; 21 | } 22 | 23 | bool isRegularObjArray( const JsonValue &a ) { 24 | // check if all JSON_OBJECTs in JSON_ARRAY have the same fields 25 | JsonValue o=a.toNode()->value; siz k, n; const char **keys; 26 | n=length(o); keys=new const char*[n]; 27 | k=0; for(auto j:o) keys[k++]=j->key; 28 | for( auto i:a ) { 29 | if(length(i->value)!=n) return false; k=0; 30 | for(auto j:i->value) if(strcmp(j->key,keys[k++])) return false; 31 | } 32 | delete [] keys; return true; 33 | } 34 | 35 | mxArray* json( const JsonValue &o ) { 36 | // convert JsonValue to Matlab mxArray 37 | siz k, m, n; mxArray *M; const char **keys; 38 | switch( o.getTag() ) { 39 | case JSON_NUMBER: 40 | return mxCreateDoubleScalar(o.toNumber()); 41 | case JSON_STRING: 42 | return mxCreateString(o.toString()); 43 | case JSON_ARRAY: { 44 | if(!o.toNode()) return mxCreateDoubleMatrix(1,0,mxREAL); 45 | JsonValue o0=o.toNode()->value; JsonTag tag=o0.getTag(); 46 | n=length(o); bool isRegular=true; 47 | for(auto i:o) isRegular=isRegular && i->value.getTag()==tag; 48 | if( isRegular && tag==JSON_OBJECT && isRegularObjArray(o) ) { 49 | m=length(o0); keys=new const char*[m]; 50 | k=0; for(auto j:o0) keys[k++]=j->key; 51 | M = mxCreateStructMatrix(1,n,m,keys); 52 | k=0; for(auto i:o) { m=0; for(auto j:i->value) 53 | mxSetFieldByNumber(M,k,m++,json(j->value)); k++; } 54 | delete [] keys; return M; 55 | } else if( isRegular && tag==JSON_NUMBER ) { 56 | M = mxCreateDoubleMatrix(1,n,mxREAL); double *p=mxGetPr(M); 57 | k=0; for(auto i:o) p[k++]=i->value.toNumber(); return M; 58 | } else { 59 | M = mxCreateCellMatrix(1,n); 60 | k=0; for(auto i:o) mxSetCell(M,k++,json(i->value)); 61 | return M; 62 | } 63 | } 64 | case JSON_OBJECT: 65 | if(!o.toNode()) return mxCreateStructMatrix(1,0,0,NULL); 66 | n=length(o); keys=new const char*[n]; 67 | k=0; for(auto i:o) 
keys[k++]=i->key; 68 | M = mxCreateStructMatrix(1,1,n,keys); k=0; 69 | for(auto i:o) mxSetFieldByNumber(M,0,k++,json(i->value)); 70 | delete [] keys; return M; 71 | case JSON_TRUE: 72 | return mxCreateDoubleScalar(1); 73 | case JSON_FALSE: 74 | return mxCreateDoubleScalar(0); 75 | case JSON_NULL: 76 | return mxCreateDoubleMatrix(0,0,mxREAL); 77 | default: return NULL; 78 | } 79 | } 80 | 81 | template ostrm& json( ostrm &S, T *A, siz n ) { 82 | // convert numeric array to JSON string with casting 83 | if(n==0) { S<<"[]"; return S; } if(n==1) { S< ostrm& json( ostrm &S, T *A, siz n ) { 89 | // convert numeric array to JSON string without casting 90 | return json(S,A,n); 91 | } 92 | 93 | ostrm& json( ostrm &S, const char *A ) { 94 | // convert char array to JSON string (handle escape characters) 95 | #define RPL(a,b) case a: { S << b; A++; break; } 96 | S << "\""; while( *A>0 ) switch( *A ) { 97 | RPL('"',"\\\""); RPL('\\',"\\\\"); RPL('/',"\\/"); RPL('\b',"\\b"); 98 | RPL('\f',"\\f"); RPL('\n',"\\n"); RPL('\r',"\\r"); RPL('\t',"\\t"); 99 | default: S << *A; A++; 100 | } 101 | S << "\""; return S; 102 | } 103 | 104 | ostrm& json( ostrm& S, const JsonValue *o ) { 105 | // convert JsonValue to JSON string 106 | switch( o->getTag() ) { 107 | case JSON_NUMBER: S << o->toNumber(); return S; 108 | case JSON_TRUE: S << "true"; return S; 109 | case JSON_FALSE: S << "false"; return S; 110 | case JSON_NULL: S << "null"; return S; 111 | case JSON_STRING: return json(S,o->toString()); 112 | case JSON_ARRAY: 113 | S << "["; for(auto i:*o) { 114 | json(S,&i->value) << (i->next ? "," : ""); } 115 | S << "]"; return S; 116 | case JSON_OBJECT: 117 | S << "{"; for(auto i:*o) { 118 | json(S,i->key) << ":"; 119 | json(S,&i->value) << (i->next ? "," : ""); } 120 | S << "}"; return S; 121 | default: return S; 122 | } 123 | } 124 | 125 | ostrm& json( ostrm& S, const mxArray *M ) { 126 | // convert Matlab mxArray to JSON string 127 | siz i, j, m, n=mxGetNumberOfElements(M); 128 | void *A=mxGetData(M); ostrm *nms; 129 | switch( mxGetClassID(M) ) { 130 | case mxDOUBLE_CLASS: return json(S,(double*) A,n); 131 | case mxSINGLE_CLASS: return json(S,(float*) A,n); 132 | case mxINT64_CLASS: return json(S,(int64_t*) A,n); 133 | case mxUINT64_CLASS: return json(S,(uint64_t*) A,n); 134 | case mxINT32_CLASS: return json(S,(int32_t*) A,n); 135 | case mxUINT32_CLASS: return json(S,(uint32_t*) A,n); 136 | case mxINT16_CLASS: return json(S,(int16_t*) A,n); 137 | case mxUINT16_CLASS: return json(S,(uint16_t*) A,n); 138 | case mxINT8_CLASS: return json(S,(int8_t*) A,n); 139 | case mxUINT8_CLASS: return json(S,(uint8_t*) A,n); 140 | case mxLOGICAL_CLASS: return json(S,(uint8_t*) A,n); 141 | case mxCHAR_CLASS: return json(S,mxArrayToString(M)); 142 | case mxCELL_CLASS: 143 | S << "["; for(i=0; i0) json(S,mxGetCell(M,n-1)); S << "]"; return S; 145 | case mxSTRUCT_CLASS: 146 | if(n==0) { S<<"{}"; return S; } m=mxGetNumberOfFields(M); 147 | if(m==0) { S<<"["; for(i=0; i1) S<<"["; nms=new ostrm[m]; 149 | for(j=0; j1) S<<"]"; delete [] nms; return S; 156 | default: 157 | mexErrMsgTxt( "Unknown type." 
); return S; 158 | } 159 | } 160 | 161 | mxArray* mxCreateStringRobust( const char* str ) { 162 | // convert char* to Matlab string (robust version of mxCreateString) 163 | mxArray *M; ushort *c; mwSize n[2]={1,strlen(str)}; 164 | M=mxCreateCharArray(2,n); c=(ushort*) mxGetData(M); 165 | for( siz i=0; i1 ) mexErrMsgTxt("One output expected."); 182 | 183 | if(!strcmp(action,"convert")) { 184 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 185 | if( mxGetClassID(pr[0])==mxCHAR_CLASS ) { 186 | // object = mexFunction( string ) 187 | char *str = mxArrayToStringRobust(pr[0]); 188 | int status = jsonParse(str, &endptr, &val, allocator); 189 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 190 | pl[0] = json(val); mxFree(str); 191 | } else { 192 | // string = mexFunction( object ) 193 | ostrm S; S << std::setprecision(12); json(S,pr[0]); 194 | pl[0]=mxCreateStringRobust(S.str().c_str()); 195 | } 196 | 197 | } else if(!strcmp(action,"split")) { 198 | // strings = mexFunction( string, k ) 199 | if( nr!=2 ) mexErrMsgTxt("Two input expected."); 200 | char *str = mxArrayToStringRobust(pr[0]); 201 | int status = jsonParse(str, &endptr, &val, allocator); 202 | if( status != JSON_OK) mexErrMsgTxt(jsonStrError(status)); 203 | if( val.getTag()!=JSON_ARRAY ) mexErrMsgTxt("Array expected"); 204 | siz i=0, t=0, n=length(val), k=(siz) mxGetScalar(pr[1]); 205 | k=(k>n)?n:(k<1)?1:k; k=ceil(n/ceil(double(n)/k)); 206 | pl[0]=mxCreateCellMatrix(1,k); ostrm S; S<value); t--; if(!o->next) t=0; S << (t ? "," : "]"); 210 | if(!t) mxSetCell(pl[0],i++,mxCreateStringRobust(S.str().c_str())); 211 | } 212 | 213 | } else if(!strcmp(action,"merge")) { 214 | // string = mexFunction( strings ) 215 | if( nr!=1 ) mexErrMsgTxt("One input expected."); 216 | if(!mxIsCell(pr[0])) mexErrMsgTxt("Cell array expected."); 217 | siz n = mxGetNumberOfElements(pr[0]); 218 | ostrm S; S << std::setprecision(12); S << "["; 219 | for( siz i=0; ivalue) << (j->next ? "," : ""); 225 | mxFree(str); if(i1) 14 | % [ param1 ... paramN ] = getPrmDflt( prm, dfs, [checkExtra] ) 15 | % 16 | % INPUTS 17 | % prm - param struct or cell of form {'name1' v1 'name2' v2 ...} 18 | % dfs - cell of form {'name1' def1 'name2' def2 ...} 19 | % checkExtra - [0] if 1 throw error if prm contains params not in dfs 20 | % if -1 if prm contains params not in dfs adds them 21 | % 22 | % OUTPUTS (nargout==1) 23 | % prm - parameter struct with fields 'name1' through 'nameN' assigned 24 | % 25 | % OUTPUTS (nargout>1) 26 | % param1 - value assigned to parameter with 'name1' 27 | % ... 28 | % paramN - value assigned to parameter with 'nameN' 29 | % 30 | % EXAMPLE 31 | % dfs = { 'x','REQ', 'y',0, 'z',[], 'eps',1e-3 }; 32 | % prm = getPrmDflt( struct('x',1,'y',1), dfs ) 33 | % [ x y z eps ] = getPrmDflt( {'x',2,'y',1}, dfs ) 34 | % 35 | % See also INPUTPARSER 36 | % 37 | % Piotr's Computer Vision Matlab Toolbox Version 2.60 38 | % Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] 39 | % Licensed under the Simplified BSD License [see external/bsd.txt] 40 | 41 | if( mod(length(dfs),2) ), error('odd number of default parameters'); end 42 | if nargin<=2, checkExtra = 0; end 43 | 44 | % get the input parameters as two cell arrays: prmVal and prmField 45 | if iscell(prm) && length(prm)==1, prm=prm{1}; end 46 | if iscell(prm) 47 | if(mod(length(prm),2)), error('odd number of parameters in prm'); end 48 | prmField = prm(1:2:end); prmVal = prm(2:2:end); 49 | else 50 | if(~isstruct(prm)), error('prm must be a struct or a cell'); end 51 | prmVal = struct2cell(prm); prmField = fieldnames(prm); 52 | end 53 | 54 | % get and update default values using quick for loop 55 | dfsField = dfs(1:2:end); dfsVal = dfs(2:2:end); 56 | if checkExtra>0 57 | for i=1:length(prmField) 58 | j = find(strcmp(prmField{i},dfsField)); 59 | if isempty(j), error('parameter %s is not valid', prmField{i}); end 60 | dfsVal(j) = prmVal(i); 61 | end 62 | elseif checkExtra<0 63 | for i=1:length(prmField) 64 | j = find(strcmp(prmField{i},dfsField)); 65 | if isempty(j), j=length(dfsVal)+1; dfsField{j}=prmField{i}; end 66 | dfsVal(j) = prmVal(i); 67 | end 68 | else 69 | for i=1:length(prmField) 70 | dfsVal(strcmp(prmField{i},dfsField)) = prmVal(i); 71 | end 72 | end 73 | 74 | % check for missing values 75 | if any(strcmp('REQ',dfsVal)) 76 | cmpArray = find(strcmp('REQ',dfsVal)); 77 | error(['Required field ''' dfsField{cmpArray(1)} ''' not specified.'] ); 78 | end 79 | 80 | % set output 81 | if nargout==1 82 | varargout{1} = cell2struct( dfsVal, dfsField, 2 ); 83 | else 84 | varargout = dfsVal; 85 | end 86 | -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | "Done (t=8.01s)\n", 
56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... \n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... \n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/pycocotools_ro/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/pycocotools_ro/_mask.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c 2 | # distutils: sources = ../common/maskApi.c 3 | 4 | #************************************************************************** 5 | # Microsoft COCO Toolbox. version 2.0 6 | # Data, paper, and tutorials available at: http://mscoco.org/ 7 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 8 | # Licensed under the Simplified BSD License [see coco/license.txt] 9 | #************************************************************************** 10 | 11 | __author__ = 'tsungyi' 12 | 13 | import sys 14 | PYTHON_VERSION = sys.version_info[0] 15 | 16 | # import both Python-level and C-level symbols of Numpy 17 | # the API uses Numpy to interface C and Python 18 | import numpy as np 19 | cimport numpy as np 20 | from libc.stdlib cimport malloc, free 21 | 22 | # intialized Numpy. must do. 23 | np.import_array() 24 | 25 | # import numpy C function 26 | # we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management 27 | cdef extern from "numpy/arrayobject.h": 28 | void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) 29 | 30 | # Declare the prototype of the C functions in MaskApi.h 31 | cdef extern from "maskApi.h": 32 | ctypedef unsigned int uint 33 | ctypedef unsigned long siz 34 | ctypedef unsigned char byte 35 | ctypedef double* BB 36 | ctypedef struct RLE: 37 | siz h, 38 | siz w, 39 | siz m, 40 | uint* cnts, 41 | void rlesInit( RLE **R, siz n ) 42 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) 43 | void rleDecode( const RLE *R, byte *mask, siz n ) 44 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) 45 | void rleArea( const RLE *R, siz n, uint *a ) 46 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) 47 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 48 | void rbbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) 49 | void rleToBbox( const RLE *R, BB bb, siz n ) 50 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) 51 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) 52 | char* rleToString( const RLE *R ) 53 | void rleFrString( RLE *R, char *s, siz h, siz w ) 54 | 55 | # python class to wrap RLE array in C 56 | # the class handles the memory allocation and deallocation 57 | cdef class RLEs: 58 | cdef RLE *_R 59 | cdef siz _n 60 | 61 | def __cinit__(self, siz n =0): 62 | rlesInit(&self._R, n) 63 | self._n = n 64 | 65 | # free the RLE array here 66 | def __dealloc__(self): 67 | if self._R is not NULL: 68 | for i in range(self._n): 69 | free(self._R[i].cnts) 70 | free(self._R) 71 | def __getattr__(self, key): 72 | if key == 'n': 73 | return self._n 74 | raise AttributeError(key) 75 | 76 | # python class to wrap Mask array in C 77 | # the class handles the memory allocation and deallocation 78 | cdef class Masks: 79 | cdef byte *_mask 80 | cdef siz _h 81 | cdef siz _w 82 | cdef siz _n 83 | 84 | def __cinit__(self, h, w, n): 85 | self._mask = malloc(h*w*n* sizeof(byte)) 86 | self._h = h 87 | self._w = w 88 | self._n = n 89 | # def __dealloc__(self): 90 | # the memory management of _mask has been passed to np.ndarray 91 | # it doesn't need to be freed here 92 | 93 | # called when passing into np.array() and return an np.ndarray 
in column-major order 94 | def __array__(self): 95 | cdef np.npy_intp shape[1] 96 | shape[0] = self._h*self._w*self._n 97 | # Create a 1D array, and reshape it to fortran/Matlab column-major array 98 | ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') 99 | # The _mask allocated by Masks is now handled by ndarray 100 | PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) 101 | return ndarray 102 | 103 | # internal conversion from Python RLEs object to compressed RLE format 104 | def _toString(RLEs Rs): 105 | cdef siz n = Rs.n 106 | cdef bytes py_string 107 | cdef char* c_string 108 | objs = [] 109 | for i in range(n): 110 | c_string = rleToString( &Rs._R[i] ) 111 | py_string = c_string 112 | objs.append({ 113 | 'size': [Rs._R[i].h, Rs._R[i].w], 114 | 'counts': py_string 115 | }) 116 | free(c_string) 117 | return objs 118 | 119 | # internal conversion from compressed RLE format to Python RLEs object 120 | def _frString(rleObjs): 121 | cdef siz n = len(rleObjs) 122 | Rs = RLEs(n) 123 | cdef bytes py_string 124 | cdef char* c_string 125 | for i, obj in enumerate(rleObjs): 126 | if PYTHON_VERSION == 2: 127 | py_string = str(obj['counts']).encode('utf8') 128 | elif PYTHON_VERSION == 3: 129 | py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] 130 | else: 131 | raise Exception('Python version must be 2 or 3') 132 | c_string = py_string 133 | rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) 134 | return Rs 135 | 136 | # encode mask to RLEs objects 137 | # list of RLE string can be generated by RLEs member function 138 | def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): 139 | h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] 140 | cdef RLEs Rs = RLEs(n) 141 | rleEncode(Rs._R,mask.data,h,w,n) 142 | objs = _toString(Rs) 143 | return objs 144 | 145 | # decode mask from compressed list of RLE string or RLEs object 146 | def decode(rleObjs): 147 | cdef RLEs Rs = _frString(rleObjs) 148 | h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n 149 | masks = Masks(h, w, n) 150 | rleDecode(Rs._R, masks._mask, n); 151 | return np.array(masks) 152 | 153 | def merge(rleObjs, intersect=0): 154 | cdef RLEs Rs = _frString(rleObjs) 155 | cdef RLEs R = RLEs(1) 156 | rleMerge(Rs._R, R._R, Rs._n, intersect) 157 | obj = _toString(R)[0] 158 | return obj 159 | 160 | def area(rleObjs): 161 | cdef RLEs Rs = _frString(rleObjs) 162 | cdef uint* _a = malloc(Rs._n* sizeof(uint)) 163 | rleArea(Rs._R, Rs._n, _a) 164 | cdef np.npy_intp shape[1] 165 | shape[0] = Rs._n 166 | a = np.array((Rs._n, ), dtype=np.uint8) 167 | a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) 168 | PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) 169 | return a 170 | 171 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 
172 | def iou( dt, gt, pyiscrowd ): 173 | def _preproc(objs): 174 | if len(objs) == 0: 175 | return objs 176 | if type(objs) == np.ndarray: 177 | if len(objs.shape) == 1: 178 | objs = objs.reshape((objs[0], 1)) 179 | # check if it's Nx4 bbox 180 | if not len(objs.shape) == 2 or not objs.shape[1] == 4: 181 | raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') 182 | objs = objs.astype(np.double) 183 | elif type(objs) == list: 184 | # check if list is in box format and convert it to np.ndarray 185 | isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 186 | isrle = np.all(np.array([type(obj) == dict for obj in objs])) 187 | if isbox: 188 | objs = np.array(objs, dtype=np.double) 189 | if len(objs.shape) == 1: 190 | objs = objs.reshape((1,objs.shape[0])) 191 | elif isrle: 192 | objs = _frString(objs) 193 | else: 194 | raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') 195 | else: 196 | raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') 197 | return objs 198 | def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 199 | rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) 200 | def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 201 | bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 202 | def _len(obj): 203 | cdef siz N = 0 204 | if type(obj) == RLEs: 205 | N = obj.n 206 | elif len(obj)==0: 207 | pass 208 | elif type(obj) == np.ndarray: 209 | N = obj.shape[0] 210 | return N 211 | # convert iscrowd to numpy array 212 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 213 | # simple type checking 214 | cdef siz m, n 215 | dt = _preproc(dt) 216 | gt = _preproc(gt) 217 | m = _len(dt) 218 | n = _len(gt) 219 | if m == 0 or n == 0: 220 | return [] 221 | if not type(dt) == type(gt): 222 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 223 | 224 | # define local variables 225 | cdef double* _iou = 0 226 | cdef np.npy_intp shape[1] 227 | # check type and assign iou function 228 | if type(dt) == RLEs: 229 | _iouFun = _rleIou 230 | elif type(dt) == np.ndarray: 231 | _iouFun = _bbIou 232 | else: 233 | raise Exception('input data type not allowed.') 234 | _iou = malloc(m*n* sizeof(double)) 235 | iou = np.zeros((m*n, ), dtype=np.double) 236 | shape[0] = m*n 237 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 238 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 239 | _iouFun(dt, gt, iscrowd, m, n, iou) 240 | return iou.reshape((m,n), order='F') 241 | 242 | 243 | # iou computation. support function overload (RLEs-RLEs and bbox-bbox). 
244 | def riou( dt, gt, pyiscrowd ): 245 | def _preproc(objs): 246 | if len(objs) == 0: 247 | return objs 248 | if type(objs) == np.ndarray: 249 | if len(objs.shape) == 1: 250 | objs = objs.reshape((objs[0], 1)) 251 | # check if it's Nx5 rotated bbox 252 | if not objs.shape[1] == 5: 253 | raise Exception('numpy ndarray input is only for *rotated bounding boxes* and should have Nx5 dimension') 254 | objs = objs.astype(np.double) 255 | elif type(objs) == list: 256 | # check if list is in box format and convert it to np.ndarray 257 | isrbox = np.all(np.array([(len(obj)==5) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) 258 | if isrbox: 259 | objs = np.array(objs, dtype=np.double) 260 | if len(objs.shape) == 1: 261 | objs = objs.reshape((1,objs.shape[0])) 262 | else: 263 | raise Exception('list input can be rotated bounding box (Nx5)') 264 | else: 265 | raise Exception('unrecognized type. The following types: np.ndarray (rotated box) and list (rotated box) are supported.') 266 | return objs 267 | def _rbbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): 268 | rbbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) 269 | def _len(obj): 270 | cdef siz N = 0 271 | if type(obj) == RLEs: 272 | N = obj.n 273 | elif len(obj)==0: 274 | pass 275 | elif type(obj) == np.ndarray: 276 | N = obj.shape[0] 277 | return N 278 | # convert iscrowd to numpy array 279 | cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) 280 | # simple type checking 281 | cdef siz m, n 282 | dt = _preproc(dt) 283 | gt = _preproc(gt) 284 | m = _len(dt) 285 | n = _len(gt) 286 | if m == 0 or n == 0: 287 | return [] 288 | if not type(dt) == type(gt): 289 | raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') 290 | 291 | # define local variables 292 | cdef double* _iou = 0 293 | cdef np.npy_intp shape[1] 294 | # check type and assign iou function 295 | if type(dt) == np.ndarray: 296 | _iouFun = _rbbIou 297 | else: 298 | raise Exception('input data type not allowed.') 299 | _iou = malloc(m*n* sizeof(double)) 300 | iou = np.zeros((m*n, ), dtype=np.double) 301 | shape[0] = m*n 302 | iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) 303 | PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) 304 | _iouFun(dt, gt, iscrowd, m, n, iou) 305 | return iou.reshape((m,n), order='F') 306 | 307 | 308 | def toBbox( rleObjs ): 309 | cdef RLEs Rs = _frString(rleObjs) 310 | cdef siz n = Rs.n 311 | cdef BB _bb = malloc(4*n* sizeof(double)) 312 | rleToBbox( Rs._R, _bb, n ) 313 | cdef np.npy_intp shape[1] 314 | shape[0] = 4*n 315 | bb = np.array((1,4*n), dtype=np.double) 316 | bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) 317 | PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) 318 | return bb 319 | 320 | def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): 321 | cdef siz n = bb.shape[0] 322 | Rs = RLEs(n) 323 | rleFrBbox( Rs._R, bb.data, h, w, n ) 324 | objs = _toString(Rs) 325 | return objs 326 | 327 | def frPoly( poly, siz h, siz w ): 328 | cdef np.ndarray[np.double_t, ndim=1] np_poly 329 | n = len(poly) 330 | Rs = RLEs(n) 331 | for i, p in enumerate(poly): 332 | np_poly = np.array(p, dtype=np.double, order='F') 333 | rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) 334 | objs = _toString(Rs) 335 | return objs 336 | 337 | def frUncompressedRLE(ucRles, siz h, siz w): 338 | cdef np.ndarray[np.uint32_t, ndim=1]
cnts 339 | cdef RLE R 340 | cdef uint *data 341 | n = len(ucRles) 342 | objs = [] 343 | for i in range(n): 344 | Rs = RLEs(1) 345 | cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) 346 | # time for malloc can be saved here but it's fine 347 | data = malloc(len(cnts)* sizeof(uint)) 348 | for j in range(len(cnts)): 349 | data[j] = cnts[j] 350 | R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) 351 | Rs._R[0] = R 352 | objs.append(_toString(Rs)[0]) 353 | return objs 354 | 355 | def frPyObjects(pyobj, h, w): 356 | # encode rle from a list of python objects 357 | if type(pyobj) == np.ndarray: 358 | objs = frBbox(pyobj, h, w) 359 | elif type(pyobj) == list and len(pyobj[0]) == 4: 360 | objs = frBbox(pyobj, h, w) 361 | elif type(pyobj) == list and len(pyobj[0]) > 4: 362 | objs = frPoly(pyobj, h, w) 363 | elif type(pyobj) == list and type(pyobj[0]) == dict \ 364 | and 'counts' in pyobj[0] and 'size' in pyobj[0]: 365 | objs = frUncompressedRLE(pyobj, h, w) 366 | # encode rle from single python object 367 | elif type(pyobj) == list and len(pyobj) == 4: 368 | objs = frBbox([pyobj], h, w)[0] 369 | elif type(pyobj) == list and len(pyobj) > 4: 370 | objs = frPoly([pyobj], h, w)[0] 371 | elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: 372 | objs = frUncompressedRLE([pyobj], h, w)[0] 373 | else: 374 | raise Exception('input type is not supported.') 375 | return objs 376 | -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/pycocotools_ro/coco.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'tylin' 3 | __version__ = '2.0' 4 | # Interface for accessing the Microsoft COCO dataset. 5 | 6 | # Microsoft COCO is a large image dataset designed for object detection, 7 | # segmentation, and caption generation. pycocotools is a Python API that 8 | # assists in loading, parsing and visualizing the annotations in COCO. 9 | # Please visit http://mscoco.org/ for more information on COCO, including 10 | # for the data, paper, and tutorials. The exact format of the annotations 11 | # is also described on the COCO website. For example usage of the pycocotools 12 | # please see pycocotools_demo.ipynb. In addition to this API, please download both 13 | # the COCO images and annotations in order to run the demo. 14 | 15 | # An alternative to using the API is to load the annotations directly 16 | # into Python dictionary 17 | # Using the API provides additional utility functions. Note that this API 18 | # supports both *instance* and *caption* annotations. In the case of 19 | # captions not all functions are defined (e.g. categories are undefined). 20 | 21 | # The following API functions are defined: 22 | # COCO - COCO api class that loads COCO annotation file and prepare data structures. 23 | # decodeMask - Decode binary mask M encoded via run-length encoding. 24 | # encodeMask - Encode binary mask M using run-length encoding. 25 | # getAnnIds - Get ann ids that satisfy given filter conditions. 26 | # getCatIds - Get cat ids that satisfy given filter conditions. 27 | # getImgIds - Get img ids that satisfy given filter conditions. 28 | # loadAnns - Load anns with the specified ids. 29 | # loadCats - Load cats with the specified ids. 30 | # loadImgs - Load imgs with the specified ids. 31 | # annToMask - Convert segmentation in an annotation to binary mask. 32 | # showAnns - Display the specified annotations. 
33 | # loadRes - Load algorithm results and create API for accessing them. 34 | # download - Download COCO images from mscoco.org server. 35 | # Throughout the API "ann"=annotation, "cat"=category, and "img"=image. 36 | # Help on each functions can be accessed by: "help COCO>function". 37 | 38 | # See also COCO>decodeMask, 39 | # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, 40 | # COCO>getImgIds, COCO>loadAnns, COCO>loadCats, 41 | # COCO>loadImgs, COCO>annToMask, COCO>showAnns 42 | 43 | # Microsoft COCO Toolbox. version 2.0 44 | # Data, paper, and tutorials available at: http://mscoco.org/ 45 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2014. 46 | # Licensed under the Simplified BSD License [see bsd.txt] 47 | 48 | import json 49 | import time 50 | import matplotlib.pyplot as plt 51 | from matplotlib.collections import PatchCollection 52 | from matplotlib.patches import Polygon 53 | import numpy as np 54 | import copy 55 | import itertools 56 | from . import mask as maskUtils 57 | import os 58 | from collections import defaultdict 59 | import sys 60 | PYTHON_VERSION = sys.version_info[0] 61 | if PYTHON_VERSION == 2: 62 | from urllib import urlretrieve 63 | elif PYTHON_VERSION == 3: 64 | from urllib.request import urlretrieve 65 | 66 | 67 | def _isArrayLike(obj): 68 | return hasattr(obj, '__iter__') and hasattr(obj, '__len__') 69 | 70 | 71 | class COCO: 72 | def __init__(self, annotation_file=None): 73 | """ 74 | Constructor of Microsoft COCO helper class for reading and visualizing annotations. 75 | :param annotation_file (str): location of annotation file 76 | :param image_folder (str): location to the folder that hosts images. 77 | :return: 78 | """ 79 | # load dataset 80 | self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() 81 | self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) 82 | if not annotation_file == None: 83 | print('loading annotations into memory...') 84 | tic = time.time() 85 | dataset = json.load(open(annotation_file, 'r')) 86 | assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) 87 | print('Done (t={:0.2f}s)'.format(time.time()- tic)) 88 | self.dataset = dataset 89 | self.createIndex() 90 | 91 | def createIndex(self): 92 | # create index 93 | print('creating index...') 94 | anns, cats, imgs = {}, {}, {} 95 | imgToAnns,catToImgs = defaultdict(list),defaultdict(list) 96 | if 'annotations' in self.dataset: 97 | for ann in self.dataset['annotations']: 98 | imgToAnns[ann['image_id']].append(ann) 99 | anns[ann['id']] = ann 100 | 101 | if 'images' in self.dataset: 102 | for img in self.dataset['images']: 103 | imgs[img['id']] = img 104 | 105 | if 'categories' in self.dataset: 106 | for cat in self.dataset['categories']: 107 | cats[cat['id']] = cat 108 | 109 | if 'annotations' in self.dataset and 'categories' in self.dataset: 110 | for ann in self.dataset['annotations']: 111 | catToImgs[ann['category_id']].append(ann['image_id']) 112 | 113 | print('index created!') 114 | 115 | # create class members 116 | self.anns = anns 117 | self.imgToAnns = imgToAnns 118 | self.catToImgs = catToImgs 119 | self.imgs = imgs 120 | self.cats = cats 121 | 122 | def info(self): 123 | """ 124 | Print information about the annotation file. 125 | :return: 126 | """ 127 | for key, value in self.dataset['info'].items(): 128 | print('{}: {}'.format(key, value)) 129 | 130 | def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): 131 | """ 132 | Get ann ids that satisfy given filter conditions. 
default skips that filter 133 | :param imgIds (int array) : get anns for given imgs 134 | catIds (int array) : get anns for given cats 135 | areaRng (float array) : get anns for given area range (e.g. [0 inf]) 136 | iscrowd (boolean) : get anns for given crowd label (False or True) 137 | :return: ids (int array) : integer array of ann ids 138 | """ 139 | imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] 140 | catIds = catIds if _isArrayLike(catIds) else [catIds] 141 | 142 | if len(imgIds) == len(catIds) == len(areaRng) == 0: 143 | anns = self.dataset['annotations'] 144 | else: 145 | if not len(imgIds) == 0: 146 | lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] 147 | anns = list(itertools.chain.from_iterable(lists)) 148 | else: 149 | anns = self.dataset['annotations'] 150 | anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] 151 | anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] 152 | if not iscrowd == None: 153 | ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] 154 | else: 155 | ids = [ann['id'] for ann in anns] 156 | return ids 157 | 158 | def getCatIds(self, catNms=[], supNms=[], catIds=[]): 159 | """ 160 | filtering parameters. default skips that filter. 161 | :param catNms (str array) : get cats for given cat names 162 | :param supNms (str array) : get cats for given supercategory names 163 | :param catIds (int array) : get cats for given cat ids 164 | :return: ids (int array) : integer array of cat ids 165 | """ 166 | catNms = catNms if _isArrayLike(catNms) else [catNms] 167 | supNms = supNms if _isArrayLike(supNms) else [supNms] 168 | catIds = catIds if _isArrayLike(catIds) else [catIds] 169 | 170 | if len(catNms) == len(supNms) == len(catIds) == 0: 171 | cats = self.dataset['categories'] 172 | else: 173 | cats = self.dataset['categories'] 174 | cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] 175 | cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] 176 | cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] 177 | ids = [cat['id'] for cat in cats] 178 | return ids 179 | 180 | def getImgIds(self, imgIds=[], catIds=[]): 181 | ''' 182 | Get img ids that satisfy given filter conditions. 183 | :param imgIds (int array) : get imgs for given ids 184 | :param catIds (int array) : get imgs with all given cats 185 | :return: ids (int array) : integer array of img ids 186 | ''' 187 | imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] 188 | catIds = catIds if _isArrayLike(catIds) else [catIds] 189 | 190 | if len(imgIds) == len(catIds) == 0: 191 | ids = self.imgs.keys() 192 | else: 193 | ids = set(imgIds) 194 | for i, catId in enumerate(catIds): 195 | if i == 0 and len(ids) == 0: 196 | ids = set(self.catToImgs[catId]) 197 | else: 198 | ids &= set(self.catToImgs[catId]) 199 | return list(ids) 200 | 201 | def loadAnns(self, ids=[]): 202 | """ 203 | Load anns with the specified ids. 204 | :param ids (int array) : integer ids specifying anns 205 | :return: anns (object array) : loaded ann objects 206 | """ 207 | if _isArrayLike(ids): 208 | return [self.anns[id] for id in ids] 209 | elif type(ids) == int: 210 | return [self.anns[ids]] 211 | 212 | def loadCats(self, ids=[]): 213 | """ 214 | Load cats with the specified ids. 
215 | :param ids (int array) : integer ids specifying cats 216 | :return: cats (object array) : loaded cat objects 217 | """ 218 | if _isArrayLike(ids): 219 | return [self.cats[id] for id in ids] 220 | elif type(ids) == int: 221 | return [self.cats[ids]] 222 | 223 | def loadImgs(self, ids=[]): 224 | """ 225 | Load anns with the specified ids. 226 | :param ids (int array) : integer ids specifying img 227 | :return: imgs (object array) : loaded img objects 228 | """ 229 | if _isArrayLike(ids): 230 | return [self.imgs[id] for id in ids] 231 | elif type(ids) == int: 232 | return [self.imgs[ids]] 233 | 234 | def showAnns(self, anns): 235 | """ 236 | Display the specified annotations. 237 | :param anns (array of object): annotations to display 238 | :return: None 239 | """ 240 | if len(anns) == 0: 241 | return 0 242 | if 'segmentation' in anns[0] or 'keypoints' in anns[0]: 243 | datasetType = 'instances' 244 | elif 'caption' in anns[0]: 245 | datasetType = 'captions' 246 | else: 247 | raise Exception('datasetType not supported') 248 | if datasetType == 'instances': 249 | ax = plt.gca() 250 | ax.set_autoscale_on(False) 251 | polygons = [] 252 | color = [] 253 | for ann in anns: 254 | c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] 255 | if 'segmentation' in ann: 256 | if type(ann['segmentation']) == list: 257 | # polygon 258 | for seg in ann['segmentation']: 259 | poly = np.array(seg).reshape((int(len(seg)/2), 2)) 260 | polygons.append(Polygon(poly)) 261 | color.append(c) 262 | else: 263 | # mask 264 | t = self.imgs[ann['image_id']] 265 | if type(ann['segmentation']['counts']) == list: 266 | rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) 267 | else: 268 | rle = [ann['segmentation']] 269 | m = maskUtils.decode(rle) 270 | img = np.ones( (m.shape[0], m.shape[1], 3) ) 271 | if ann['iscrowd'] == 1: 272 | color_mask = np.array([2.0,166.0,101.0])/255 273 | if ann['iscrowd'] == 0: 274 | color_mask = np.random.random((1, 3)).tolist()[0] 275 | for i in range(3): 276 | img[:,:,i] = color_mask[i] 277 | ax.imshow(np.dstack( (img, m*0.5) )) 278 | if 'keypoints' in ann and type(ann['keypoints']) == list: 279 | # turn skeleton into zero-based index 280 | sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 281 | kp = np.array(ann['keypoints']) 282 | x = kp[0::3] 283 | y = kp[1::3] 284 | v = kp[2::3] 285 | for sk in sks: 286 | if np.all(v[sk]>0): 287 | plt.plot(x[sk],y[sk], linewidth=3, color=c) 288 | plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) 289 | plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) 290 | p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) 291 | ax.add_collection(p) 292 | p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) 293 | ax.add_collection(p) 294 | elif datasetType == 'captions': 295 | for ann in anns: 296 | print(ann['caption']) 297 | 298 | def loadRes(self, resFile): 299 | """ 300 | Load result file and return a result api object. 
301 | :param resFile (str) : file name of result file 302 | :return: res (obj) : result api object 303 | """ 304 | res = COCO() 305 | res.dataset['images'] = [img for img in self.dataset['images']] 306 | 307 | print('Loading and preparing results...') 308 | tic = time.time() 309 | if type(resFile) == str: 310 | anns = json.load(open(resFile)) 311 | elif type(resFile) == np.ndarray: 312 | anns = self.loadNumpyAnnotations(resFile) 313 | else: 314 | anns = resFile 315 | assert type(anns) == list, 'results is not an array of objects' 316 | annsImgIds = [ann['image_id'] for ann in anns] 317 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 318 | 'Results do not correspond to current coco set' 319 | if 'caption' in anns[0]: 320 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 321 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 322 | for id, ann in enumerate(anns): 323 | ann['id'] = id+1 324 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 325 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 326 | for id, ann in enumerate(anns): 327 | bb = ann['bbox'] 328 | x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] 329 | if not 'segmentation' in ann: 330 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 331 | ann['area'] = bb[2]*bb[3] 332 | ann['id'] = id+1 333 | ann['iscrowd'] = 0 334 | elif 'rbbox' in anns[0]: 335 | # rotated bounding box ('rbbox') results 336 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 337 | for id, ann in enumerate(anns): 338 | bb = ann['rbbox'] 339 | # cx, cy, w, h, angle = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 340 | # if not 'segmentation' in ann: 341 | # ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 342 | ann['area'] = bb[2] * bb[3] 343 | ann['id'] = id + 1 344 | ann['iscrowd'] = 0 345 | elif 'segmentation' in anns[0]: 346 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 347 | for id, ann in enumerate(anns): 348 | # now only support compressed RLE format as segmentation results 349 | ann['area'] = maskUtils.area(ann['segmentation']) 350 | if not 'bbox' in ann: 351 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 352 | ann['id'] = id+1 353 | ann['iscrowd'] = 0 354 | elif 'keypoints' in anns[0]: 355 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 356 | for id, ann in enumerate(anns): 357 | s = ann['keypoints'] 358 | x = s[0::3] 359 | y = s[1::3] 360 | x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) 361 | ann['area'] = (x1-x0)*(y1-y0) 362 | ann['id'] = id + 1 363 | ann['bbox'] = [x0,y0,x1-x0,y1-y0] 364 | 365 | print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 366 | 367 | res.dataset['annotations'] = anns 368 | res.createIndex() 369 | return res 370 | 371 | def download(self, tarDir = None, imgIds = [] ): 372 | ''' 373 | Download COCO images from mscoco.org server.
374 | :param tarDir (str): COCO results directory name 375 | imgIds (list): images to be downloaded 376 | :return: 377 | ''' 378 | if tarDir is None: 379 | print('Please specify target directory') 380 | return -1 381 | if len(imgIds) == 0: 382 | imgs = self.imgs.values() 383 | else: 384 | imgs = self.loadImgs(imgIds) 385 | N = len(imgs) 386 | if not os.path.exists(tarDir): 387 | os.makedirs(tarDir) 388 | for i, img in enumerate(imgs): 389 | tic = time.time() 390 | fname = os.path.join(tarDir, img['file_name']) 391 | if not os.path.exists(fname): 392 | urlretrieve(img['coco_url'], fname) 393 | print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) 394 | 395 | def loadNumpyAnnotations(self, data): 396 | """ 397 | Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} 398 | :param data (numpy.ndarray) 399 | :return: annotations (python nested list) 400 | """ 401 | print('Converting ndarray to lists...') 402 | assert(type(data) == np.ndarray) 403 | print(data.shape) 404 | assert(data.shape[1] == 7) 405 | N = data.shape[0] 406 | ann = [] 407 | for i in range(N): 408 | if i % 1000000 == 0: 409 | print('{}/{}'.format(i,N)) 410 | ann += [{ 411 | 'image_id' : int(data[i, 0]), 412 | 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], 413 | 'score' : data[i, 5], 414 | 'category_id': int(data[i, 6]), 415 | }] 416 | return ann 417 | 418 | def annToRLE(self, ann): 419 | """ 420 | Convert annotation which can be polygons, uncompressed RLE to RLE. 421 | :return: binary mask (numpy 2D array) 422 | """ 423 | t = self.imgs[ann['image_id']] 424 | h, w = t['height'], t['width'] 425 | segm = ann['segmentation'] 426 | if type(segm) == list: 427 | # polygon -- a single object might consist of multiple parts 428 | # we merge all parts into one mask rle code 429 | rles = maskUtils.frPyObjects(segm, h, w) 430 | rle = maskUtils.merge(rles) 431 | elif type(segm['counts']) == list: 432 | # uncompressed RLE 433 | rle = maskUtils.frPyObjects(segm, h, w) 434 | else: 435 | # rle 436 | rle = ann['segmentation'] 437 | return rle 438 | 439 | def annToMask(self, ann): 440 | """ 441 | Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 442 | :return: binary mask (numpy 2D array) 443 | """ 444 | rle = self.annToRLE(ann) 445 | m = maskUtils.decode(rle) 446 | return m -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/pycocotools_ro/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools_ro._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 
17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | riou = _mask.riou 78 | merge = _mask.merge 79 | frPyObjects = _mask.frPyObjects 80 | 81 | def encode(bimask): 82 | if len(bimask.shape) == 3: 83 | return _mask.encode(bimask) 84 | elif len(bimask.shape) == 2: 85 | h, w = bimask.shape 86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 87 | 88 | def decode(rleObjs): 89 | if type(rleObjs) == list: 90 | return _mask.decode(rleObjs) 91 | else: 92 | return _mask.decode([rleObjs])[:,:,0] 93 | 94 | def area(rleObjs): 95 | if type(rleObjs) == list: 96 | return _mask.area(rleObjs) 97 | else: 98 | return _mask.area([rleObjs])[0] 99 | 100 | def toBbox(rleObjs): 101 | if type(rleObjs) == list: 102 | return _mask.toBbox(rleObjs) 103 | else: 104 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /cocoapi_ro/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension 2 | import numpy as np 3 | 4 | # To compile and install locally run "python setup.py build_ext --inplace" 5 | # To install library to Python site-packages run "python setup.py build_ext install" 6 | 7 | ext_modules = [ 8 | Extension( 9 | 'pycocotools_ro._mask', 10 | sources=['../common/maskApi.c', 'pycocotools_ro/_mask.pyx'], 11 | include_dirs = [np.get_include(), '../common'], 12 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99', "-fopenmp"], 13 | ) 14 | ] 15 | 16 | setup( 17 | name='pycocotools_ro', 18 | packages=['pycocotools_ro'], 19 | package_dir = {'pycocotools_ro': 'pycocotools_ro'}, 20 | install_requires=[ 21 | 'setuptools>=18.0', 22 | 'cython>=0.27.3', 23 | 'matplotlib>=2.1.0' 24 | ], 25 | version='1.0', 26 | ext_modules= ext_modules 27 | ) 28 | -------------------------------------------------------------------------------- /cocoapi_ro/README.txt: -------------------------------------------------------------------------------- 1 | COCO API - http://cocodataset.org/ 2 | 3 | COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO. Please visit http://cocodataset.org/ for more information on COCO, including for the data, paper, and tutorials. The exact format of the annotations is also described on the COCO website. The Matlab and Python APIs are complete, the Lua API provides only basic functionality. 4 | 5 | In addition to this API, please download both the COCO images and annotations in order to run the demos and use the API. Both are available on the project website. 6 | -Please download, unzip, and place the images in: coco/images/ 7 | -Please download and place the annotations in: coco/annotations/ 8 | For substantially more details on the API please see http://cocodataset.org/#download. 9 | 10 | After downloading the images and annotations, run the Matlab, Python, or Lua demos for example usage. 
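For the rotated-box support this fork adds on top of the standard API (riou in pycocotools_ro/mask.py, backed by rbbIou in common/maskApi.c), a minimal usage sketch follows. It assumes the extension has already been built (see the install steps below) and that rotated boxes use the [cx, cy, w, h, angle] layout suggested by the rbbox comments in coco.py; the exact center and angle conventions are set by the C implementation, so the numbers here are illustrative only.

import numpy as np
from pycocotools_ro import mask as maskUtils

# Two hypothetical Nx5 rotated boxes; only the (N, 5) shape is checked by
# riou's input validation, the parameter order here is an assumption.
dt = np.array([[50.0, 50.0, 20.0, 10.0, 0.00]])
gt = np.array([[50.0, 50.0, 20.0, 10.0, 0.35]])
iscrowd = [0]

ious = maskUtils.riou(dt, gt, iscrowd)  # (1, 1) array of rotated IoU values
print(ious)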
11 | 12 | To install: 13 | -For Matlab, add coco/MatlabApi to the Matlab path (OSX/Linux binaries provided) 14 | -For Python, run "make" under coco/PythonAPI 15 | -For Lua, run “luarocks make LuaAPI/rocks/coco-scm-1.rockspec” under coco/ 16 | -------------------------------------------------------------------------------- /cocoapi_ro/common/gason.cpp: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #include "gason.h" 3 | #include 4 | 5 | #define JSON_ZONE_SIZE 4096 6 | #define JSON_STACK_SIZE 32 7 | 8 | const char *jsonStrError(int err) { 9 | switch (err) { 10 | #define XX(no, str) \ 11 | case JSON_##no: \ 12 | return str; 13 | JSON_ERRNO_MAP(XX) 14 | #undef XX 15 | default: 16 | return "unknown"; 17 | } 18 | } 19 | 20 | void *JsonAllocator::allocate(size_t size) { 21 | size = (size + 7) & ~7; 22 | 23 | if (head && head->used + size <= JSON_ZONE_SIZE) { 24 | char *p = (char *)head + head->used; 25 | head->used += size; 26 | return p; 27 | } 28 | 29 | size_t allocSize = sizeof(Zone) + size; 30 | Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); 31 | if (zone == nullptr) 32 | return nullptr; 33 | zone->used = allocSize; 34 | if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { 35 | zone->next = head; 36 | head = zone; 37 | } else { 38 | zone->next = head->next; 39 | head->next = zone; 40 | } 41 | return (char *)zone + sizeof(Zone); 42 | } 43 | 44 | void JsonAllocator::deallocate() { 45 | while (head) { 46 | Zone *next = head->next; 47 | free(head); 48 | head = next; 49 | } 50 | } 51 | 52 | static inline bool isspace(char c) { 53 | return c == ' ' || (c >= '\t' && c <= '\r'); 54 | } 55 | 56 | static inline bool isdelim(char c) { 57 | return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; 58 | } 59 | 60 | static inline bool isdigit(char c) { 61 | return c >= '0' && c <= '9'; 62 | } 63 | 64 | static inline bool isxdigit(char c) { 65 | return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); 66 | } 67 | 68 | static inline int char2int(char c) { 69 | if (c <= '9') 70 | return c - '0'; 71 | return (c & ~' ') - 'A' + 10; 72 | } 73 | 74 | static double string2double(char *s, char **endptr) { 75 | char ch = *s; 76 | if (ch == '-') 77 | ++s; 78 | 79 | double result = 0; 80 | while (isdigit(*s)) 81 | result = (result * 10) + (*s++ - '0'); 82 | 83 | if (*s == '.') { 84 | ++s; 85 | 86 | double fraction = 1; 87 | while (isdigit(*s)) { 88 | fraction *= 0.1; 89 | result += (*s++ - '0') * fraction; 90 | } 91 | } 92 | 93 | if (*s == 'e' || *s == 'E') { 94 | ++s; 95 | 96 | double base = 10; 97 | if (*s == '+') 98 | ++s; 99 | else if (*s == '-') { 100 | ++s; 101 | base = 0.1; 102 | } 103 | 104 | unsigned int exponent = 0; 105 | while (isdigit(*s)) 106 | exponent = (exponent * 10) + (*s++ - '0'); 107 | 108 | double power = 1; 109 | for (; exponent; exponent >>= 1, base *= base) 110 | if (exponent & 1) 111 | power *= base; 112 | 113 | result *= power; 114 | } 115 | 116 | *endptr = s; 117 | return ch == '-' ? 
-result : result; 118 | } 119 | 120 | static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { 121 | if (!tail) 122 | return node->next = node; 123 | node->next = tail->next; 124 | tail->next = node; 125 | return node; 126 | } 127 | 128 | static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { 129 | if (tail) { 130 | auto head = tail->next; 131 | tail->next = nullptr; 132 | return JsonValue(tag, head); 133 | } 134 | return JsonValue(tag, nullptr); 135 | } 136 | 137 | int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { 138 | JsonNode *tails[JSON_STACK_SIZE]; 139 | JsonTag tags[JSON_STACK_SIZE]; 140 | char *keys[JSON_STACK_SIZE]; 141 | JsonValue o; 142 | int pos = -1; 143 | bool separator = true; 144 | JsonNode *node; 145 | *endptr = s; 146 | 147 | while (*s) { 148 | while (isspace(*s)) { 149 | ++s; 150 | if (!*s) break; 151 | } 152 | *endptr = s++; 153 | switch (**endptr) { 154 | case '-': 155 | if (!isdigit(*s) && *s != '.') { 156 | *endptr = s; 157 | return JSON_BAD_NUMBER; 158 | } 159 | case '0': 160 | case '1': 161 | case '2': 162 | case '3': 163 | case '4': 164 | case '5': 165 | case '6': 166 | case '7': 167 | case '8': 168 | case '9': 169 | o = JsonValue(string2double(*endptr, &s)); 170 | if (!isdelim(*s)) { 171 | *endptr = s; 172 | return JSON_BAD_NUMBER; 173 | } 174 | break; 175 | case '"': 176 | o = JsonValue(JSON_STRING, s); 177 | for (char *it = s; *s; ++it, ++s) { 178 | int c = *it = *s; 179 | if (c == '\\') { 180 | c = *++s; 181 | switch (c) { 182 | case '\\': 183 | case '"': 184 | case '/': 185 | *it = c; 186 | break; 187 | case 'b': 188 | *it = '\b'; 189 | break; 190 | case 'f': 191 | *it = '\f'; 192 | break; 193 | case 'n': 194 | *it = '\n'; 195 | break; 196 | case 'r': 197 | *it = '\r'; 198 | break; 199 | case 't': 200 | *it = '\t'; 201 | break; 202 | case 'u': 203 | c = 0; 204 | for (int i = 0; i < 4; ++i) { 205 | if (isxdigit(*++s)) { 206 | c = c * 16 + char2int(*s); 207 | } else { 208 | *endptr = s; 209 | return JSON_BAD_STRING; 210 | } 211 | } 212 | if (c < 0x80) { 213 | *it = c; 214 | } else if (c < 0x800) { 215 | *it++ = 0xC0 | (c >> 6); 216 | *it = 0x80 | (c & 0x3F); 217 | } else { 218 | *it++ = 0xE0 | (c >> 12); 219 | *it++ = 0x80 | ((c >> 6) & 0x3F); 220 | *it = 0x80 | (c & 0x3F); 221 | } 222 | break; 223 | default: 224 | *endptr = s; 225 | return JSON_BAD_STRING; 226 | } 227 | } else if ((unsigned int)c < ' ' || c == '\x7F') { 228 | *endptr = s; 229 | return JSON_BAD_STRING; 230 | } else if (c == '"') { 231 | *it = 0; 232 | ++s; 233 | break; 234 | } 235 | } 236 | if (!isdelim(*s)) { 237 | *endptr = s; 238 | return JSON_BAD_STRING; 239 | } 240 | break; 241 | case 't': 242 | if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) 243 | return JSON_BAD_IDENTIFIER; 244 | o = JsonValue(JSON_TRUE); 245 | s += 3; 246 | break; 247 | case 'f': 248 | if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) 249 | return JSON_BAD_IDENTIFIER; 250 | o = JsonValue(JSON_FALSE); 251 | s += 4; 252 | break; 253 | case 'n': 254 | if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) 255 | return JSON_BAD_IDENTIFIER; 256 | o = JsonValue(JSON_NULL); 257 | s += 3; 258 | break; 259 | case ']': 260 | if (pos == -1) 261 | return JSON_STACK_UNDERFLOW; 262 | if (tags[pos] != JSON_ARRAY) 263 | return JSON_MISMATCH_BRACKET; 264 | o = listToValue(JSON_ARRAY, tails[pos--]); 265 | break; 266 | case '}': 267 | if (pos == -1) 268 | return JSON_STACK_UNDERFLOW; 269 | if (tags[pos] != JSON_OBJECT) 
270 | return JSON_MISMATCH_BRACKET; 271 | if (keys[pos] != nullptr) 272 | return JSON_UNEXPECTED_CHARACTER; 273 | o = listToValue(JSON_OBJECT, tails[pos--]); 274 | break; 275 | case '[': 276 | if (++pos == JSON_STACK_SIZE) 277 | return JSON_STACK_OVERFLOW; 278 | tails[pos] = nullptr; 279 | tags[pos] = JSON_ARRAY; 280 | keys[pos] = nullptr; 281 | separator = true; 282 | continue; 283 | case '{': 284 | if (++pos == JSON_STACK_SIZE) 285 | return JSON_STACK_OVERFLOW; 286 | tails[pos] = nullptr; 287 | tags[pos] = JSON_OBJECT; 288 | keys[pos] = nullptr; 289 | separator = true; 290 | continue; 291 | case ':': 292 | if (separator || keys[pos] == nullptr) 293 | return JSON_UNEXPECTED_CHARACTER; 294 | separator = true; 295 | continue; 296 | case ',': 297 | if (separator || keys[pos] != nullptr) 298 | return JSON_UNEXPECTED_CHARACTER; 299 | separator = true; 300 | continue; 301 | case '\0': 302 | continue; 303 | default: 304 | return JSON_UNEXPECTED_CHARACTER; 305 | } 306 | 307 | separator = false; 308 | 309 | if (pos == -1) { 310 | *endptr = s; 311 | *value = o; 312 | return JSON_OK; 313 | } 314 | 315 | if (tags[pos] == JSON_OBJECT) { 316 | if (!keys[pos]) { 317 | if (o.getTag() != JSON_STRING) 318 | return JSON_UNQUOTED_KEY; 319 | keys[pos] = o.toString(); 320 | continue; 321 | } 322 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) 323 | return JSON_ALLOCATION_FAILURE; 324 | tails[pos] = insertAfter(tails[pos], node); 325 | tails[pos]->key = keys[pos]; 326 | keys[pos] = nullptr; 327 | } else { 328 | if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) 329 | return JSON_ALLOCATION_FAILURE; 330 | tails[pos] = insertAfter(tails[pos], node); 331 | } 332 | tails[pos]->value = o; 333 | } 334 | return JSON_BREAKING_BAD; 335 | } 336 | -------------------------------------------------------------------------------- /cocoapi_ro/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /cocoapi_ro/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | typedef struct Line 15 | { 16 | int crossnum;//0:ignore; -1:all inner point; 2:two crossing point; 1:one crossing point 17 | int p1;//index of the start point 18 | int p2;//index of the end point 19 | int d[2][2];//the index of the start point after division 20 | double length;//the length after division 21 | } Line; 22 | 23 | /* Initialize/destroy RLE. */ 24 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 25 | void rleFree( RLE *R ); 26 | 27 | /* Initialize/destroy RLE array. */ 28 | void rlesInit( RLE **R, siz n ); 29 | void rlesFree( RLE **R, siz n ); 30 | 31 | /* Encode binary masks using RLE. */ 32 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 33 | 34 | /* Decode binary masks encoded via RLE. */ 35 | void rleDecode( const RLE *R, byte *mask, siz n ); 36 | 37 | /* Compute union or intersection of encoded masks. */ 38 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 39 | 40 | /* Compute area of encoded masks. */ 41 | void rleArea( const RLE *R, siz n, uint *a ); 42 | 43 | /* Compute intersection over union between masks. */ 44 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 45 | 46 | /* Compute non-maximum suppression between bounding masks */ 47 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 48 | 49 | /* Compute intersection over union between bounding boxes. */ 50 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 51 | 52 | /* Compute intersection over union between rotate bounding boxes. */ 53 | void rbbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 54 | 55 | /* Compute non-maximum suppression between bounding boxes */ 56 | void bbNms( BB dt, siz n, uint *keep, double thr ); 57 | 58 | /* Get bounding boxes surrounding encoded masks. */ 59 | void rleToBbox( const RLE *R, BB bb, siz n ); 60 | 61 | /* Convert bounding boxes to encoded masks. */ 62 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 63 | 64 | /* Convert polygon to encoded mask. */ 65 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 66 | 67 | /* Get compressed string representation of encoded mask. */ 68 | char* rleToString( const RLE *R ); 69 | 70 | /* Convert from compressed string representation of encoded mask. */ 71 | void rleFrString( RLE *R, char *s, siz h, siz w ); 72 | 73 | 74 | -------------------------------------------------------------------------------- /cocoapi_ro/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 
12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /images/drn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anymake/DRN_CVPR2020/6d5c75895d75601ec92cece3cda516c43abe032a/images/drn.png -------------------------------------------------------------------------------- /images/sku110k_r.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anymake/DRN_CVPR2020/6d5c75895d75601ec92cece3cda516c43abe032a/images/sku110k_r.png -------------------------------------------------------------------------------- /rotate_augment.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | import sys 4 | 5 | from multiprocessing import Pool 6 | import cv2 7 | import imgaug as ia 8 | from imgaug import augmenters as iaa 9 | import numpy as np 10 | import PIL 11 | from PIL import Image 12 | PIL.Image.MAX_IMAGE_PIXELS = 200000000 13 | 14 | 15 | process_num = 64 16 | ia.seed(1) 17 | 18 | def preprocess_handler(img_name, img_dir, rot_list, out_img_dir=None): 19 | img_path = os.path.join(img_dir, img_name) 20 | try: 21 | img = Image.open(img_path).convert('RGB') 22 | img = np.array(img) 23 | except: 24 | try: 25 | img = cv2.imread(img_path) 26 | except: 27 | print(img_path) 28 | 29 | for ang in rot_list: 30 | seq = iaa.Sequential([ 31 | iaa.Affine( 32 | rotate=ang, 33 | fit_output=True 34 | ) 35 | ]) 36 | 37 | seq_det = seq.to_deterministic() 38 | 39 | image_aug = seq_det.augment_images([img])[0] 40 | out_img_name = 'rotate_aug_{}_'.format(str(ang)) 41 | out_img_name = out_img_name + img_name 42 | if out_img_dir is None: 43 | out_dir = os.path.join(img_dir, out_img_name) 44 | else: 45 | out_dir = os.path.join(out_img_dir, out_img_name) 46 | cv2.imwrite(out_dir,image_aug,[int(cv2.IMWRITE_JPEG_QUALITY),81]) 47 | 48 | 49 | def main(img_dir): 50 | rotate_angle_list = [-45, -30, -15, 15, 30, 45] 51 | p = Pool(process_num) 52 | for img_name in os.listdir(img_dir): 53 | p.apply_async(preprocess_handler, args=(img_name, img_dir,rotate_angle_list)) 54 | p.close() 55 | p.join() 56 | 57 | if __name__ == '__main__': 58 | root_img_dir = sys.argv[1] 59 | main(root_img_dir) 60 | -------------------------------------------------------------------------------- /rotation conv layer/rotation_conv_utils.py: 
--------------------------------------------------------------------------------
/rotation conv layer/rotation_conv_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | 
7 | import numpy as np
8 | from external.DCNv2.modules.modulated_deform_conv import ModulatedDeformConv
9 | from external.DCNv2.functions.modulated_deform_conv_func import ModulatedDeformConvFunction
10 | 
11 | 
12 | 
13 | class convolution(nn.Module):
14 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
15 |         super(convolution, self).__init__()
16 | 
17 |         pad = (k - 1) // 2
18 |         self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
19 |         self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
20 |         self.relu = nn.ReLU(inplace=True)
21 | 
22 |     def forward(self, x):
23 |         conv = self.conv(x)
24 |         bn = self.bn(conv)
25 |         relu = self.relu(bn)
26 |         return relu
27 | 
28 | class RotationConvLayer(ModulatedDeformConv):
29 |     '''Rotation convolution layer
30 | 
31 |     Constructs a rotation convolution layer, built on modulated deformable convolution.
32 |     '''
33 |     def __init__(self, in_channels, out_channels,
34 |                  kernel_size, stride, padding,
35 |                  dilation=1, groups=1, deformable_groups=1,
36 |                  im2col_step=64, bias=True):
37 |         super(RotationConvLayer, self).__init__(in_channels, out_channels,
38 |                                                 kernel_size, stride, padding, dilation, groups, deformable_groups, im2col_step, bias)
39 |         channels_ = self.deformable_groups * 1 * self.kernel_size[0] * self.kernel_size[1]
40 |         self.conv_mask = nn.Conv2d(self.in_channels, channels_,
41 |                                    kernel_size=self.kernel_size,
42 |                                    stride=self.stride,
43 |                                    padding=self.padding,
44 |                                    bias=True)
45 |         self.init_mask()
46 | 
47 |     def init_mask(self):
48 |         self.conv_mask.weight.data.zero_()
49 |         self.conv_mask.bias.data.zero_()
50 | 
51 |     def gene_offset(self, b, h, w, angle):
52 |         """ Obtain the offset tensor for the dcn module in accordance with the angle tensor.
53 | 
54 |         Taking the 3x3 kernel case, the offset tensor for one location is:
55 |             off = [x0, y0, x1, y1, x2, y2, ..., x8, y8].
56 |         For a conventional convolution, off = [0, 0, 0, 0, ...].
57 |         The regular grid receptive field R for each position is:
58 |             R = [(-1,-1), (-1,0), (-1,1), ..., (1,0), (1,1)]
59 |         With the predicted angle, we first obtain the rotation matrix M:
60 |                 --                        --           --+------> y
61 |             M = |  cos(theta)   sin(theta) |             |
62 |                 | -sin(theta)   cos(theta) |             |
63 |                 --                        --             v x
64 |         After rotation, the offset tensor OFF_M is:
65 |             OFF_M = M * R - R
66 |                   = (M - I) * R
67 | 
68 |         :param b: The batch size of the input tensor.
69 |         :param h: The height of the feature map.
70 |         :param w: The width of the feature map.
71 |         :param angle: The predicted angle tensor for each object (at every location).
72 |         :return: The offset tensor used in the dcn module.
73 | """ 74 | x_v = (self.kernel_size[0]-1)//2 75 | y_v = (self.kernel_size[1]-1)//2 76 | x_axis = torch.arange(-x_v, x_v+1) 77 | y_axis = torch.arange(-y_v, y_v+1) 78 | x_coor, y_coor = torch.meshgrid(x_axis, y_axis) 79 | x_coor = x_coor.float().contiguous().view(-1, 1) 80 | y_coor = y_coor.float().contiguous().view(-1, 1) 81 | coor = torch.cat((x_coor, y_coor), dim=1).unsqueeze(2).cuda() 82 | oH = (h + 2 * self.padding[0] - self.kernel_size[0]) // self.stride[0] + 1 83 | oW = (w + 2 * self.padding[1] - self.kernel_size[1]) // self.stride[1] + 1 84 | sH = self.kernel_size[0] // 2 - self.padding[0] 85 | sW = self.kernel_size[1] // 2 - self.padding[1] 86 | angle = angle[:, :, sH:sH+oH, sW:sW+oW] 87 | cos_theta = torch.cos(angle).unsqueeze(-1) 88 | # cos_theta = cos_theta[:,:,sH:sH+oH, sW:sW+oW] 89 | sin_theta = torch.sin(angle).unsqueeze(-1) 90 | # sin_theta = sin_theta[:, :, sH:sH + oH, sW:sW + oW] 91 | rot_theta = torch.cat((cos_theta-1, sin_theta, -sin_theta, cos_theta-1), dim=-1) 92 | rot_theta = rot_theta.contiguous().view(-1, 1, 2, 2) 93 | offset = torch.matmul(rot_theta, coor).reshape(b,oH,oW,-1).permute(0,3,1,2).contiguous() 94 | return offset 95 | 96 | def forward(self, input, angle=None, offset=None, mask=None, fp16=False): 97 | b, _, h, w = input.size() 98 | if angle is None: 99 | angle = torch.zeros_like(input)[:,:1,:,:] 100 | if offset is None: 101 | offset = self.gene_offset(b, h, w, angle) 102 | offset = offset.detach() 103 | 104 | if mask is None: 105 | mask = self.conv_mask(input) 106 | mask = torch.sigmoid(mask) 107 | 108 | return ModulatedDeformConvFunction.apply(input, 109 | offset, 110 | mask, 111 | self.weight, 112 | self.bias, 113 | self.stride, 114 | self.padding, 115 | self.dilation, 116 | self.groups, 117 | self.deformable_groups, 118 | self.im2col_step) 119 | 120 | # class FeatureSelectionModule(nn.Module): 121 | # '''Feature Selection Module 122 | # 123 | # Fuse multiple information from different branches where each neurons take different receptive fields. 124 | # ''' 125 | # def __init__(self, dim_in, rot=False): 126 | # super(FeatureSelectionModule, self).__init__() 127 | # self.rot = rot 128 | # self._init_layers(dim_in) 129 | # 130 | # def make_branch_layer(self, dim_in, dim_out, kernel, padding): 131 | # """ Construct the structure of a branch. 132 | # 133 | # Conduct feature aggregation using rotation convolution layer and obtain 1-channel attention map for 134 | # subsequent feature fusion. 135 | # 136 | # :param dim_in: The channel of input feature. 137 | # :param dim_out: The channel of output feature. 138 | # :param kernel: THe kernel size of rotation convolution layer. 139 | # :param padding: The padding for convolution. 140 | # :return: Object of nn.Modulelist. 
141 | # """ 142 | # if self.rot: 143 | # branch_fea = RotationConvLayer(dim_in, dim_out, kernel, stride=1, padding=padding,bias=False) 144 | # else: 145 | # branch_fea = nn.Conv2d(dim_in, dim_out,kernel_size=kernel, padding=padding) 146 | # branch_att = nn.Sequential(nn.ReLU(inplace=True), 147 | # nn.Conv2d(1, 1, kernel_size=3, stride=1, padding=1) 148 | # ) 149 | # 150 | # return nn.ModuleList([branch_fea, branch_att]) 151 | # 152 | # def _init_layers(self, dim_in): 153 | # # branch_ker = [(3, 3)] 154 | # # branch_pad = [(1, 1)] 155 | # branch_ker = [(3, 3), (3, 1),(1,3)] 156 | # branch_pad = [(1, 1), (1, 0),(0,1)] 157 | # 158 | # self.branches = nn.ModuleList() 159 | # for ker_size, pad in zip(branch_ker, branch_pad): 160 | # self.branches.append(self.make_branch_layer(dim_in//4, dim_in//4, ker_size, pad)) 161 | # self.conv_cmp = convolution(1, dim_in, dim_in//4) 162 | # self.conv_out = convolution(3, dim_in // 4, dim_in) 163 | # self.pi = np.pi 164 | # self._init_weights() 165 | # 166 | # def _init_weights(self): 167 | # for name, m in self.named_modules(): 168 | # if isinstance(m, nn.Conv2d): 169 | # nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') 170 | # if isinstance(m, nn.BatchNorm2d): 171 | # nn.init.constant_(m.weight, 1.0) 172 | # nn.init.constant_(m.bias, 0) 173 | # 174 | # def forward(self, x, angle=None): 175 | # x_cmp = self.conv_cmp(x) 176 | # branch_fea = [] 177 | # branch_att = [] 178 | # for br in self.branches: 179 | # br_fea = br[0](x_cmp, angle=angle) 180 | # branch_fea.append(br_fea) 181 | # br_att = br[1](torch.mean(br_fea,dim=1,keepdim=True)) 182 | # branch_att.append(br_att) 183 | # 184 | # att =F.softmax(torch.cat(tuple(branch_att), dim=1), dim=1) 185 | # split_att = torch.split(att, 1, dim=1) 186 | # br_out = sum([fea * att for att, fea in zip(split_att, branch_fea)]) 187 | # return self.conv_out(br_out), att 188 | 189 | class FeatureSelectionModule(nn.Module): 190 | '''Feature Selection Module 191 | 192 | Fuse multiple information from different branches where each neurons take different receptive fields. 193 | ''' 194 | def __init__(self, dim_in, rot=False): 195 | super(FeatureSelectionModule, self).__init__() 196 | self.rot = rot 197 | self._init_layers(dim_in) 198 | 199 | def make_branch_layer(self, dim_in, dim_out, kernel, padding): 200 | """ Construct the structure of a branch. 201 | 202 | Conduct feature aggregation using rotation convolution layer and obtain 1-channel attention map for 203 | subsequent feature fusion. 204 | 205 | :param dim_in: The channel of input feature. 206 | :param dim_out: The channel of output feature. 207 | :param kernel: THe kernel size of rotation convolution layer. 208 | :param padding: The padding for convolution. 209 | :return: Object of nn.Modulelist. 
210 | """ 211 | if self.rot: 212 | branch_fea = RotationConvLayer(dim_in, dim_out, kernel, stride=1, padding=padding,bias=False) 213 | else: 214 | branch_fea = nn.Conv2d(dim_in, dim_out,kernel_size=kernel, padding=padding) 215 | 216 | return nn.ModuleList([branch_fea]) 217 | 218 | def _init_layers(self, dim_in, red_r=4): 219 | # branch_ker = [(3, 3)] 220 | # branch_pad = [(1, 1)] 221 | branch_ker = [(3, 3), (3, 1),(1,3)] 222 | branch_pad = [(1, 1), (1, 0),(0,1)] 223 | self.split = int(len(branch_ker)) 224 | self.branches = nn.ModuleList() 225 | for ker_size, pad in zip(branch_ker, branch_pad): 226 | self.branches.extend(self.make_branch_layer(dim_in//4, dim_in//4, ker_size, pad)) 227 | self.conv_cmp = convolution(1, dim_in, dim_in//4) 228 | self.conv_out = convolution(3, dim_in // 4, dim_in) 229 | self.att = nn.Sequential( 230 | nn.AdaptiveAvgPool2d((1, 1)), 231 | convolution(1, dim_in//4, dim_in//4 //red_r, with_bn=True), 232 | nn.Conv2d(dim_in//4 //red_r, dim_in//4*self.split , 1) 233 | ) 234 | self.pi = np.pi 235 | self._init_weights() 236 | 237 | def _init_weights(self): 238 | for name, m in self.named_modules(): 239 | if isinstance(m, nn.Conv2d): 240 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') 241 | 242 | def forward(self, x, angle=None): 243 | x_cmp = self.conv_cmp(x) 244 | branch_fea = [] 245 | for br in self.branches: 246 | br_fea = br(x_cmp, angle=angle) 247 | branch_fea.append(br_fea) 248 | att = self.att(sum(branch_fea)) 249 | b = att.size(0) 250 | att = att.view(b,-1,self.split,1) 251 | 252 | att =F.softmax(att, dim=2) 253 | split_att = torch.split(att, 1, dim=2) 254 | br_out = sum([fea * att for att, fea in zip(split_att, branch_fea)]) 255 | return self.conv_out(br_out), att 256 | -------------------------------------------------------------------------------- /rotation conv layer/test_rcl.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from rotation_conv_utils import RotationConvLayer 5 | import numpy as np 6 | 7 | 8 | kH = 3 9 | kW = 1 10 | kernel = (kH,kW) 11 | pH = 1 12 | pW = 0 13 | padding = (pH,pW) 14 | iH = iW = 3 15 | oH = (iH + 2 * pH - kH)//1 +1 16 | oW = (iW + 2 * pW - kW)//1 +1 17 | 18 | deformable_groups = 1 19 | N, inC, inH, inW = 1, 1, 3, 3 20 | outC = 1 21 | 22 | 23 | # check_mdconv_zero_offset() 24 | test_rcl = RotationConvLayer(1, 1, kernel, stride=1, padding=padding, bias=False).cuda() 25 | 26 | input = torch.arange(0,iH*iW).view(1,1,iH,iW).cuda().float() 27 | input[0,0,2,1] = 9 28 | input[0,0,1,2] = 10 29 | 30 | print('input') 31 | print(input.squeeze().data.cpu().numpy()) 32 | 33 | nn.init.constant_(test_rcl.weight, 1.0) 34 | nn.init.constant_(test_rcl.bias, 0.) 
--------------------------------------------------------------------------------
/rotation conv layer/test_rcl.py:
--------------------------------------------------------------------------------
1 | 
2 | import torch
3 | import torch.nn as nn
4 | from rotation_conv_utils import RotationConvLayer
5 | import numpy as np
6 | 
7 | 
8 | kH = 3
9 | kW = 1
10 | kernel = (kH, kW)
11 | pH = 1
12 | pW = 0
13 | padding = (pH, pW)
14 | iH = iW = 3
15 | oH = (iH + 2 * pH - kH)//1 + 1
16 | oW = (iW + 2 * pW - kW)//1 + 1
17 | 
18 | deformable_groups = 1
19 | N, inC, inH, inW = 1, 1, 3, 3
20 | outC = 1
21 | 
22 | 
23 | # check_mdconv_zero_offset()
24 | test_rcl = RotationConvLayer(1, 1, kernel, stride=1, padding=padding, bias=False).cuda()
25 | 
26 | input = torch.arange(0, iH*iW).view(1, 1, iH, iW).cuda().float()
27 | input[0, 0, 2, 1] = 9
28 | input[0, 0, 1, 2] = 10
29 | 
30 | print('input')
31 | print(input.squeeze().data.cpu().numpy())
32 | 
33 | nn.init.constant_(test_rcl.weight, 1.0)
34 | nn.init.constant_(test_rcl.bias, 0.)
35 | angle = torch.zeros_like(input)
36 | # offset = [0,0,0,0,0,0,0,0,0,0]
37 | offset = [0, 0, 0, 0, 0, 0]
38 | offset = torch.Tensor(offset).view(2*kH*kW, 1)
39 | offset = offset.expand(2*kH*kW, oH*oW).contiguous().view(-1).view(1, 2*kH*kW, oH, oW).cuda()
40 | mask = torch.ones(N, kH*kW, oH, oW).cuda()
41 | 
42 | # conventional convolution (all offsets zero)
43 | output = test_rcl(input, angle, offset, mask)
44 | print('output')
45 | print(output.squeeze().data.cpu().numpy())
46 | 
47 | # rotation angle pi/2, realised by modifying the offset directly
48 | offset1 = [1, 1, 0, 0, -1, -1]
49 | offset1 = torch.Tensor(offset1).view(2*kH*kW, 1)
50 | offset1 = offset1.expand(2*kH*kW, oH*oW).contiguous().view(1, 2*kH*kW, oH, oW).cuda()
51 | output1 = test_rcl(input, angle, offset1, mask)
52 | print('output_.5pi_off')
53 | print(output1.squeeze().data.cpu().numpy())
54 | 
55 | angle1 = torch.ones_like(input)*np.pi*0.5
56 | output_half_pi = test_rcl(input, angle=angle1, mask=mask)
57 | print('output_.5pi')
58 | print(output_half_pi.squeeze().data.cpu().numpy())
59 | 
60 | angle2 = torch.ones_like(input)*np.pi*1.0
61 | output_pi = test_rcl(input, angle=angle2, mask=mask)
62 | print('output_pi')
63 | print(output_pi.squeeze().data.cpu().numpy())
64 | 
65 | angle3 = torch.ones_like(input)*np.pi*1.5
66 | output_one_half_pi = test_rcl(input, angle=angle3, mask=mask)
67 | print('output_1.5pi')
68 | print(output_one_half_pi.squeeze().data.cpu().numpy())
69 | 
70 | print('done.')
--------------------------------------------------------------------------------
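Note: for reference, with the all-ones 3x1 kernel, zero bias, zero offsets and an all-ones mask, the first printout of the test script is just a vertical box filter with zero padding (assuming the DCNv2 build reduces to a standard convolution when every offset is zero):

    input              output
    0  1   2           3   5  12
    3  4  10    ->     9  14  20
    6  9   8           9  13  18

The later printouts rotate that 3x1 receptive field by the given angle, so 'output_.5pi_off' and 'output_.5pi' should agree with each other (both sum horizontal neighbours); that agreement between an explicitly constructed offset and the angle-driven gene_offset path is what the script is meant to check.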