├── .gitignore
├── LICENSE
├── README.md
├── data
├── 0000_rgb_raw.jpg
├── 0001_rgb_raw.jpg
├── all_select_2d_bboxes.mat
├── edge_detection
│ └── LSD
│ │ ├── 0000_edge.txt
│ │ ├── 0000_edges.jpg
│ │ ├── 0001_edge.txt
│ │ └── 0001_edges.jpg
└── frame_full_infos.mat
├── detect_cuboid.m
├── filter_match_2d_boxes.m
├── illustrations.pdf
├── init_setup.m
├── preprocessing
├── 2D_object_detect
│ ├── yolov2
│ │ ├── darknet.c
│ │ ├── detector.c
│ │ ├── image.c
│ │ └── image.h
│ └── yolov3
│ │ ├── darknet.c
│ │ ├── darknet.h
│ │ ├── detector.c
│ │ └── image.c
└── README.md
└── utils
├── VP_support_edge_infos.m
├── box_edge_alignment_angle_error.m
├── box_edge_sum_dists.m
├── box_edge_sum_dists2.m
├── change_2d_corner_to_3d_object.m
├── common_utils
├── bbox_overlap_ratio.m
├── check_inside_box.m
├── getVanishingPoints.m
├── get_wall_plane_equation.m
├── normalize_to_pi.m
├── plane_hits_3d.m
├── ray_plane_interact.m
└── smooth_jump_angles.m
├── cuboid_utils
├── compute3D_BoxCorner.m
├── get_cuboid_draw_edge_markers.m
├── get_object_edge_visibility.m
└── similarityTransformation.m
├── draw_utils
├── get_id_color.m
├── plot_image_with_cuboids.m
├── plot_image_with_edges.m
├── save_figure_to_img.m
└── subtightplot.m
├── fuse_normalize_scores.m
├── geometry_util
├── EulerZYX_to_Rot.m
├── EulerZYX_to_quat.m
├── PoseQuat_to_Mat.m
├── Rot_to_EulerZYX.m
├── Rot_to_quat.m
├── exptwist.m
├── homo_to_real_coord.m
├── quat_to_EulerZYX.m
├── quat_to_Rot.m
├── real_to_homo_coord.m
└── skew_matrix.m
└── line_utils
├── align_left_right_edges.m
├── lineSegmentIntersect.m
├── merge_break_lines_v2.m
├── merge_break_proj_lines.m
├── point_distproj_line.m
├── remove_short_lines.m
└── seg_hit_boundary.m
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | temp/
3 | data/notes.txt
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | ### License ###
2 | [This software is BSD licensed.](http://opensource.org/licenses/BSD-3-Clause)
3 |
4 | Copyright (c) 2018, Carnegie Mellon University
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
12 |
13 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Matlab cuboid detect
2 |
3 | Matlab code to detect cuboid objects. There are two main parts: cuboid proposal generation and proposal scoring.
4 |
5 | **NOTE** We also provide a C++ version of cuboid detection and multi-view object SLAM. Please see [cube_slam](https://github.com/shichaoy/cube_slam).
6 |
7 |
8 | **Authors:** [Shichao Yang](http://www.frc.ri.cmu.edu/~syang/)
9 |
10 | **Related Paper:**
11 |
12 | * **CubeSLAM: Monocular 3D Object Detection and SLAM without Prior Models**, Arxiv 2018, S. Yang, S. Scherer [**PDF**](https://arxiv.org/abs/1806.00557)
13 |
14 |
15 |
16 |
17 |
18 | ## How to run:
19 | ```git clone git@github.com:shichaoy/matlab_cuboid_detect.git```
20 |
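21 | Then open Matlab and run the following in the Command Window (scripts are invoked by name, without the `.m` extension):
22 |
23 | ```matlab
24 | cd matlab_cuboid_detect
25 | init_setup
26 | detect_cuboid
27 | ```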
28 |
29 |
30 |
31 | ### Notes
32 |
33 | 1. **Overview:** See ```illustrations.pdf``` for corner indexing in generating proposals and 3D coordinate system.
34 |
35 | 2. The ```data/``` folder contains some preprocessing results. Edge detection comes from the ROS package [line_lbd](https://github.com/shichaoy/pop_up_slam/tree/master/line_lbd). The mat contains the 2D object bounding boxes. We use Yolo to detect 2D objects; other similar methods can also be used. ```preprocessing/2D_object_detect``` is our prediction code that saves the images and txts. Sometimes there are overlapping boxes for the same object instance, so some detections need to be filtered and cleaned. See ```filter_match_2d_boxes.m``` for more details.
36 |
37 |
38 |
--------------------------------------------------------------------------------
/data/0000_rgb_raw.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/0000_rgb_raw.jpg
--------------------------------------------------------------------------------
/data/0001_rgb_raw.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/0001_rgb_raw.jpg
--------------------------------------------------------------------------------
/data/all_select_2d_bboxes.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/all_select_2d_bboxes.mat
--------------------------------------------------------------------------------
/data/edge_detection/LSD/0000_edge.txt:
--------------------------------------------------------------------------------
1 | 467.435 0.526885 453.015 285.683
2 | 457.721 286.853 473.672 0.516197
3 | 486.396 0.507959 484.43 29.548
4 | 552.206 0.412684 540.56 119.369
5 | 559.175 202.957 582.687 0.429034
6 | 611.06 118.194 629.666 0.473482
7 | 633.842 0.356378 616.641 114.528
8 | 653.367 0.472589 622.693 203.438
9 | 631.611 205.585 663.051 0.0400169
10 | 681.564 5.99359 651.88 182.297
11 | 366.166 6.87097 366.446 44.3782
12 | 319.239 10.869 338.15 8.31004
13 | 583.837 100.716 595.578 9.20856
14 | 459.351 43.2217 461.929 10.5307
15 | 523.724 71.8136 529.516 10.4025
16 | 591.446 10.5717 580.248 100.733
17 | 598.506 14.4255 594.026 48.2444
18 | 604.86 61.7663 611.452 15.3872
19 | 209.715 23.0991 210.953 39.35
20 | 484.432 26.752 482.982 55.5548
21 | 214.878 33.0809 217.19 59.4569
22 | 688.617 58.5008 692.39 41.0057
23 | 428.131 41.8102 456.982 44.4654
24 | 455.571 120.69 459.684 45.5735
25 | 366.315 46.8746 366.247 135.625
26 | 624.932 115.907 635.756 46.5055
27 | 610.564 48.1185 608.291 69.3928
28 | 217.728 63.1586 220.482 95.6371
29 | 482.172 66.8154 471.138 243.235
30 | 509.563 223.357 522.322 75.4479
31 | 725.625 89.38 709.373 89.4109
32 | 605.044 90.5431 601.491 115.747
33 | 216.902 94.3732 218.511 118.099
34 | 586.657 100.251 582.84 115.234
35 | 221.277 101.816 224.773 140.589
36 | 533.75 101.917 532.498 120.667
37 | 578.315 118.149 568.363 198.31
38 | 579.781 144.471 581.239 119.338
39 | 590.672 124.588 610.533 120.209
40 | 712.299 210.706 729.459 120.88
41 | 453.294 148.031 455.58 121.762
42 | 539.807 126.794 530.908 217.026
43 | 528.362 144.38 528.69 128.111
44 | 220.272 133.159 225.28 185.658
45 | 573.252 200.349 579.674 145.223
46 | 404.183 155.851 445.618 148.086
47 | 326.824 174.092 453.226 151.183
48 | 375.711 161.135 403.27 156.487
49 | 238.067 185.304 351.964 164.87
50 | 351.968 167.397 283.174 179.651
51 | 223.612 188.224 221.488 171.925
52 | 284.153 181.946 325.583 174.153
53 | 651.905 182.83 714.521 189.185
54 | 266.806 182.726 236.833 187.88
55 | 391.876 185.719 354.353 186.137
56 | 235.561 190.214 268.245 185.147
57 | 409.48 188.755 389.215 185.321
58 | 239.074 194.309 259.438 189.649
59 | 398.078 200.424 413.686 196.775
60 | 91.9186 212.116 184.329 195.372
61 | 308.734 202.457 325.988 196.716
62 | 333.225 198.602 361.822 202.286
63 | 183.061 197.768 88.1926 214.752
64 | 168.331 203.044 193.22 198.662
65 | 361.84 202.225 398.083 200.811
66 | 180.821 208.749 198.18 203.3
67 | 188.084 204.203 171.867 208.091
68 | 235.224 213.154 287.956 204.587
69 | 557.011 204.703 534.261 214.098
70 | 723.162 216.595 634.328 204.73
71 | 289.048 205.872 301.56 222.418
72 | 531.322 219.32 561.247 206.643
73 | 675.559 207.547 693.136 209.266
74 | 63.0748 211.573 78.0763 209.082
75 | 169.429 209.795 154.356 211.726
76 | 156.88 218.154 200.803 210.383
77 | 326.11 211.092 340.37 217.447
78 | 51.7698 223.808 124.323 210.346
79 | 234.266 210.301 206.909 219.476
80 | 415.706 212.152 399.384 216.906
81 | 135.378 217.1 151.877 213.131
82 | 209.449 222.097 234.556 213.663
83 | 320.834 217.35 361.757 227.428
84 | 376.374 227.002 422.123 218.16
85 | 0.520666 228.804 83.0066 213.727
86 | 124.289 213.749 91.8874 218.215
87 | 85.5905 215.433 46.9644 222.372
88 | 159.339 215.403 115.529 222.535
89 | 187.991 215.103 150.695 224.649
90 | 399.373 217.965 340.63 218.573
91 | 90.7002 218.375 70.637 224.415
92 | 74.3259 225.384 109.396 218.23
93 | 130.559 222.521 155.401 219.815
94 | 191.907 219.56 175.712 222.375
95 | 0.451213 223.617 15.5187 220.161
96 | 111.894 220.744 90.5814 224.098
97 | 44.334 222.898 26.9479 226.03
98 | 35.5469 226.485 50.6903 223.451
99 | 109.459 226.177 129.381 223.162
100 | 150.628 225.45 128.134 225.08
101 | 415.38 224.888 404.198 246.81
102 | 506.871 225.615 486.613 233.719
103 | 0.562763 232.759 32.7703 227.287
104 | 44.3724 228.395 64.3827 228.59
105 | 124.347 227.322 139.367 228.247
106 | 18.0571 227.741 0.659563 230.82
107 | 472.513 244.177 511.211 227.806
108 | 329.752 254.334 327.026 229.496
109 | 54.4391 230.935 33.0693 235.356
110 | 80.6609 237.091 114.514 231.462
111 | 403.702 229.546 387.172 239.852
112 | 0.598427 236.682 18.1108 234.272
113 | 124.271 232.532 99.3804 236.906
114 | 16.9671 243.546 53.3886 235.58
115 | 99.2966 237.262 78.1077 239.184
116 | 199.536 258.142 199.228 238.127
117 | 78.0071 240.215 58.2071 245.91
118 | 518.44 251.241 540.84 239.796
119 | 41.8469 240.529 20.9324 246.672
120 | 394.596 242.336 380.489 249.09
121 | 403.33 247.134 387.885 259.397
122 | 111.809 253.956 141.855 249.249
123 | 412.935 270.616 413.856 251.85
124 | 211.384 294.408 208.6 253.177
125 | 155.999 260.663 173.143 255.686
126 | 387.796 259.665 369.865 265.807
127 | 170.712 264.683 188.224 259.725
128 | 199.631 261.854 202.973 303.137
129 | 205.085 303.167 201.854 261.877
130 | 76.883 263.709 58.1194 263.965
131 | 369.369 267.005 339.318 265.523
132 | 205.512 265.553 207.499 296.756
133 | 333.772 265.694 334.7 290.752
134 | 96.7386 273.954 118.171 267.018
135 | 130.67 272.008 111.94 278.319
136 | 274.769 282.226 293.318 277.659
137 | 70.6911 279.26 90.6961 278.094
138 | 24.4854 288.319 41.9951 278.336
139 | 90.6402 284.455 108.218 281.113
140 | 249.632 290.142 274.37 281.86
141 | 42.7921 282.392 26.9682 289.58
142 | 29.4174 291.968 50.7796 282.214
143 | 292.862 291.341 306.903 284.432
144 | 280.764 287.321 244.172 298.723
145 | 141.849 288.036 122.087 293.847
146 | 227.059 301.025 251.495 289.799
147 | 412.132 308.612 448.68 289.309
148 | 48.209 291.163 24.2618 294.9
149 | 26.8776 296.892 44.3562 294.25
150 | 42.8722 301.311 59.2037 293.993
151 | 286.879 294.386 263.823 302.587
152 | 308.115 296.176 288.099 295.813
153 | 48.2238 296.321 26.8719 299.353
154 | 121.76 302.721 143.391 296.561
155 | 343.224 303.859 318.419 299.095
156 | 110.761 298.526 94.6609 303.97
157 | 172.691 308.627 191.603 297.657
158 | 18.5526 306.697 33.2137 300.847
159 | 265.099 302.466 235.516 314.098
160 | 72.021 309.736 88.1578 303.206
161 | 96.9378 309.673 113.253 306.234
162 | 187.342 307.555 173.329 317.172
163 | 371.742 308.923 356.946 306.449
164 | 145.667 314.485 160.817 308.622
165 | 353.189 307.808 368.083 310.831
166 | 115.7 310.883 85.6903 319.6
167 | 407.991 368.187 411.21 310.588
168 | 141.85 313.058 125.497 319.023
169 | 148.047 320.282 170.524 315.179
170 | 181.661 325.229 201.845 314.32
171 | 268.527 317.955 252.62 323.875
172 | 118.147 331.93 147.22 320.232
173 | 166.396 317.977 99.4653 340.889
174 | 255.818 329.862 275.253 322.183
175 | 353.427 348.864 382.985 325.798
176 | 371.46 327.587 353.058 341.788
177 | 101.999 332.305 116.846 328.026
178 | 210.003 332.41 220.443 344.532
179 | 139.362 341.811 165.767 336.306
180 | 151.948 348.284 178.348 336.108
181 | 221.317 338.376 249.97 349.023
182 | 165.885 338.962 135.336 348.443
183 | 377.828 340.535 380.43 368.143
184 | 274.931 361.601 221.755 342.204
185 | 221.756 344.699 270.793 362.668
186 | 273.227 366.601 219.101 346.357
187 | 318.505 365.372 353.072 349.261
188 | 373.654 346.832 375.594 370.729
189 | 92.5139 348.182 72.131 358.625
190 | 249.036 349.081 275.897 358.608
191 | 369.644 375.594 366.469 348.172
192 | 85.5089 357.816 107.009 349.731
193 | 204.688 349.375 204.688 379.375
194 | 337.03 349.983 317.928 354.852
195 | 65.8805 351.939 52.2667 363.351
196 | 317.906 355.788 299.643 359.459
197 | 124.445 364.515 144.929 354.238
198 | 117.82 354.868 103.185 360.773
199 | 133.43 356.291 108.038 367.936
200 | 282.395 359.155 312.874 368.91
201 | 41.7406 357.74 16.7299 366.459
202 | 167.083 360.061 142.901 367.386
203 | 207.99 361.879 208.464 378.115
204 | 160.272 366.86 175.482 362.614
205 | 309.032 371.729 284.108 363.983
206 | 180.476 363.878 156.967 370.932
207 | 200.068 409.378 199.833 368.122
208 | 12.808 377.154 35.7229 369.675
209 | 394.442 380.968 408.261 369.539
210 | 40.9535 385.108 73.146 370.672
211 | 155.838 371.137 141.897 376.927
212 | 309.736 379.646 285.586 370.731
213 | 296.733 371.63 311.786 373.758
214 | 15.8621 384.852 40.3976 372.667
215 | 45.1382 372.532 31.2754 383.917
216 | 292.761 424.435 284.323 372.929
217 | 352.067 393.407 380.436 374.098
218 | 381.821 374.411 382.473 398.143
219 | 179.156 374.989 163.088 380.517
220 | 343.025 375.855 358.986 382.772
221 | 120.52 378.934 100.467 383.712
222 | 126.846 385.558 141.783 379.159
223 | 88.979 391.915 120.902 381.472
224 | 145.78 381.017 125.491 389.036
225 | 341.714 394.737 310.536 380.824
226 | 246.914 395.697 272.915 381.49
227 | 372.342 381.883 372.51 399.381
228 | 80.3992 385.001 61.8197 391.722
229 | 283.137 384.373 286.351 401.971
230 | 125.572 391.727 143.071 385.474
231 | 293.226 386.657 328.251 402.852
232 | 318.156 386.808 338.411 396.263
233 | 144.069 387.455 126.754 395.359
234 | 311.968 387.932 334.139 398.616
235 | 162.115 401.303 190.807 391.138
236 | 40.7415 392.187 19.6363 400.074
237 | 330.739 401.636 309.551 391.506
238 | 66.6326 406.39 90.1364 394.647
239 | 189.683 395.128 163.891 405.693
240 | 275.447 394.403 285.352 458.167
241 | 96.8818 396.89 63.0787 411.77
242 | 294.9 430.771 343.874 398.611
243 | 346.7 399.815 369.555 408.924
244 | 383.503 399.3 387.025 417.093
245 | 208.97 405.567 206.449 423.064
246 | 335.394 406.12 360.222 417.737
247 | 160.938 407.78 145.609 413.079
248 | 287.047 406.85 289.455 423.113
249 | 359.49 420.381 334.192 408.516
250 | 168.263 435.89 200.006 419.43
251 | 316.932 421.785 332.988 432.088
252 | 49.1224 421.463 35.7848 429.636
253 | 241.029 448.034 209.111 424.737
254 | 213.232 431.718 281.758 478.297
255 | 302.22 479.315 294.211 433.154
256 | 291.3 434.48 299.309 478.137
257 | 380.394 435.032 364.173 441.356
258 | 148.32 447.251 168.174 436.97
259 | 145.798 439.716 127.363 449.085
260 | 273.161 480.573 215.386 439.713
261 | 219.058 439.825 275.469 479.596
262 | 293.727 464.272 290.074 443.005
263 | 132.383 454.155 147.436 446.728
264 | 286.71 478.37 241.881 448.115
265 | 278.357 479.043 235.741 449.209
266 | 55.4982 471.694 73.1043 459.345
267 | 105.429 468.944 122.763 461.078
268 | 106.692 461.497 90.0913 469.521
269 | 87.7399 478.692 104.881 469.022
270 | 60.1669 493.292 85.4947 479.142
271 | 6.32532 522.105 43.2938 502.188
272 |
--------------------------------------------------------------------------------
/data/edge_detection/LSD/0000_edges.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/edge_detection/LSD/0000_edges.jpg
--------------------------------------------------------------------------------
/data/edge_detection/LSD/0001_edge.txt:
--------------------------------------------------------------------------------
1 | 92.4007 1.61188 104.699 10.6374
2 | 164.624 29.3484 161.53 0.394724
3 | 299.78 70.4832 296.361 0.406112
4 | 366.19 0.61799 366.412 58.1364
5 | 373.845 0.622262 374.068 59.3762
6 | 376.984 89.3657 377.711 0.611381
7 | 406.691 0.622117 406.619 99.3748
8 | 415.58 0.569976 413.47 98.1595
9 | 456.267 0.618081 456.011 23.1294
10 | 457.413 64.4022 460.642 0.56265
11 | 465.103 0.416984 460.609 65.7099
12 | 465.603 99.4446 471.207 0.587147
13 | 481.539 0.518461 474.896 99.41
14 | 509.444 96.9964 518.331 0.413908
15 | 540.282 0.461016 534.093 60.5959
16 | 566.611 115.59 581.879 0.459785
17 | 591.104 0.340577 573.662 124.275
18 | 359.936 19.3799 359.82 3.12182
19 | 251.358 4.30607 253.605 28.1974
20 | 402.188 54.375 402.188 4.375
21 | 433.157 58.1778 434.278 4.39904
22 | 338.026 56.9344 335.768 5.61872
23 | 176.793 10.4528 196.867 11.9758
24 | 568.137 9.87018 549.368 10.3356
25 | 616.205 74.679 627.047 9.40362
26 | 189.891 39.7176 214.22 12.7397
27 | 212.699 12.0674 233.656 36.4185
28 | 567.422 25.7018 569.559 10.4754
29 | 547.699 11.9605 546.145 26.929
30 | 196.896 15.188 176.882 14.224
31 | 488.127 15.7231 504.398 15.3287
32 | 170.605 17.4282 191.872 18.1995
33 | 364.164 24.1131 349.179 22.7016
34 | 348.139 29.8182 364.384 30.093
35 | 450.025 31.8175 446.89 64.4964
36 | 360.647 56.8818 360.515 33.1187
37 | 441.114 33.1162 440.811 59.3771
38 | 234.005 37.0582 249.897 55.0188
39 | 171.496 61.4463 191.838 40.6183
40 | 624.766 48.1954 620.938 69.4314
41 | 195.408 58.1252 179.824 67.6131
42 | 255.805 58.0032 261.032 119.34
43 | 374.255 59.5413 300.571 63.3276
44 | 253.273 60.0022 238.227 63.5572
45 | 399.468 59.3417 398.64 88.1039
46 | 430.76 101.93 432.447 59.2987
47 | 234.769 69.2538 254.295 62.8795
48 | 195.628 65.691 223.184 64.2697
49 | 335.626 64.3964 369.376 63.1469
50 | 462.696 62.9887 459.722 99.6063
51 | 178.694 74.1208 196.506 66.0605
52 | 533.752 64.1786 528.614 113.176
53 | 371.891 67.0977 299.314 72.2692
54 | 402.188 88.125 402.188 68.125
55 | 175.96 96.0353 170.853 69.0947
56 | 453.432 99.6146 455.886 70.8607
57 | 619.317 72.3295 643.124 73.1454
58 | 200.248 74.8557 179.468 80.9427
59 | 673.137 73.7126 728.142 74.738
60 | 235.431 78.2 254.486 75.0477
61 | 728.149 78.0799 620.616 76.1228
62 | 179.187 84.3054 231.866 76.8108
63 | 255.655 82.2112 239.425 83.6799
64 | 342.984 88.8903 366.071 82.1556
65 | 213.107 89.0956 179.335 91.2487
66 | 125.939 94.9672 107.268 86.052
67 | 327.221 93.3235 342.541 89.6616
68 | 372.901 90.1747 398.011 92.9175
69 | 399.422 89.5388 379.351 88.6765
70 | 194.415 95.9083 230.646 90.7715
71 | 360.54 122.234 352.896 93.7653
72 | 374.818 94.923 381.992 125.001
73 | 382.706 118.136 382.965 98.1068
74 | 510.641 99.7256 429.329 103.328
75 | 526.598 108.611 508.15 98.0812
76 | 405.645 101.338 421.873 101.93
77 | 179.849 131.798 174.96 101.578
78 | 352.052 109.914 319.367 106.961
79 | 461.933 113.572 526.857 111.345
80 | 211.908 112.016 239.459 112.846
81 | 258.143 110.773 241.983 112.756
82 | 421.868 112.37 461.895 112.916
83 | 239.521 112.878 255.393 122.267
84 | 526.942 114.616 568.176 117.356
85 | 316.869 120.067 343.101 119.767
86 | 461.898 116.975 431.745 123.803
87 | 544.449 123.318 463.072 117.626
88 | 300.53 121.636 315.708 120.664
89 | 256.911 122.953 289.416 121.871
90 | 390.607 120.891 414.417 122.496
91 | 571.907 125.205 545.429 123.162
92 | 320.606 122.923 301.788 124.711
93 | 260.324 124.853 266.034 143.384
94 | 288.14 125.026 264.374 125.562
95 | 293.643 159.521 291.051 124.344
96 | 298.884 124.405 300.946 158.182
97 | 415.723 127.311 396.8 131.541
98 | 529.516 148.453 579.891 126.826
99 | 344.403 133.28 326.885 133.624
100 | 310.195 153.341 308.708 133.174
101 | 321.806 138.979 358.022 141.902
102 | 571.846 138.473 728.26 151.501
103 | 183.273 155.774 181.025 139.152
104 | 387.007 142.042 463.343 151.335
105 | 496.963 143.734 527.972 147.99
106 | 728.461 160.918 559.363 144.495
107 | 486.683 160.926 391.744 149.187
108 | 157.979 148.555 143.007 151.212
109 | 281.911 149.544 267.052 152.707
110 | 350.391 148.431 214.452 182.185
111 | 527.482 149.573 545.629 154.36
112 | 464.183 151.247 489.431 153.833
113 | 126.96 153.647 79.2935 161.372
114 | 199.384 180.978 191.677 153.893
115 | 545.964 154.32 605.815 169.893
116 | 472.668 196.666 491.293 156.035
117 | 412.092 161.339 356.867 178.097
118 | 75.5289 162.48 54.1936 165.656
119 | 401.919 170.748 424.236 162.737
120 | 208.132 174.357 197.465 163.78
121 | 66.9403 170.152 48.0089 171.742
122 | 86.8546 171.173 68.0841 170.474
123 | 108.258 182.505 168.216 169.806
124 | 48.2369 171.363 26.7236 174.627
125 | 101.568 175.596 86.8683 171.902
126 | 165.752 173.734 133.102 180.516
127 | 25.7435 174.959 0.757854 180.03
128 | 328.852 196.647 387.03 176.062
129 | 609.998 175.385 592.588 220.898
130 | 30.6538 179.598 51.8723 176.854
131 | 457.964 185.16 475.87 178.969
132 | 351.922 179.532 330.349 185.947
133 | 132.676 180.827 109.33 185.394
134 | 111.952 202.194 196.856 181.794
135 | 214.223 182.534 199.251 186.393
136 | 300.67 183.158 283.147 183.767
137 | 315.074 194.987 301.264 182.554
138 | 110.031 199.493 107.066 184.586
139 | 200.946 185.674 173.28 192.502
140 | 224.661 287.175 457.389 185.195
141 | 281.111 185.174 294.948 200.094
142 | 455.213 198.638 475.457 189.022
143 | 162.703 195.218 112.756 207.924
144 | 331.789 201.618 350.538 195.363
145 | 312.904 202.47 328.265 197.292
146 | 290.879 200.055 269.368 208.107
147 | 427.9 211.364 446.365 203.219
148 | 465.405 231.601 470.527 204.124
149 | 709.362 208.221 690.617 205.683
150 | 214.359 210.432 240.534 208.272
151 | 367.922 213.66 349.26 218.973
152 | 246.884 220.654 264.22 215.137
153 | 253.336 220.175 267.008 226.592
154 | 377.246 234.147 403.736 221.952
155 | 591.353 220.931 566.843 279.362
156 | 194.578 228.713 211.755 222.778
157 | 51.2457 224.926 105.764 243.978
158 | 453.13 227.652 415.634 227.26
159 | 414.477 228.464 396.701 233.794
160 | 399.291 240.07 456.617 231.413
161 | 156.778 232.818 134.522 239.842
162 | 254.498 236.321 218.277 242.735
163 | 220.599 245.498 250.862 239.276
164 | 345.582 238.021 326.941 245.784
165 | 436.726 238.18 451.478 277.807
166 | 70.5992 244.465 54.2511 239.81
167 | 425.685 239.689 409.314 242.806
168 | 440.26 282.865 427.54 239.547
169 | 484.744 268.181 467.031 239.279
170 | 302.308 270.201 359.562 242.258
171 | 199.53 242.266 218.345 245.655
172 | 47.0989 249.358 65.69 245.983
173 | 106.158 244.152 224.769 287.036
174 | 161.809 242.922 144.507 248.532
175 | 413.093 244.373 411.012 279.324
176 | 25.2054 266.325 47.4578 249.36
177 | 202.685 253.312 227.631 257.393
178 | 227.879 256.53 249.389 253.216
179 | 414.856 275.656 416.294 253.168
180 | 225.463 261.604 198.09 257.084
181 | 124.485 259.134 144.334 268.214
182 | 271.997 274.633 303.362 259.879
183 | 118.246 261.392 70.3605 268.949
184 | 145.538 274.596 114.852 262.601
185 | 260.699 260.854 244.161 266.215
186 | 210.686 262.051 188.997 269.528
187 | 477.717 270.534 462.085 265.029
188 | 448.073 264.55 490.859 277.344
189 | 494.428 275.496 478.78 269.037
190 | 69.7093 269.288 34.0613 279.533
191 | 8.63186 281.325 24.8626 269.572
192 | 290.476 274.969 306.471 271.342
193 | 395.747 277.036 360.626 273.112
194 | 194.577 280.099 175.728 272.857
195 | 131.301 282.639 146.661 277.487
196 | 264.059 288.863 289.893 276.727
197 | 358.06 275.973 420.499 287.555
198 | 33.7254 279.481 12.8381 288.727
199 | 243.746 286.896 260.83 278.544
200 | 429.401 282.998 414.272 279.873
201 | 567.967 279.831 562.161 309.674
202 | 534.446 281.358 506.843 284.047
203 | 159.133 281.95 174.588 331.809
204 | 220.566 289.533 199.469 281.623
205 | 463.237 282.367 480.725 284.944
206 | 14.4962 349.626 0.272279 285.703
207 | 175.707 355.98 154.21 285.295
208 | 174.35 285.692 214.309 300.8
209 | 229.401 294.429 243.516 287.694
210 | 228.383 306.169 263.787 289.384
211 | 497.28 287.267 478.322 294.867
212 | 489.13 305.918 551.687 289.889
213 | 10.296 290.943 14.4074 311.373
214 | 131.357 302.742 112.223 291.293
215 | 192.125 305.408 179.952 291.374
216 | 199.263 295.645 203.333 318.087
217 | 309.369 297.111 293.127 297.206
218 | 213.216 300.411 227.565 306.499
219 | 206.146 357.015 195.859 303.32
220 | 139.107 304.437 146.118 334.838
221 | 78.806 323.95 131.774 306.568
222 | 139.548 330.837 134.585 305.584
223 | 563.084 309.096 568.048 330.643
224 | 195.392 318.173 200.834 344.332
225 | 188.348 319.328 196.811 359.123
226 | 25.5232 323.27 65.5836 326.193
227 | 62.8697 325.469 79.6997 322.413
228 | 405.542 353.189 380.389 320.808
229 | 204.722 323.076 207.002 339.357
230 | 16.9669 327.172 25.8599 361.815
231 | 174.681 332.142 185.808 371.469
232 | 20.1757 369.878 14.3652 350.258
233 | 428.159 374.34 407.15 354.09
234 | 179.254 379.388 176.727 356.614
235 | 527.324 360.928 506.256 368.607
236 | 215.26 367.243 199.619 373.715
237 | 96.8795 369.452 78.0477 370.552
238 | 29.188 371.912 32.975 390.904
239 | 60.5506 372.313 34.2919 374.717
240 | 214.842 404.294 209.189 371.908
241 | 37.2862 433.921 20.9065 373.957
242 | 199.603 374.033 205.208 398.222
243 | 490.409 385.881 539.552 375.186
244 | 543.047 374.379 543.931 393.205
245 | 203.899 399.055 165.078 379.195
246 | 207.823 381.93 211.943 404.362
247 | 161.723 383.751 177.862 393.207
248 | 428.184 394.728 457.977 389.742
249 | 526.874 389.373 502.873 398.728
250 | 33.4414 391.478 40.4367 419.106
251 | 390.551 401.493 419.428 395.896
252 | 181.968 397.981 199.459 409.245
253 | 424.476 397.417 384.467 404.869
254 | 495.564 396.972 532.529 420.319
255 | 384.114 407.965 429.188 399.617
256 | 476.623 403.074 468.266 444.403
257 | 468.219 407.985 391.879 414.426
258 | 275.272 443.678 219.45 408.008
259 | 455.77 476.645 469.431 410.896
260 | 563.823 451.695 486.321 411.698
261 | 209.933 417.769 225.596 423.209
262 | 530.627 421.083 565.323 442.367
263 | 233.767 420.98 249.822 432.503
264 | 443.14 422.383 418.125 423.132
265 | 413.125 423.125 394.354 423.787
266 | 225.957 423.785 245.002 434.512
267 | 504.298 437.081 485.405 429.96
268 | 246.682 432.702 272.221 448.828
269 | 477.922 436.829 464.063 498.054
270 | 533.572 440.975 555.1 451.664
271 | 578.466 445.563 576.92 468.298
272 | 448.772 498.602 439.45 486.816
273 |
--------------------------------------------------------------------------------
/data/edge_detection/LSD/0001_edges.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/edge_detection/LSD/0001_edges.jpg
--------------------------------------------------------------------------------
/data/frame_full_infos.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/data/frame_full_infos.mat
--------------------------------------------------------------------------------
/filter_match_2d_boxes.m:
--------------------------------------------------------------------------------
1 | % Script to filter and clean 2D object detections. There might be duplicated 2D bboxes of the same object instance; one of them is removed.
2 | % It can also be used to associate 2D bounding boxes with the ground truth 2D bboxes.
3 |
4 | % This file doesn't use 3D cuboids!! Only based on 2d bounding box!!
5 |
6 | clear;      % start from an empty workspace
7 | close all;  % close figures left over from a previous run
8 |
9 | dataset_name = 'cabinet'; % 'cabinet'
10 | switch dataset_name
11 | case 'cabinet' % one sample dataset
12 | data_proc_root_dir = 'path/to/TUM_RGBD/fr3_cabinet/'; % set up path here. Keep the trailing slash: sub-folder names are appended by plain concatenation.
13 | yolo_det_thre = 0.15; % threshold value that appears in the yolo txt file names read by this script
14 | otherwise % fail early with a clear message instead of hitting an undefined data_proc_root_dir below
15 | error('filter_match_2d_boxes:unknownDataset','Unknown dataset_name ''%s''.',dataset_name);
16 | end
17 | % important parameters
18 | filtering_object_prob = 0.15; % only detections with probability strictly greater than this are kept
19 | whether_remove_close_boundary_box = false; % if true, also drop boxes lying close to the image border
20 | whether_remove_overlapping_box = true; % if true, drop one box of every strongly overlapping pair
21 |
22 |
23 | data_yolo_objdec_dir = [data_proc_root_dir 'yolov2_obj_txts/']; % this is the output of raw 2d object detections
24 |
25 | saved_img_filter_match_2d_boxes_all_dir = [data_proc_root_dir 'filter_match_2d_boxes/']; % saved image folder
26 | saved_select_2dbox_match_mat_name = [data_proc_root_dir 'mats/select_2d_boxes_and_matches.mat']; % saved mat name
27 | if (~exist(saved_img_filter_match_2d_boxes_all_dir,'dir')) mkdir(saved_img_filter_match_2d_boxes_all_dir); end % create the folder if it is missing; 'dir' restricts the check to folders
28 | if (~exist([data_proc_root_dir 'mats'],'dir')) mkdir([data_proc_root_dir 'mats']); end
29 |
30 | write_box_into_imgs = true; whether_plot_images=false; % true false
31 | save_box_into_mat = false;
32 |
33 | object_colors={'red','green','blue','cyan','magenta','yellow','red','green','blue','cyan'}; % palette of 10 colors used when drawing boxes
34 |
35 |
36 | %% read yolo 2D object detection, remove some bad, overlapped boxes. all in 2D space.
37 | whether_reselect_yolo_2d_boxes = true;
38 | if (whether_reselect_yolo_2d_boxes)
39 | total_img_num = size(dir([data_proc_root_dir 'raw_imgs/' '*.jpg']),1);
40 | all_select_2d_bboxes = cell(total_img_num,1);
41 | all_select_2dboxes_classes = cell(total_img_num,1);
42 | for frame_index = 0:total_img_num-1
43 | if (mod(frame_index,100)==0)
44 | disp(['Processing image ',num2str(frame_index)]);
45 | end
46 |
47 | %% read input images
48 | rgb_img = imread([data_proc_root_dir 'raw_imgs/' sprintf('%04d_rgb_raw.jpg',frame_index)]);
49 | im_width = size(rgb_img,2); im_height = size(rgb_img,1);
50 |
51 | % read 2D object detection
52 | obj_bbox_structs = importdata([data_yolo_objdec_dir sprintf('%04d_yolo2_%.2f.txt',frame_index,yolo_det_thre)]); % [x1 y1 x_width y_height prob] x1 y1 is top-left coordinate
53 |
54 | if (size(obj_bbox_structs,1)>0) % if found objects...
55 | obj_bbox_class_raw=obj_bbox_structs.textdata; obj_bbox_coors_raw=obj_bbox_structs.data;obj_bbox_coors_raw(:,1:2)=obj_bbox_coors_raw(:,1:2)+1; % +1 as c++ cnn index starts at 0
56 |
57 | if ( sum(sum(obj_bbox_coors_raw<=0))>0 )
58 | disp(['Finding negative coordinate.... ',num2str(frame_index)]);
59 | end
60 |
61 | % check that the object location does not exceed the image width and height
62 | x_exceed = (obj_bbox_coors_raw(:,1)+obj_bbox_coors_raw(:,3))>im_width;
63 | y_exceed = (obj_bbox_coors_raw(:,2)+obj_bbox_coors_raw(:,4))>im_height;
64 | if any(x_exceed)
65 | bad_x_ind = find(x_exceed);
66 | for jj=bad_x_ind' % row vec
67 | fprintf('X exceeds max width at Frame %04d, line %d, exceed %d\n',frame_index, jj, obj_bbox_coors_raw(jj,1)+obj_bbox_coors_raw(jj,3)-im_width);
68 | end
69 | obj_bbox_coors_raw(bad_x_ind,3) = im_width - obj_bbox_coors_raw(bad_x_ind,1);
70 | end
71 | if any(y_exceed)
72 | bad_y_ind = find(y_exceed);
73 | for jj=bad_y_ind'
74 | fprintf('Y exceeds max height at Frame %04d, line %d, exceed %d\n',frame_index, jj, obj_bbox_coors_raw(jj,2)+obj_bbox_coors_raw(jj,4)-im_height);
75 | end
76 | obj_bbox_coors_raw(bad_y_ind,4) = im_height - obj_bbox_coors_raw(bad_y_ind,2);
77 | end
78 | else
79 | obj_bbox_class_raw=cell(0,1);obj_bbox_coors_raw=zeros(0,5);
80 | end
81 |
82 | if(whether_plot_images) % optional debug view of the raw (unfiltered) detections
83 | figure(10);clf;imshow(rgb_img);title('Raw Detected 2D object');hold on;
84 | if (size(obj_bbox_structs,1)>0) % if found objects...
85 | for id=1:size(obj_bbox_coors_raw,1)
86 | box_color = object_colors{mod(id-1,numel(object_colors))+1}; % wrap around the palette so frames with more detections than colors do not index out of range
87 | rectangle('Position',obj_bbox_coors_raw(id,1:4),'EdgeColor',box_color,'LineWidth',2);
88 | text(obj_bbox_coors_raw(id,1)+5,obj_bbox_coors_raw(id,2)-10, sprintf('%s---%.2f',obj_bbox_class_raw{id},obj_bbox_coors_raw(id,5)),'Color',box_color) % label: class name and detection probability
89 | end
90 | pause(0.5); % give the figure time to render before the next frame
91 | end
92 | end
93 | obj_bbox_coors = obj_bbox_coors_raw;
94 | obj_bbox_class = obj_bbox_class_raw;
95 |
96 |
97 | % Reject some object bounding box.
98 | obj_bbox_coors_s1 = obj_bbox_coors_raw(obj_bbox_coors_raw(:,5)>filtering_object_prob,:); % delete object with low prob
99 | obj_bbox_class_s1 = obj_bbox_class_raw(obj_bbox_coors_raw(:,5)>filtering_object_prob,:);
if (whether_remove_close_boundary_box)  % drop boxes that lie within a margin of any image border
    close_bound_margin = 10;  % margin in pixels
    % Box rows are [x1 y1 x_width y_height prob], with (x1,y1) the top-left corner.
    box_left   = obj_bbox_coors_s1(:,1);
    box_top    = obj_bbox_coors_s1(:,2);
    box_right  = obj_bbox_coors_s1(:,1) + obj_bbox_coors_s1(:,3);
    box_bottom = obj_bbox_coors_s1(:,2) + obj_bbox_coors_s1(:,4);

    % The original condition was garbled (the comparison operators were lost);
    % it is reconstructed here as a margin test against all four borders.
    close_bound_ids = (box_left   < close_bound_margin) | ...
                      (box_right  > im_width  - close_bound_margin) | ...
                      (box_top    < close_bound_margin) | ...
                      (box_bottom > im_height - close_bound_margin);

    % Remove the rejected boxes and their class labels together so that rows stay aligned.
    obj_bbox_coors_s1(close_bound_ids,:)=[];
    obj_bbox_class_s1(close_bound_ids,:)=[];
end
112 |
113 | obj_bbox_coors = obj_bbox_coors_s1;
114 | obj_bbox_class = obj_bbox_class_s1;
115 |
116 | % most important part. if two bbox overlapping is large, remove one of them, with low prob. if prob is similar, remove small area one.
117 | if ( whether_remove_overlapping_box )
118 | can_force_merge = 1;
119 | counter = 0;
120 | delete_ids=[];
121 | while ( (can_force_merge==1) && (counter<500))
122 | counter=counter+1;
123 | can_force_merge=0;
124 | for id1=1:(size(obj_bbox_coors_s1,1)-1)
125 | for id2=(id1+1):size(obj_bbox_coors_s1,1)
126 | [overlap_1,overlap_2]= bbox_overlap_ratio(obj_bbox_coors_s1(id1,1:4),obj_bbox_coors_s1(id2,1:4));
127 | if (overlap_1>0.5 || overlap_2>0.5) % large overlap , need to delete one.
128 | if (obj_bbox_coors_s1(id1,5)-obj_bbox_coors_s1(id2,5)>0.20) % obj1 prob is better
129 | delete_ids=id2;
130 | elseif (obj_bbox_coors_s1(id2,5)-obj_bbox_coors_s1(id1,5)>0.20) % obj2 prob is better
131 | delete_ids=id1;
132 | else % if prob is similar. remove the one that being overlapped more...
133 | if overlap_1>overlap_2
134 | delete_ids=id1;
135 | else
136 | delete_ids=id2;
137 | end
138 | end
139 | obj_bbox_coors_s1(delete_ids,:)=[];
140 | obj_bbox_class_s1(delete_ids,:)=[];
141 | can_force_merge=1;
142 | break;
143 | end
144 | end
145 | if (can_force_merge==1)
146 | break;
147 | end
148 | end
149 | end
150 |
151 | if(whether_plot_images)
152 | figure(15);clf;imshow(rgb_img);title('Selected non-overlap 2D object');hold on;
153 | if (size(obj_bbox_coors_s1,1)>0)
154 | for id=1:size(obj_bbox_coors_s1,1)
155 | rectangle('Position',obj_bbox_coors_s1(id,1:4),'EdgeColor',object_colors{id},'LineWidth',2);
156 | text(obj_bbox_coors_s1(id,1)+5,obj_bbox_coors_s1(id,2)-10, sprintf('%s---%.2f',obj_bbox_class_s1{id},obj_bbox_coors_s1(id,5)),'Color',object_colors{id})
157 | end
158 | pause(0.5);
159 | end
160 | end
161 |
162 | obj_bbox_coors = obj_bbox_coors_s1;
163 | obj_bbox_class = obj_bbox_class_s1;
164 |
165 | if (size(obj_bbox_coors,1)==0)
166 | fprintf('Not found valid object of frame %d \n',frame_index);
167 | end
168 | end
169 | if (write_box_into_imgs) % don't need plot, directly generate images
170 | rgb_img_cp=rgb_img;
171 | if (size(obj_bbox_coors,1)>0)
172 | for id=1:size(obj_bbox_coors,1)
173 | box_position = int32(obj_bbox_coors(id,1:4));rgb_img_cp = insertShape(rgb_img_cp, 'Rectangle',box_position,'LineWidth',3,'Color',get_id_color(id));
174 | end
175 | end
176 | imwrite(rgb_img_cp,[saved_img_filter_match_2d_boxes_all_dir sprintf('%04d',frame_index) '_yolo_2dobj_select.jpg'])
177 | end
178 |
179 | all_select_2d_bboxes{frame_index+1} = obj_bbox_coors;
180 | all_select_2dboxes_classes{frame_index+1} = obj_bbox_class;
181 | % if(whether_plot_images)
182 | % pause();
183 | % end
184 | end
185 | if (save_box_into_mat)
186 | save(saved_select_2dbox_match_mat_name,'all_select_2d_bboxes','all_select_2dboxes_classes')
187 | end
188 | else
189 | load(saved_select_2dbox_match_mat_name);
190 | end
191 |
192 |
193 | % save the selected boxes to txt. matlab coordinate index. not raw c++ index
if (0)
    % Export the selected boxes of every frame to one tab-separated text file per frame.
    % Each line is: x1 <tab> y1 <tab> width <tab> height <tab> prob (MATLAB 1-based coordinates).
    %load(saved_select_2dbox_match_mat_name);
    saved_img_filter_match_2d_boxes_txts_all_dir = [data_proc_root_dir 'mats/filter_match_2d_boxes_txts/'];  % save txt folder
    if (~exist(saved_img_filter_match_2d_boxes_txts_all_dir,'dir'))  % 'dir' avoids matching a file or variable of the same name
        mkdir(saved_img_filter_match_2d_boxes_txts_all_dir);
    end
    for frame_index=0:length(all_select_2d_bboxes)-1
        if (mod(frame_index,10)==0)
            disp(['Processing image ',num2str(frame_index)]);
        end
        obj_bbox_coors = all_select_2d_bboxes{frame_index+1};
        obj_bbox_class = all_select_2dboxes_classes{frame_index+1};  % loaded for reference; class names are not written

        save_selected_obj_txt_name = [saved_img_filter_match_2d_boxes_txts_all_dir sprintf('%04d_yolo2_%.2f.txt',frame_index,yolo_det_thre)];
        fid_2 = fopen(save_selected_obj_txt_name, 'w');
        if (fid_2 < 0)  % fopen returns -1 on failure; fail loudly instead of crashing inside fprintf
            error('Cannot open %s for writing', save_selected_obj_txt_name);
        end
        for line_id = 1:size(obj_bbox_coors,1)
            for j = 1:4
                fprintf(fid_2,'%d\t',obj_bbox_coors(line_id,j));
            end
            fprintf(fid_2,'%.2f\n',obj_bbox_coors(line_id,end));
        end
        fclose(fid_2);
    end
end
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
%% Match the selected 2D YOLO bounding boxes with ground-truth 2D boxes, based on overlap ratio (IoU).
% Needs: ground-truth objects (frame_full_infos) and the filtered predicted 2D boxes (all_select_2d_bboxes).
% Produces:
%   all_pred_matched_truths{k} - one row per predicted box of frame k: [truth id, IoU], or [-1 -1] if unmatched
%   frame_find_matches(k)      - 1 if frame k has at least one matched pair, otherwise 0
whether_rematch_2d_boxes = false;
save_box_into_imgs = false;  whether_plot_images = false;   % true false
save_box_into_mat = false;
if (whether_rematch_2d_boxes)
    all_pred_matched_truths = cell(total_img_num,1);  % the row of each cell is the selected yolo object number
    frame_find_matches = zeros(total_img_num,1);
    iou_2d_thre = 0.7;   % 0.5 nearly the same
    for frame_index=0:(total_img_num-1)
%         close all;
        if mod(frame_index,100)==0
            disp(['Processing image ',num2str(frame_index)]);
        end

        % load ground truth boxes
        frame_calib_mat = frame_full_infos(frame_index+1);
        if (strcmp(dataset_name,'kitti_object'))
            truth_object_num = length(frame_calib_mat.truth_objects);
        else
            % NOTE(review): previously left undefined for other datasets. The matching loop
            % below indexes groundtruth3DBB_tight, so its length is used - confirm.
            truth_object_num = length(frame_calib_mat.groundtruth3DBB_tight);
        end

        % load predicted objects of this frame, rows are [x1 y1 width height prob]
        pred_objs = all_select_2d_bboxes{frame_index+1};
        pred_object_num = size(pred_objs,1);   % was never assigned before being used

        rgb_img = imread(frame_calib_mat.rgbpath);
        if (whether_plot_images)  % plot truth bbox sun rgbd data format.
            figure(1); imshow(rgb_img); hold on;
            for kk =1:length(frame_calib_mat.groundtruth3DBB_tight)
                gt_box = frame_calib_mat.groundtruth3DBB_tight(kk).gtBb2D;
                rectangle('Position', [gt_box(1) gt_box(2) gt_box(3) gt_box(4)],'edgecolor','y');
                text(gt_box(1),gt_box(2),frame_calib_mat.groundtruth3DBB_tight(kk).classname,'BackgroundColor','y')
            end
            pause(0.5);
            if (save_box_into_imgs)
                edge3d_img_title = sprintf('%d, truth 2D boxes',frame_index);
                saved_3d_img_name = [saved_img_filter_match_2d_boxes_all_dir sprintf('%04d_truth_box',frame_index)];
                save_figure_to_img(1, edge3d_img_title, saved_3d_img_name);
            end
        end

        if (whether_plot_images)  % plot predict bbox
            figure(15);imshow(rgb_img);title('Predicted 2D boxes');hold on;
            for object_id = 1:pred_object_num   % for each yolo detected object
                rectangle('Position',pred_objs(object_id,1:4),'EdgeColor',object_colors{object_id},'LineWidth',2);   % randomize image color?
            end
            pause(0.5);
            if (save_box_into_imgs)   % was the undefined name save_box_into_img
                edge3d_img_title = sprintf('%d, pred 2D boxes',frame_index);
                saved_3d_img_name = [saved_img_filter_match_2d_boxes_all_dir sprintf('%04d_pred_box',frame_index)];
                save_figure_to_img(15, edge3d_img_title, saved_3d_img_name);
            end
        end

        % for each predicted object, greedily pick the truth box with the highest IoU;
        % every truth box can be assigned to at most one prediction.
        truth_used = zeros(truth_object_num,1);
        pred_matched_truth = ones(pred_object_num,2)*(-1);  % even if there is no 3D cuboid samples, give it -1
        for object_id = 1:pred_object_num   % for each yolo detected object
            yolo_2d_rect = pred_objs(object_id,1:4);

            overlapRatios = zeros(truth_object_num,1);
            for kk =1:truth_object_num
                if (size(frame_calib_mat.groundtruth3DBB_tight(kk).gtBb2D,1)>0)
                    overlapRatios(kk) = bboxOverlapRatio(yolo_2d_rect,frame_calib_mat.groundtruth3DBB_tight(kk).gtBb2D);
                end
            end
            [max_iou,max_id]=max(overlapRatios);
            if (max_iou>iou_2d_thre)
                if (~truth_used(max_id))  % the truth object is not assigned to other cuboids
                    pred_matched_truth(object_id,:)=[max_id max_iou];  % start from 1
                    truth_used(max_id)=1;
                else
%                     disp(['truth object used  ',num2str(frame_index)]);
                end
            else
%                 disp(['max iou small  ',num2str(frame_index),'  IoU  ',num2str(max_iou)]);
            end
        end
        all_pred_matched_truths{frame_index+1} = pred_matched_truth;

        % record whether this frame has any matched pair
        if ~(any(pred_matched_truth(:,1)>0))
            disp(['No matching pair  ',num2str(frame_index)]);
        else
            frame_find_matches(frame_index+1)=1;
        end

        % draw dotted rect on it (with probability), same color with the matched ones, then save the image, test to see each image.
        if (false)
            if (frame_find_matches(frame_index+1)==1)  % if has matches!!!
                rgb_img_cp = rgb_img;
                for object_id = 1:pred_object_num   % for each yolo detected object
                    if (pred_matched_truth(object_id,1)>0)
                        rgb_img_cp = insertShape(rgb_img_cp, 'Rectangle',pred_objs(object_id,1:4),'LineWidth',3,'Color',object_colors{mod(object_id,10)+1});
                        rgb_img_cp = insertShape(rgb_img_cp, 'Rectangle',frame_calib_mat.groundtruth3DBB_tight(pred_matched_truth(object_id,1)).gtBb2D,'LineWidth',1,'Color',object_colors{mod(object_id,10)+1});
                    end
                end
%                 pause(0.5);

                if (save_box_into_imgs)   % was the undefined name save_box_into_img
                    % could also plot then save images, but slow. see previous example.
                    saved_3d_img_name = [saved_img_filter_match_2d_boxes_all_dir sprintf('%04d_%.2f_matched_box.jpg',frame_index,iou_2d_thre)];
                    imwrite(rgb_img_cp,saved_3d_img_name);
                end
            end
        end
    end
    if (save_box_into_mat)
        save(saved_select_2dbox_match_mat_name,'all_select_2d_bboxes','all_select_2dboxes_classes','all_pred_matched_truths','frame_find_matches')
    end
% else
%     load(saved_select_2dbox_match_mat_name);
end
343 |
344 |
% save the prediction-to-truth matches into txt, one tab-separated file per frame
if (0)
    gmu_2d_box_match_dir = [data_proc_root_dir 'mats/gmu_2dboxes_matches_txts/'];
    if (~exist(gmu_2d_box_match_dir,'dir'))  % dlmwrite does not create missing folders
        mkdir(gmu_2d_box_match_dir);
    end
    % Iterate over the cell array that is actually indexed below; the previous loop
    % bound referenced all_select_2dboxes, which is not defined anywhere in this script.
    for frame_index=0:length(all_pred_matched_truths)-1
        pred_truth_matches = all_pred_matched_truths{frame_index+1};
        gmu_2d_box_match_txt = [gmu_2d_box_match_dir sprintf('%04d_truth_matches.txt',frame_index)];
        dlmwrite(gmu_2d_box_match_txt,pred_truth_matches,'delimiter','\t','precision',3);
    end
end
353 |
--------------------------------------------------------------------------------
/illustrations.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shichaoy/matlab_cuboid_detect/74727788a0b7222546eafcf012f89e336ebb5611/illustrations.pdf
--------------------------------------------------------------------------------
/init_setup.m:
--------------------------------------------------------------------------------
%% Set up the workspace with relevant paths (and additional packages that this system needs)
% Resolve the repository root from the location of this script so the setup does not
% depend on the current working directory. mfilename returns an empty string when the
% code is not run from a file (e.g. pasted into the command window); fall back to pwd then.
repo_root = fileparts(mfilename('fullpath'));
if isempty(repo_root)
    repo_root = pwd;
end

% genpath already expands to every sub-folder (utils, utils/geometry_util, utils/line_utils,
% utils/common_utils, utils/draw_utils, utils/cuboid_utils, ...), so one call is enough.
addpath(genpath(repo_root));
9 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov2/darknet.c:
--------------------------------------------------------------------------------
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
4 |
5 | #include "parser.h"
6 | #include "utils.h"
7 | #include "cuda.h"
8 | #include "blas.h"
9 | #include "connected_layer.h"
10 |
11 | #ifdef OPENCV
12 | #include "opencv2/highgui/highgui_c.h"
13 | #endif
14 |
15 | extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
16 | extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh);
17 | extern void test_detector_folder(char *datacfg, char *cfgfile, char *weightfile, char *input_folder, char *output_folder, float thresh, float hier_thresh);
18 | extern void run_voxel(int argc, char **argv);
19 | extern void run_yolo(int argc, char **argv);
20 | extern void run_detector(int argc, char **argv);
21 | extern void run_coco(int argc, char **argv);
22 | extern void run_writing(int argc, char **argv);
23 | extern void run_captcha(int argc, char **argv);
24 | extern void run_nightmare(int argc, char **argv);
25 | extern void run_dice(int argc, char **argv);
26 | extern void run_compare(int argc, char **argv);
27 | extern void run_classifier(int argc, char **argv);
28 | extern void run_char_rnn(int argc, char **argv);
29 | extern void run_vid_rnn(int argc, char **argv);
30 | extern void run_tag(int argc, char **argv);
31 | extern void run_cifar(int argc, char **argv);
32 | extern void run_go(int argc, char **argv);
33 | extern void run_art(int argc, char **argv);
34 | extern void run_super(int argc, char **argv);
35 |
36 | void average(int argc, char *argv[])
37 | {
38 | char *cfgfile = argv[2];
39 | char *outfile = argv[3];
40 | gpu_index = -1;
41 | network net = parse_network_cfg(cfgfile);
42 | network sum = parse_network_cfg(cfgfile);
43 |
44 | char *weightfile = argv[4];
45 | load_weights(&sum, weightfile);
46 |
47 | int i, j;
48 | int n = argc - 5;
49 | for(i = 0; i < n; ++i){
50 | weightfile = argv[i+5];
51 | load_weights(&net, weightfile);
52 | for(j = 0; j < net.n; ++j){
53 | layer l = net.layers[j];
54 | layer out = sum.layers[j];
55 | if(l.type == CONVOLUTIONAL){
56 | int num = l.n*l.c*l.size*l.size;
57 | axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
58 | axpy_cpu(num, 1, l.weights, 1, out.weights, 1);
59 | if(l.batch_normalize){
60 | axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1);
61 | axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1);
62 | axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1);
63 | }
64 | }
65 | if(l.type == CONNECTED){
66 | axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
67 | axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
68 | }
69 | }
70 | }
71 | n = n+1;
72 | for(j = 0; j < net.n; ++j){
73 | layer l = sum.layers[j];
74 | if(l.type == CONVOLUTIONAL){
75 | int num = l.n*l.c*l.size*l.size;
76 | scal_cpu(l.n, 1./n, l.biases, 1);
77 | scal_cpu(num, 1./n, l.weights, 1);
78 | if(l.batch_normalize){
79 | scal_cpu(l.n, 1./n, l.scales, 1);
80 | scal_cpu(l.n, 1./n, l.rolling_mean, 1);
81 | scal_cpu(l.n, 1./n, l.rolling_variance, 1);
82 | }
83 | }
84 | if(l.type == CONNECTED){
85 | scal_cpu(l.outputs, 1./n, l.biases, 1);
86 | scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
87 | }
88 | }
89 | save_weights(sum, outfile);
90 | }
91 |
92 | void speed(char *cfgfile, int tics)
93 | {
94 | if (tics == 0) tics = 1000;
95 | network net = parse_network_cfg(cfgfile);
96 | set_batch_network(&net, 1);
97 | int i;
98 | time_t start = time(0);
99 | image im = make_image(net.w, net.h, net.c);
100 | for(i = 0; i < tics; ++i){
101 | network_predict(net, im.data);
102 | }
103 | double t = difftime(time(0), start);
104 | printf("\n%d evals, %f Seconds\n", tics, t);
105 | printf("Speed: %f sec/eval\n", t/tics);
106 | printf("Speed: %f Hz\n", tics/t);
107 | }
108 |
109 | void operations(char *cfgfile)
110 | {
111 | gpu_index = -1;
112 | network net = parse_network_cfg(cfgfile);
113 | int i;
114 | long ops = 0;
115 | for(i = 0; i < net.n; ++i){
116 | layer l = net.layers[i];
117 | if(l.type == CONVOLUTIONAL){
118 | ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
119 | } else if(l.type == CONNECTED){
120 | ops += 2l * l.inputs * l.outputs;
121 | }
122 | }
123 | printf("Floating Point Operations: %ld\n", ops);
124 | printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
125 | }
126 |
127 | void oneoff(char *cfgfile, char *weightfile, char *outfile)
128 | {
129 | gpu_index = -1;
130 | network net = parse_network_cfg(cfgfile);
131 | int oldn = net.layers[net.n - 2].n;
132 | int c = net.layers[net.n - 2].c;
133 | scal_cpu(oldn*c, .1, net.layers[net.n - 2].weights, 1);
134 | scal_cpu(oldn, 0, net.layers[net.n - 2].biases, 1);
135 | net.layers[net.n - 2].n = 9418;
136 | net.layers[net.n - 2].biases += 5;
137 | net.layers[net.n - 2].weights += 5*c;
138 | if(weightfile){
139 | load_weights(&net, weightfile);
140 | }
141 | net.layers[net.n - 2].biases -= 5;
142 | net.layers[net.n - 2].weights -= 5*c;
143 | net.layers[net.n - 2].n = oldn;
144 | printf("%d\n", oldn);
145 | layer l = net.layers[net.n - 2];
146 | copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1);
147 | copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1);
148 | copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1);
149 | copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1);
150 | *net.seen = 0;
151 | save_weights(net, outfile);
152 | }
153 |
154 | void partial(char *cfgfile, char *weightfile, char *outfile, int max)
155 | {
156 | gpu_index = -1;
157 | network net = parse_network_cfg(cfgfile);
158 | if(weightfile){
159 | load_weights_upto(&net, weightfile, max);
160 | }
161 | *net.seen = 0;
162 | save_weights_upto(net, outfile, max);
163 | }
164 |
165 | #include "convolutional_layer.h"
166 | void rescale_net(char *cfgfile, char *weightfile, char *outfile)
167 | {
168 | gpu_index = -1;
169 | network net = parse_network_cfg(cfgfile);
170 | if(weightfile){
171 | load_weights(&net, weightfile);
172 | }
173 | int i;
174 | for(i = 0; i < net.n; ++i){
175 | layer l = net.layers[i];
176 | if(l.type == CONVOLUTIONAL){
177 | rescale_weights(l, 2, -.5);
178 | break;
179 | }
180 | }
181 | save_weights(net, outfile);
182 | }
183 |
184 | void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
185 | {
186 | gpu_index = -1;
187 | network net = parse_network_cfg(cfgfile);
188 | if(weightfile){
189 | load_weights(&net, weightfile);
190 | }
191 | int i;
192 | for(i = 0; i < net.n; ++i){
193 | layer l = net.layers[i];
194 | if(l.type == CONVOLUTIONAL){
195 | rgbgr_weights(l);
196 | break;
197 | }
198 | }
199 | save_weights(net, outfile);
200 | }
201 |
202 | void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
203 | {
204 | gpu_index = -1;
205 | network net = parse_network_cfg(cfgfile);
206 | if (weightfile) {
207 | load_weights(&net, weightfile);
208 | }
209 | int i;
210 | for (i = 0; i < net.n; ++i) {
211 | layer l = net.layers[i];
212 | if (l.type == CONVOLUTIONAL && l.batch_normalize) {
213 | denormalize_convolutional_layer(l);
214 | }
215 | if (l.type == CONNECTED && l.batch_normalize) {
216 | denormalize_connected_layer(l);
217 | }
218 | if (l.type == GRU && l.batch_normalize) {
219 | denormalize_connected_layer(*l.input_z_layer);
220 | denormalize_connected_layer(*l.input_r_layer);
221 | denormalize_connected_layer(*l.input_h_layer);
222 | denormalize_connected_layer(*l.state_z_layer);
223 | denormalize_connected_layer(*l.state_r_layer);
224 | denormalize_connected_layer(*l.state_h_layer);
225 | }
226 | }
227 | save_weights(net, outfile);
228 | }
229 |
230 | layer normalize_layer(layer l, int n)
231 | {
232 | int j;
233 | l.batch_normalize=1;
234 | l.scales = calloc(n, sizeof(float));
235 | for(j = 0; j < n; ++j){
236 | l.scales[j] = 1;
237 | }
238 | l.rolling_mean = calloc(n, sizeof(float));
239 | l.rolling_variance = calloc(n, sizeof(float));
240 | return l;
241 | }
242 |
243 | void normalize_net(char *cfgfile, char *weightfile, char *outfile)
244 | {
245 | gpu_index = -1;
246 | network net = parse_network_cfg(cfgfile);
247 | if(weightfile){
248 | load_weights(&net, weightfile);
249 | }
250 | int i;
251 | for(i = 0; i < net.n; ++i){
252 | layer l = net.layers[i];
253 | if(l.type == CONVOLUTIONAL && !l.batch_normalize){
254 | net.layers[i] = normalize_layer(l, l.n);
255 | }
256 | if (l.type == CONNECTED && !l.batch_normalize) {
257 | net.layers[i] = normalize_layer(l, l.outputs);
258 | }
259 | if (l.type == GRU && l.batch_normalize) {
260 | *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
261 | *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
262 | *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
263 | *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
264 | *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
265 | *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
266 | net.layers[i].batch_normalize=1;
267 | }
268 | }
269 | save_weights(net, outfile);
270 | }
271 |
272 | void statistics_net(char *cfgfile, char *weightfile)
273 | {
274 | gpu_index = -1;
275 | network net = parse_network_cfg(cfgfile);
276 | if (weightfile) {
277 | load_weights(&net, weightfile);
278 | }
279 | int i;
280 | for (i = 0; i < net.n; ++i) {
281 | layer l = net.layers[i];
282 | if (l.type == CONNECTED && l.batch_normalize) {
283 | printf("Connected Layer %d\n", i);
284 | statistics_connected_layer(l);
285 | }
286 | if (l.type == GRU && l.batch_normalize) {
287 | printf("GRU Layer %d\n", i);
288 | printf("Input Z\n");
289 | statistics_connected_layer(*l.input_z_layer);
290 | printf("Input R\n");
291 | statistics_connected_layer(*l.input_r_layer);
292 | printf("Input H\n");
293 | statistics_connected_layer(*l.input_h_layer);
294 | printf("State Z\n");
295 | statistics_connected_layer(*l.state_z_layer);
296 | printf("State R\n");
297 | statistics_connected_layer(*l.state_r_layer);
298 | printf("State H\n");
299 | statistics_connected_layer(*l.state_h_layer);
300 | }
301 | printf("\n");
302 | }
303 | }
304 |
305 | void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
306 | {
307 | gpu_index = -1;
308 | network net = parse_network_cfg(cfgfile);
309 | if (weightfile) {
310 | load_weights(&net, weightfile);
311 | }
312 | int i;
313 | for (i = 0; i < net.n; ++i) {
314 | layer l = net.layers[i];
315 | if (l.type == CONVOLUTIONAL && l.batch_normalize) {
316 | denormalize_convolutional_layer(l);
317 | net.layers[i].batch_normalize=0;
318 | }
319 | if (l.type == CONNECTED && l.batch_normalize) {
320 | denormalize_connected_layer(l);
321 | net.layers[i].batch_normalize=0;
322 | }
323 | if (l.type == GRU && l.batch_normalize) {
324 | denormalize_connected_layer(*l.input_z_layer);
325 | denormalize_connected_layer(*l.input_r_layer);
326 | denormalize_connected_layer(*l.input_h_layer);
327 | denormalize_connected_layer(*l.state_z_layer);
328 | denormalize_connected_layer(*l.state_r_layer);
329 | denormalize_connected_layer(*l.state_h_layer);
330 | l.input_z_layer->batch_normalize = 0;
331 | l.input_r_layer->batch_normalize = 0;
332 | l.input_h_layer->batch_normalize = 0;
333 | l.state_z_layer->batch_normalize = 0;
334 | l.state_r_layer->batch_normalize = 0;
335 | l.state_h_layer->batch_normalize = 0;
336 | net.layers[i].batch_normalize=0;
337 | }
338 | }
339 | save_weights(net, outfile);
340 | }
341 |
342 | void visualize(char *cfgfile, char *weightfile)
343 | {
344 | network net = parse_network_cfg(cfgfile);
345 | if(weightfile){
346 | load_weights(&net, weightfile);
347 | }
348 | visualize_network(net);
349 | #ifdef OPENCV
350 | cvWaitKey(0);
351 | #endif
352 | }
353 |
354 | int main(int argc, char **argv)
355 | {
356 | //test_resize("data/bad.jpg");
357 | //test_box();
358 | //test_convolutional_layer();
359 | if(argc < 2){
360 | fprintf(stderr, "usage: %s \n", argv[0]);
361 | return 0;
362 | }
363 | gpu_index = find_int_arg(argc, argv, "-i", 0);
364 | if(find_arg(argc, argv, "-nogpu")) {
365 | gpu_index = -1;
366 | }
367 |
368 | #ifndef GPU
369 | gpu_index = -1;
370 | #else
371 | if(gpu_index >= 0){
372 | cuda_set_device(gpu_index);
373 | }
374 | #endif
375 |
376 | if (0 == strcmp(argv[1], "average")){
377 | average(argc, argv);
378 | } else if (0 == strcmp(argv[1], "yolo")){
379 | run_yolo(argc, argv);
380 | } else if (0 == strcmp(argv[1], "voxel")){
381 | run_voxel(argc, argv);
382 | } else if (0 == strcmp(argv[1], "super")){
383 | run_super(argc, argv);
384 | } else if (0 == strcmp(argv[1], "detector")){
385 | run_detector(argc, argv);
386 | } else if (0 == strcmp(argv[1], "detect")){
387 | float thresh = find_float_arg(argc, argv, "-thresh", .24);
388 | char *filename = (argc > 4) ? argv[4]: 0;
389 | test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5);
390 | } else if (0 == strcmp(argv[1], "detect_folder")){
391 | float thresh = find_float_arg(argc, argv, "-thresh", .24); // default threshold is 0.24
392 | char *filename = (argc > 4) ? argv[4]: 0;
393 | char *output_folder = (argc > 5) ? argv[5]: 0;
394 | test_detector_folder("cfg/coco.data", argv[2], argv[3], filename, output_folder, thresh, .5);
395 | } else if (0 == strcmp(argv[1], "cifar")){
396 | run_cifar(argc, argv);
397 | } else if (0 == strcmp(argv[1], "go")){
398 | run_go(argc, argv);
399 | } else if (0 == strcmp(argv[1], "rnn")){
400 | run_char_rnn(argc, argv);
401 | } else if (0 == strcmp(argv[1], "vid")){
402 | run_vid_rnn(argc, argv);
403 | } else if (0 == strcmp(argv[1], "coco")){
404 | run_coco(argc, argv);
405 | } else if (0 == strcmp(argv[1], "classify")){
406 | predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
407 | } else if (0 == strcmp(argv[1], "classifier")){
408 | run_classifier(argc, argv);
409 | } else if (0 == strcmp(argv[1], "art")){
410 | run_art(argc, argv);
411 | } else if (0 == strcmp(argv[1], "tag")){
412 | run_tag(argc, argv);
413 | } else if (0 == strcmp(argv[1], "compare")){
414 | run_compare(argc, argv);
415 | } else if (0 == strcmp(argv[1], "dice")){
416 | run_dice(argc, argv);
417 | } else if (0 == strcmp(argv[1], "writing")){
418 | run_writing(argc, argv);
419 | } else if (0 == strcmp(argv[1], "3d")){
420 | composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
421 | } else if (0 == strcmp(argv[1], "test")){
422 | test_resize(argv[2]);
423 | } else if (0 == strcmp(argv[1], "captcha")){
424 | run_captcha(argc, argv);
425 | } else if (0 == strcmp(argv[1], "nightmare")){
426 | run_nightmare(argc, argv);
427 | } else if (0 == strcmp(argv[1], "rgbgr")){
428 | rgbgr_net(argv[2], argv[3], argv[4]);
429 | } else if (0 == strcmp(argv[1], "reset")){
430 | reset_normalize_net(argv[2], argv[3], argv[4]);
431 | } else if (0 == strcmp(argv[1], "denormalize")){
432 | denormalize_net(argv[2], argv[3], argv[4]);
433 | } else if (0 == strcmp(argv[1], "statistics")){
434 | statistics_net(argv[2], argv[3]);
435 | } else if (0 == strcmp(argv[1], "normalize")){
436 | normalize_net(argv[2], argv[3], argv[4]);
437 | } else if (0 == strcmp(argv[1], "rescale")){
438 | rescale_net(argv[2], argv[3], argv[4]);
439 | } else if (0 == strcmp(argv[1], "ops")){
440 | operations(argv[2]);
441 | } else if (0 == strcmp(argv[1], "speed")){
442 | speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
443 | } else if (0 == strcmp(argv[1], "oneoff")){
444 | oneoff(argv[2], argv[3], argv[4]);
445 | } else if (0 == strcmp(argv[1], "partial")){
446 | partial(argv[2], argv[3], argv[4], atoi(argv[5]));
447 | } else if (0 == strcmp(argv[1], "average")){
448 | average(argc, argv);
449 | } else if (0 == strcmp(argv[1], "visualize")){
450 | visualize(argv[2], (argc > 3) ? argv[3] : 0);
451 | } else if (0 == strcmp(argv[1], "imtest")){
452 | test_resize(argv[2]);
453 | } else {
454 | fprintf(stderr, "Not an option: %s\n", argv[1]);
455 | }
456 | return 0;
457 | }
458 |
459 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov2/detector.c:
--------------------------------------------------------------------------------
1 | #include "network.h"
2 | #include "region_layer.h"
3 | #include "cost_layer.h"
4 | #include "utils.h"
5 | #include "parser.h"
6 | #include "box.h"
7 | #include "demo.h"
8 | #include "option_list.h"
9 | #include
10 | #ifdef OPENCV
11 | #include "opencv2/highgui/highgui_c.h"
12 | #include
13 | #include
14 | #endif
15 | static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
16 |
17 | void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
18 | {
19 | list *options = read_data_cfg(datacfg);
20 | char *train_images = option_find_str(options, "train", "data/train.list");
21 | char *backup_directory = option_find_str(options, "backup", "/backup/");
22 |
23 | srand(time(0));
24 | char *base = basecfg(cfgfile);
25 | printf("%s\n", base);
26 | float avg_loss = -1;
27 | network *nets = calloc(ngpus, sizeof(network));
28 |
29 | srand(time(0));
30 | int seed = rand();
31 | int i;
32 | for(i = 0; i < ngpus; ++i){
33 | srand(seed);
34 | #ifdef GPU
35 | cuda_set_device(gpus[i]);
36 | #endif
37 | nets[i] = parse_network_cfg(cfgfile);
38 | if(weightfile){
39 | load_weights(&nets[i], weightfile);
40 | }
41 | if(clear) *nets[i].seen = 0;
42 | nets[i].learning_rate *= ngpus;
43 | }
44 | srand(time(0));
45 | network net = nets[0];
46 |
47 | int imgs = net.batch * net.subdivisions * ngpus;
48 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
49 | data train, buffer;
50 |
51 | layer l = net.layers[net.n - 1];
52 |
53 | int classes = l.classes;
54 | float jitter = l.jitter;
55 |
56 | list *plist = get_paths(train_images);
57 | //int N = plist->size;
58 | char **paths = (char **)list_to_array(plist);
59 |
60 | load_args args = {0};
61 | args.w = net.w;
62 | args.h = net.h;
63 | args.paths = paths;
64 | args.n = imgs;
65 | args.m = plist->size;
66 | args.classes = classes;
67 | args.jitter = jitter;
68 | args.num_boxes = l.max_boxes;
69 | args.d = &buffer;
70 | args.type = DETECTION_DATA;
71 | args.threads = 8;
72 |
73 | args.angle = net.angle;
74 | args.exposure = net.exposure;
75 | args.saturation = net.saturation;
76 | args.hue = net.hue;
77 |
78 | pthread_t load_thread = load_data(args);
79 | clock_t time;
80 | int count = 0;
81 | //while(i*imgs < N*120){
82 | while(get_current_batch(net) < net.max_batches){
83 | if(l.random && count++%10 == 0){
84 | printf("Resizing\n");
85 | int dim = (rand() % 10 + 10) * 32;
86 | if (get_current_batch(net)+200 > net.max_batches) dim = 608;
87 | //int dim = (rand() % 4 + 16) * 32;
88 | printf("%d\n", dim);
89 | args.w = dim;
90 | args.h = dim;
91 |
92 | pthread_join(load_thread, 0);
93 | train = buffer;
94 | free_data(train);
95 | load_thread = load_data(args);
96 |
97 | for(i = 0; i < ngpus; ++i){
98 | resize_network(nets + i, dim, dim);
99 | }
100 | net = nets[0];
101 | }
102 | time=clock();
103 | pthread_join(load_thread, 0);
104 | train = buffer;
105 | load_thread = load_data(args);
106 |
107 | /*
108 | int k;
109 | for(k = 0; k < l.max_boxes; ++k){
110 | box b = float_to_box(train.y.vals[10] + 1 + k*5);
111 | if(!b.x) break;
112 | printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
113 | }
114 | image im = float_to_image(448, 448, 3, train.X.vals[10]);
115 | int k;
116 | for(k = 0; k < l.max_boxes; ++k){
117 | box b = float_to_box(train.y.vals[10] + 1 + k*5);
118 | printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h);
119 | draw_bbox(im, b, 8, 1,0,0);
120 | }
121 | save_image(im, "truth11");
122 | */
123 |
124 | printf("Loaded: %lf seconds\n", sec(clock()-time));
125 |
126 | time=clock();
127 | float loss = 0;
128 | #ifdef GPU
129 | if(ngpus == 1){
130 | loss = train_network(net, train);
131 | } else {
132 | loss = train_networks(nets, ngpus, train, 4);
133 | }
134 | #else
135 | loss = train_network(net, train);
136 | #endif
137 | if (avg_loss < 0) avg_loss = loss;
138 | avg_loss = avg_loss*.9 + loss*.1;
139 |
140 | i = get_current_batch(net);
141 | printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
142 | if(i%1000==0 || (i < 1000 && i%100 == 0)){
143 | #ifdef GPU
144 | if(ngpus != 1) sync_nets(nets, ngpus, 0);
145 | #endif
146 | char buff[256];
147 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
148 | save_weights(net, buff);
149 | }
150 | free_data(train);
151 | }
152 | #ifdef GPU
153 | if(ngpus != 1) sync_nets(nets, ngpus, 0);
154 | #endif
155 | char buff[256];
156 | sprintf(buff, "%s/%s_final.weights", backup_directory, base);
157 | save_weights(net, buff);
158 | }
159 |
160 |
/*
 * Extract the numeric COCO image id from an image path.
 *
 * COCO images are named like "COCO_val2014_000000000042.jpg": the id is the
 * integer following the last '_' of the file name.
 *
 * The search is restricted to the base name so that an underscore in a
 * directory component cannot hide the id, and a file name with no underscore
 * at all (e.g. "000000000139.jpg") is parsed from its first character instead
 * of dereferencing the NULL returned by strrchr().
 *
 * Returns 0 when filename is NULL or when no leading digits are found
 * (atoi semantics).
 */
static int get_coco_image_id(char *filename)
{
    if (!filename) return 0;

    char *base = strrchr(filename, '/');
    base = base ? base + 1 : filename;

    char *sep = strrchr(base, '_');
    return atoi(sep ? sep + 1 : base);
}
166 |
167 | static void print_cocos(FILE *fp, char *image_path, box *boxes, float **probs, int num_boxes, int classes, int w, int h)
168 | {
169 | int i, j;
170 | int image_id = get_coco_image_id(image_path);
171 | for(i = 0; i < num_boxes; ++i){
172 | float xmin = boxes[i].x - boxes[i].w/2.;
173 | float xmax = boxes[i].x + boxes[i].w/2.;
174 | float ymin = boxes[i].y - boxes[i].h/2.;
175 | float ymax = boxes[i].y + boxes[i].h/2.;
176 |
177 | if (xmin < 0) xmin = 0;
178 | if (ymin < 0) ymin = 0;
179 | if (xmax > w) xmax = w;
180 | if (ymax > h) ymax = h;
181 |
182 | float bx = xmin;
183 | float by = ymin;
184 | float bw = xmax - xmin;
185 | float bh = ymax - ymin;
186 |
187 | for(j = 0; j < classes; ++j){
188 | if (probs[i][j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, probs[i][j]);
189 | }
190 | }
191 | }
192 |
193 | void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
194 | {
195 | int i, j;
196 | for(i = 0; i < total; ++i){
197 | float xmin = boxes[i].x - boxes[i].w/2.;
198 | float xmax = boxes[i].x + boxes[i].w/2.;
199 | float ymin = boxes[i].y - boxes[i].h/2.;
200 | float ymax = boxes[i].y + boxes[i].h/2.;
201 |
202 | if (xmin < 0) xmin = 0;
203 | if (ymin < 0) ymin = 0;
204 | if (xmax > w) xmax = w;
205 | if (ymax > h) ymax = h;
206 |
207 | for(j = 0; j < classes; ++j){
208 | if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j],
209 | xmin, ymin, xmax, ymax);
210 | }
211 | }
212 | }
213 |
214 | void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h)
215 | {
216 | int i, j;
217 | for(i = 0; i < total; ++i){
218 | float xmin = boxes[i].x - boxes[i].w/2.;
219 | float xmax = boxes[i].x + boxes[i].w/2.;
220 | float ymin = boxes[i].y - boxes[i].h/2.;
221 | float ymax = boxes[i].y + boxes[i].h/2.;
222 |
223 | if (xmin < 0) xmin = 0;
224 | if (ymin < 0) ymin = 0;
225 | if (xmax > w) xmax = w;
226 | if (ymax > h) ymax = h;
227 |
228 | for(j = 0; j < classes; ++j){
229 | int class = j;
230 | if (probs[i][class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, probs[i][class],
231 | xmin, ymin, xmax, ymax);
232 | }
233 | }
234 | }
235 |
236 | void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
237 | {
238 | int j;
239 | list *options = read_data_cfg(datacfg);
240 | char *valid_images = option_find_str(options, "valid", "data/train.list");
241 | char *name_list = option_find_str(options, "names", "data/names.list");
242 | char *prefix = option_find_str(options, "results", "results");
243 | char **names = get_labels(name_list);
244 | char *mapf = option_find_str(options, "map", 0);
245 | int *map = 0;
246 | if (mapf) map = read_map(mapf);
247 |
248 | network net = parse_network_cfg(cfgfile);
249 | if(weightfile){
250 | load_weights(&net, weightfile);
251 | }
252 | set_batch_network(&net, 1);
253 | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
254 | srand(time(0));
255 |
256 | list *plist = get_paths(valid_images);
257 | char **paths = (char **)list_to_array(plist);
258 |
259 | layer l = net.layers[net.n-1];
260 | int classes = l.classes;
261 |
262 | char buff[1024];
263 | char *type = option_find_str(options, "eval", "voc");
264 | FILE *fp = 0;
265 | FILE **fps = 0;
266 | int coco = 0;
267 | int imagenet = 0;
268 | if(0==strcmp(type, "coco")){
269 | if(!outfile) outfile = "coco_results";
270 | snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
271 | fp = fopen(buff, "w");
272 | fprintf(fp, "[\n");
273 | coco = 1;
274 | } else if(0==strcmp(type, "imagenet")){
275 | if(!outfile) outfile = "imagenet-detection";
276 | snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
277 | fp = fopen(buff, "w");
278 | imagenet = 1;
279 | classes = 200;
280 | } else {
281 | if(!outfile) outfile = "comp4_det_test_";
282 | fps = calloc(classes, sizeof(FILE *));
283 | for(j = 0; j < classes; ++j){
284 | snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
285 | fps[j] = fopen(buff, "w");
286 | }
287 | }
288 |
289 |
290 | box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
291 | float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
292 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
293 |
294 | int m = plist->size;
295 | int i=0;
296 | int t;
297 |
298 | float thresh = .005;
299 | float nms = .45;
300 |
301 | int nthreads = 4;
302 | image *val = calloc(nthreads, sizeof(image));
303 | image *val_resized = calloc(nthreads, sizeof(image));
304 | image *buf = calloc(nthreads, sizeof(image));
305 | image *buf_resized = calloc(nthreads, sizeof(image));
306 | pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
307 |
308 | load_args args = {0};
309 | args.w = net.w;
310 | args.h = net.h;
311 | args.type = IMAGE_DATA;
312 |
313 | for(t = 0; t < nthreads; ++t){
314 | args.path = paths[i+t];
315 | args.im = &buf[t];
316 | args.resized = &buf_resized[t];
317 | thr[t] = load_data_in_thread(args);
318 | }
319 | time_t start = time(0);
320 | for(i = nthreads; i < m+nthreads; i += nthreads){
321 | fprintf(stderr, "%d\n", i);
322 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
323 | pthread_join(thr[t], 0);
324 | val[t] = buf[t];
325 | val_resized[t] = buf_resized[t];
326 | }
327 | for(t = 0; t < nthreads && i+t < m; ++t){
328 | args.path = paths[i+t];
329 | args.im = &buf[t];
330 | args.resized = &buf_resized[t];
331 | thr[t] = load_data_in_thread(args);
332 | }
333 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
334 | char *path = paths[i+t-nthreads];
335 | char *id = basecfg(path);
336 | float *X = val_resized[t].data;
337 | network_predict(net, X);
338 | int w = val[t].w;
339 | int h = val[t].h;
340 | get_region_boxes(l, w, h, thresh, probs, boxes, 0, map, .5);
341 | if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
342 | if (coco){
343 | print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
344 | } else if (imagenet){
345 | print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
346 | } else {
347 | print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
348 | }
349 | free(id);
350 | free_image(val[t]);
351 | free_image(val_resized[t]);
352 | }
353 | }
354 | for(j = 0; j < classes; ++j){
355 | if(fps) fclose(fps[j]);
356 | }
357 | if(coco){
358 | fseek(fp, -2, SEEK_CUR);
359 | fprintf(fp, "\n]\n");
360 | fclose(fp);
361 | }
362 | fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
363 | }
364 |
365 | void validate_detector_recall(char *cfgfile, char *weightfile)
366 | {
367 | network net = parse_network_cfg(cfgfile);
368 | if(weightfile){
369 | load_weights(&net, weightfile);
370 | }
371 | set_batch_network(&net, 1);
372 | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
373 | srand(time(0));
374 |
375 | list *plist = get_paths("data/voc.2007.test");
376 | char **paths = (char **)list_to_array(plist);
377 |
378 | layer l = net.layers[net.n-1];
379 | int classes = l.classes;
380 |
381 | int j, k;
382 | box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
383 | float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
384 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
385 |
386 | int m = plist->size;
387 | int i=0;
388 |
389 | float thresh = .001;
390 | float iou_thresh = .5;
391 | float nms = .4;
392 |
393 | int total = 0;
394 | int correct = 0;
395 | int proposals = 0;
396 | float avg_iou = 0;
397 |
398 | for(i = 0; i < m; ++i){
399 | char *path = paths[i];
400 | image orig = load_image_color(path, 0, 0);
401 | image sized = resize_image(orig, net.w, net.h);
402 | char *id = basecfg(path);
403 | network_predict(net, sized.data);
404 | get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0, .5);
405 | if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
406 |
407 | char labelpath[4096];
408 | find_replace(path, "images", "labels", labelpath);
409 | find_replace(labelpath, "JPEGImages", "labels", labelpath);
410 | find_replace(labelpath, ".jpg", ".txt", labelpath);
411 | find_replace(labelpath, ".JPEG", ".txt", labelpath);
412 |
413 | int num_labels = 0;
414 | box_label *truth = read_boxes(labelpath, &num_labels);
415 | for(k = 0; k < l.w*l.h*l.n; ++k){
416 | if(probs[k][0] > thresh){
417 | ++proposals;
418 | }
419 | }
420 | for (j = 0; j < num_labels; ++j) {
421 | ++total;
422 | box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
423 | float best_iou = 0;
424 | for(k = 0; k < l.w*l.h*l.n; ++k){
425 | float iou = box_iou(boxes[k], t);
426 | if(probs[k][0] > thresh && iou > best_iou){
427 | best_iou = iou;
428 | }
429 | }
430 | avg_iou += best_iou;
431 | if(best_iou > iou_thresh){
432 | ++correct;
433 | }
434 | }
435 |
436 | fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
437 | free(id);
438 | free_image(orig);
439 | free_image(sized);
440 | }
441 | }
442 |
443 |
444 | void test_detector_folder(char *datacfg, char *cfgfile, char *weightfile, char *input_folder, char *output_folder, float thresh, float hier_thresh)
445 | {
446 | if( !(input_folder && output_folder) ){
447 | printf("Please Provide Image Folder");
448 | return;
449 | }
450 |
451 | list *options = read_data_cfg(datacfg);
452 | char *name_list = option_find_str(options, "names", "data/names.list");
453 | char **names = get_labels(name_list);
454 |
455 |
456 | image **alphabet = load_alphabet();
457 | network net = parse_network_cfg(cfgfile);
458 | if(weightfile){
459 | load_weights(&net, weightfile);
460 | }
461 | set_batch_network(&net, 1);
462 | srand(2222222);
463 | clock_t time;
464 |
465 |
466 | int max_labels=net.layers[net.n-1].classes;
467 | int ii=0;int jj=0;
468 | for (ii=0; ii < max_labels; ii++)
469 | {
470 | for (jj=0;jj<255;jj++)
471 | {
472 | if (names[ii][jj]=='\0')
473 | break;
474 | else if (names[ii][jj]==' ') // find space, replace it by '_'
475 | names[ii][jj]='_';
476 | }
477 | // printf("class name %s \n",names[ii]);
478 | }
479 |
480 | char buff[256],buff2[256];
481 | char *input_img_name = buff;
482 | char *output_file = buff2;
483 | int j;
484 | float nms=.4;
485 |
486 | FILE *fp;
487 | int save_result_txt=1;
488 |
489 | int img_counter=-1;
490 | while(1){
491 | img_counter++;
492 | strncpy(input_img_name, input_folder, 256);
493 | char frame_index_c[256];
494 | // sprintf(frame_index_c,"/frame%04d.jpg",img_counter); // Important!!! change file name
495 | sprintf(frame_index_c,"/%04d_rgb_raw.jpg",img_counter); // format into 6 digit
496 | // sprintf(frame_index_c,"/%04d.png",img_counter);
497 |
498 | strcat(input_img_name,frame_index_c);
499 |
500 | if( access( input_img_name, F_OK ) == -1 ) {
501 | printf("Cannot find image %s \n",input_img_name);
502 | break;
503 | }
504 |
505 | strncpy(output_file, output_folder, 256);
506 | if (save_result_txt==1)
507 | {
508 | char frame_index_c3[256];
509 | sprintf(frame_index_c3,"_txts/%04d_yolo2_%.2f.txt",img_counter,thresh); // format into 6 digit
510 | char * result_file=strcat(output_file,frame_index_c3);
511 | // printf("save to txt: %s \n",result_file);
512 | fp = fopen(result_file,"w+");
513 | if (fp==NULL)
514 | {
515 | printf("Cannot save to file %s \n",result_file);
516 | break;
517 | }
518 | }
519 |
520 | strncpy(output_file, output_folder, 256);
521 | char frame_index_c2[256];
522 | sprintf(frame_index_c2,"/%04d_yolo2_%.2f",img_counter,thresh); // format into 6 digit
523 | strcat(output_file,frame_index_c2);
524 |
525 | image im = load_image_color(input_img_name,0,0);
526 | image sized = resize_image(im, net.w, net.h);
527 | layer l = net.layers[net.n-1];
528 |
529 | box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
530 | float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
531 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
532 |
533 | float *X = sized.data;
534 | time=clock();
535 | network_predict(net, X);
536 | if (img_counter%10==0)
537 | printf("%s: Predicted in %f seconds.\n", input_img_name, sec(clock()-time));
538 | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
539 | if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
540 | else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
541 | if (save_result_txt==0)
542 | {
543 | draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); // if want to show classes, prob in terminal. See inside function.
544 | save_image(im, output_file);
545 | }
546 | else
547 | {
548 | draw_save_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes,fp, output_file);
549 | }
550 |
551 | free_image(im);
552 | free_image(sized);
553 | free(boxes);
554 | free_ptrs((void **)probs, l.w*l.h*l.n);
555 |
556 | if (save_result_txt==1)
557 | fclose(fp);
558 |
559 | #ifdef OPENCV
560 | // cvWaitKey(0);
561 | // cvDestroyAllWindows();
562 | #endif
563 | }
564 | }
565 |
566 | void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh)
567 | {
568 | list *options = read_data_cfg(datacfg);
569 | char *name_list = option_find_str(options, "names", "data/names.list");
570 | char **names = get_labels(name_list);
571 | image **alphabet = load_alphabet();
572 | network net = parse_network_cfg(cfgfile);
573 | if(weightfile){
574 | load_weights(&net, weightfile);
575 | }
576 | set_batch_network(&net, 1);
577 | srand(2222222);
578 | clock_t time;
579 | char buff[256];
580 | char *input = buff;
581 | int j;
582 | float nms=.4;
583 |
584 | while(1){
585 | if(filename){
586 | strncpy(input, filename, 256);
587 | } else {
588 | printf("Enter Image Path: ");
589 | fflush(stdout);
590 | input = fgets(input, 256, stdin);
591 | if(!input) return;
592 | strtok(input, "\n");
593 | }
594 | image im = load_image_color(input,0,0);
595 | image sized = resize_image(im, net.w, net.h);
596 | layer l = net.layers[net.n-1];
597 |
598 | box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
599 | float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
600 | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
601 |
602 | float *X = sized.data;
603 | time=clock();
604 | network_predict(net, X);
605 | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
606 | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0, hier_thresh);
607 | if (l.softmax_tree && nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
608 | else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
609 | draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
610 | save_image(im, "predictions");
611 | show_image(im, "predictions");
612 |
613 | free_image(im);
614 | free_image(sized);
615 | free(boxes);
616 | free_ptrs((void **)probs, l.w*l.h*l.n);
617 | #ifdef OPENCV
618 | cvWaitKey(0);
619 | cvDestroyAllWindows();
620 | #endif
621 | if (filename) break;
622 | }
623 | }
624 |
625 | void run_detector(int argc, char **argv)
626 | {
627 | char *prefix = find_char_arg(argc, argv, "-prefix", 0);
628 | float thresh = find_float_arg(argc, argv, "-thresh", .24);
629 | float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
630 | int cam_index = find_int_arg(argc, argv, "-c", 0);
631 | int frame_skip = find_int_arg(argc, argv, "-s", 0);
632 | if(argc < 4){
633 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
634 | return;
635 | }
636 | char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
637 | char *outfile = find_char_arg(argc, argv, "-out", 0);
638 | int *gpus = 0;
639 | int gpu = 0;
640 | int ngpus = 0;
641 | if(gpu_list){
642 | printf("%s\n", gpu_list);
643 | int len = strlen(gpu_list);
644 | ngpus = 1;
645 | int i;
646 | for(i = 0; i < len; ++i){
647 | if (gpu_list[i] == ',') ++ngpus;
648 | }
649 | gpus = calloc(ngpus, sizeof(int));
650 | for(i = 0; i < ngpus; ++i){
651 | gpus[i] = atoi(gpu_list);
652 | gpu_list = strchr(gpu_list, ',')+1;
653 | }
654 | } else {
655 | gpu = gpu_index;
656 | gpus = &gpu;
657 | ngpus = 1;
658 | }
659 |
660 | int clear = find_arg(argc, argv, "-clear");
661 |
662 | char *datacfg = argv[3];
663 | char *cfg = argv[4];
664 | char *weights = (argc > 5) ? argv[5] : 0;
665 | char *filename = (argc > 6) ? argv[6]: 0;
666 | if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh);
667 | else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
668 | else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
669 | else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
670 | else if(0==strcmp(argv[2], "demo")) {
671 | list *options = read_data_cfg(datacfg);
672 | int classes = option_find_int(options, "classes", 20);
673 | char *name_list = option_find_str(options, "names", "data/names.list");
674 | char **names = get_labels(name_list);
675 | demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, hier_thresh);
676 | }
677 | }
678 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov2/image.h:
--------------------------------------------------------------------------------
1 | #ifndef IMAGE_H
2 | #define IMAGE_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "box.h"
10 |
11 | typedef struct { /* Image handle; the functions declared in this header take and return it by value. */
12 | int h; /* image height */
13 | int w; /* image width */
14 | int c; /* NOTE(review): presumably the channel count, cf. make_image(w, h, c) - confirm */
15 | float *data; /* pixel values as floats; memory layout and ownership are not visible in this header - TODO confirm against image.c */
16 | } image;
17 |
18 | float get_color(int c, int x, int max);
19 | void flip_image(image a);
20 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b);
21 | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
22 | void draw_bbox(image a, box bbox, int w, float r, float g, float b);
23 | void draw_label(image a, int r, int c, image label, const float *rgb);
24 | void write_label(image a, int r, int c, image *characters, char *string, float *rgb);
25 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes);
26 | void draw_save_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes, FILE *save_txt, char* output_img_file);
27 | image image_distance(image a, image b);
28 | void scale_image(image m, float s);
29 | image crop_image(image im, int dx, int dy, int w, int h);
30 | image random_crop_image(image im, int w, int h);
31 | image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
32 | void random_distort_image(image im, float hue, float saturation, float exposure);
33 | image resize_image(image im, int w, int h);
34 | image resize_min(image im, int min);
35 | image resize_max(image im, int max);
36 | void translate_image(image m, float s);
37 | void normalize_image(image p);
38 | image rotate_image(image m, float rad);
39 | void rotate_image_cw(image im, int times);
40 | void embed_image(image source, image dest, int dx, int dy);
41 | void saturate_image(image im, float sat);
42 | void exposure_image(image im, float sat);
43 | void distort_image(image im, float hue, float sat, float val);
44 | void saturate_exposure_image(image im, float sat, float exposure);
45 | void hsv_to_rgb(image im);
46 | void rgbgr_image(image im);
47 | void constrain_image(image im);
48 | void composite_3d(char *f1, char *f2, char *out, int delta);
49 | int best_3d_shift_r(image a, image b, int min, int max);
50 |
51 | image grayscale_image(image im);
52 | image threshold_image(image im, float thresh);
53 |
54 | image collapse_image_layers(image source, int border);
55 | image collapse_images_horz(image *ims, int n);
56 | image collapse_images_vert(image *ims, int n);
57 |
58 | void show_image(image p, const char *name);
59 | void show_image_normalized(image im, const char *name);
60 | void save_image_png(image im, const char *name);
61 | void save_image(image p, const char *name);
62 | void show_images(image *ims, int n, char *window);
63 | void show_image_layers(image p, char *name);
64 | void show_image_collapsed(image p, char *name);
65 |
66 | void print_image(image m);
67 |
68 | image make_image(int w, int h, int c);
69 | image make_random_image(int w, int h, int c);
70 | image make_empty_image(int w, int h, int c);
71 | image float_to_image(int w, int h, int c, float *data);
72 | image copy_image(image p);
73 | image load_image(char *filename, int w, int h, int c);
74 | image load_image_color(char *filename, int w, int h);
75 | image **load_alphabet();
76 |
77 | float get_pixel(image m, int x, int y, int c);
78 | float get_pixel_extend(image m, int x, int y, int c);
79 | void set_pixel(image m, int x, int y, int c, float val);
80 | void add_pixel(image m, int x, int y, int c, float val);
81 | float bilinear_interpolate(image im, float x, float y, int c);
82 |
83 | image get_image_layer(image m, int l);
84 |
85 | void free_image(image m);
86 | void test_resize(char *filename);
87 | #endif
88 |
89 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov3/darknet.c:
--------------------------------------------------------------------------------
1 | #include "darknet.h"
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top);
8 | extern void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen);
9 | extern void test_detector_folder(char *datacfg, char *cfgfile, char *weightfile, char *input_folder, char *output_folder, float thresh, float hier_thresh);
10 | extern void run_yolo(int argc, char **argv);
11 | extern void run_detector(int argc, char **argv);
12 | extern void run_coco(int argc, char **argv);
13 | extern void run_captcha(int argc, char **argv);
14 | extern void run_nightmare(int argc, char **argv);
15 | extern void run_classifier(int argc, char **argv);
16 | extern void run_regressor(int argc, char **argv);
17 | extern void run_segmenter(int argc, char **argv);
18 | extern void run_char_rnn(int argc, char **argv);
19 | extern void run_tag(int argc, char **argv);
20 | extern void run_cifar(int argc, char **argv);
21 | extern void run_go(int argc, char **argv);
22 | extern void run_art(int argc, char **argv);
23 | extern void run_super(int argc, char **argv);
24 | extern void run_lsd(int argc, char **argv);
25 |
26 | void average(int argc, char *argv[])
27 | {
28 | char *cfgfile = argv[2];
29 | char *outfile = argv[3];
30 | gpu_index = -1;
31 | network *net = parse_network_cfg(cfgfile);
32 | network *sum = parse_network_cfg(cfgfile);
33 |
34 | char *weightfile = argv[4];
35 | load_weights(sum, weightfile);
36 |
37 | int i, j;
38 | int n = argc - 5;
39 | for(i = 0; i < n; ++i){
40 | weightfile = argv[i+5];
41 | load_weights(net, weightfile);
42 | for(j = 0; j < net->n; ++j){
43 | layer l = net->layers[j];
44 | layer out = sum->layers[j];
45 | if(l.type == CONVOLUTIONAL){
46 | int num = l.n*l.c*l.size*l.size;
47 | axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1);
48 | axpy_cpu(num, 1, l.weights, 1, out.weights, 1);
49 | if(l.batch_normalize){
50 | axpy_cpu(l.n, 1, l.scales, 1, out.scales, 1);
51 | axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1);
52 | axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1);
53 | }
54 | }
55 | if(l.type == CONNECTED){
56 | axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1);
57 | axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1);
58 | }
59 | }
60 | }
61 | n = n+1;
62 | for(j = 0; j < net->n; ++j){
63 | layer l = sum->layers[j];
64 | if(l.type == CONVOLUTIONAL){
65 | int num = l.n*l.c*l.size*l.size;
66 | scal_cpu(l.n, 1./n, l.biases, 1);
67 | scal_cpu(num, 1./n, l.weights, 1);
68 | if(l.batch_normalize){
69 | scal_cpu(l.n, 1./n, l.scales, 1);
70 | scal_cpu(l.n, 1./n, l.rolling_mean, 1);
71 | scal_cpu(l.n, 1./n, l.rolling_variance, 1);
72 | }
73 | }
74 | if(l.type == CONNECTED){
75 | scal_cpu(l.outputs, 1./n, l.biases, 1);
76 | scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1);
77 | }
78 | }
79 | save_weights(sum, outfile);
80 | }
81 |
82 | long numops(network *net)
83 | {
84 | int i;
85 | long ops = 0;
86 | for(i = 0; i < net->n; ++i){
87 | layer l = net->layers[i];
88 | if(l.type == CONVOLUTIONAL){
89 | ops += 2l * l.n * l.size*l.size*l.c/l.groups * l.out_h*l.out_w;
90 | } else if(l.type == CONNECTED){
91 | ops += 2l * l.inputs * l.outputs;
92 | } else if (l.type == RNN){
93 | ops += 2l * l.input_layer->inputs * l.input_layer->outputs;
94 | ops += 2l * l.self_layer->inputs * l.self_layer->outputs;
95 | ops += 2l * l.output_layer->inputs * l.output_layer->outputs;
96 | } else if (l.type == GRU){
97 | ops += 2l * l.uz->inputs * l.uz->outputs;
98 | ops += 2l * l.uh->inputs * l.uh->outputs;
99 | ops += 2l * l.ur->inputs * l.ur->outputs;
100 | ops += 2l * l.wz->inputs * l.wz->outputs;
101 | ops += 2l * l.wh->inputs * l.wh->outputs;
102 | ops += 2l * l.wr->inputs * l.wr->outputs;
103 | } else if (l.type == LSTM){
104 | ops += 2l * l.uf->inputs * l.uf->outputs;
105 | ops += 2l * l.ui->inputs * l.ui->outputs;
106 | ops += 2l * l.ug->inputs * l.ug->outputs;
107 | ops += 2l * l.uo->inputs * l.uo->outputs;
108 | ops += 2l * l.wf->inputs * l.wf->outputs;
109 | ops += 2l * l.wi->inputs * l.wi->outputs;
110 | ops += 2l * l.wg->inputs * l.wg->outputs;
111 | ops += 2l * l.wo->inputs * l.wo->outputs;
112 | }
113 | }
114 | return ops;
115 | }
116 |
117 | void speed(char *cfgfile, int tics)
118 | {
119 | if (tics == 0) tics = 1000;
120 | network *net = parse_network_cfg(cfgfile);
121 | set_batch_network(net, 1);
122 | int i;
123 | double time=what_time_is_it_now();
124 | image im = make_image(net->w, net->h, net->c*net->batch);
125 | for(i = 0; i < tics; ++i){
126 | network_predict(net, im.data);
127 | }
128 | double t = what_time_is_it_now() - time;
129 | long ops = numops(net);
130 | printf("\n%d evals, %f Seconds\n", tics, t);
131 | printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
132 | printf("FLOPS: %.2f Bn\n", (float)ops/1000000000.*tics/t);
133 | printf("Speed: %f sec/eval\n", t/tics);
134 | printf("Speed: %f Hz\n", tics/t);
135 | }
136 |
137 | void operations(char *cfgfile)
138 | {
139 | gpu_index = -1;
140 | network *net = parse_network_cfg(cfgfile);
141 | long ops = numops(net);
142 | printf("Floating Point Operations: %ld\n", ops);
143 | printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
144 | }
145 |
146 | void oneoff(char *cfgfile, char *weightfile, char *outfile)
147 | {
148 | gpu_index = -1;
149 | network *net = parse_network_cfg(cfgfile);
150 | int oldn = net->layers[net->n - 2].n;
151 | int c = net->layers[net->n - 2].c;
152 | scal_cpu(oldn*c, .1, net->layers[net->n - 2].weights, 1);
153 | scal_cpu(oldn, 0, net->layers[net->n - 2].biases, 1);
154 | net->layers[net->n - 2].n = 11921;
155 | net->layers[net->n - 2].biases += 5;
156 | net->layers[net->n - 2].weights += 5*c;
157 | if(weightfile){
158 | load_weights(net, weightfile);
159 | }
160 | net->layers[net->n - 2].biases -= 5;
161 | net->layers[net->n - 2].weights -= 5*c;
162 | net->layers[net->n - 2].n = oldn;
163 | printf("%d\n", oldn);
164 | layer l = net->layers[net->n - 2];
165 | copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1);
166 | copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1);
167 | copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1);
168 | copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1);
169 | *net->seen = 0;
170 | save_weights(net, outfile);
171 | }
172 |
173 | void oneoff2(char *cfgfile, char *weightfile, char *outfile, int l)
174 | {
175 | gpu_index = -1;
176 | network *net = parse_network_cfg(cfgfile);
177 | if(weightfile){
178 | load_weights_upto(net, weightfile, 0, net->n);
179 | load_weights_upto(net, weightfile, l, net->n);
180 | }
181 | *net->seen = 0;
182 | save_weights_upto(net, outfile, net->n);
183 | }
184 |
185 | void partial(char *cfgfile, char *weightfile, char *outfile, int max)
186 | {
187 | gpu_index = -1;
188 | network *net = load_network(cfgfile, weightfile, 1);
189 | save_weights_upto(net, outfile, max);
190 | }
191 |
192 | void print_weights(char *cfgfile, char *weightfile, int n)
193 | {
194 | gpu_index = -1;
195 | network *net = load_network(cfgfile, weightfile, 1);
196 | layer l = net->layers[n];
197 | int i, j;
198 | //printf("[");
199 | for(i = 0; i < l.n; ++i){
200 | //printf("[");
201 | for(j = 0; j < l.size*l.size*l.c; ++j){
202 | //if(j > 0) printf(",");
203 | printf("%g ", l.weights[i*l.size*l.size*l.c + j]);
204 | }
205 | printf("\n");
206 | //printf("]%s\n", (i == l.n-1)?"":",");
207 | }
208 | //printf("]");
209 | }
210 |
211 | void rescale_net(char *cfgfile, char *weightfile, char *outfile)
212 | {
213 | gpu_index = -1;
214 | network *net = load_network(cfgfile, weightfile, 0);
215 | int i;
216 | for(i = 0; i < net->n; ++i){
217 | layer l = net->layers[i];
218 | if(l.type == CONVOLUTIONAL){
219 | rescale_weights(l, 2, -.5);
220 | break;
221 | }
222 | }
223 | save_weights(net, outfile);
224 | }
225 |
226 | void rgbgr_net(char *cfgfile, char *weightfile, char *outfile)
227 | {
228 | gpu_index = -1;
229 | network *net = load_network(cfgfile, weightfile, 0);
230 | int i;
231 | for(i = 0; i < net->n; ++i){
232 | layer l = net->layers[i];
233 | if(l.type == CONVOLUTIONAL){
234 | rgbgr_weights(l);
235 | break;
236 | }
237 | }
238 | save_weights(net, outfile);
239 | }
240 |
241 | void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
242 | {
243 | gpu_index = -1;
244 | network *net = load_network(cfgfile, weightfile, 0);
245 | int i;
246 | for (i = 0; i < net->n; ++i) {
247 | layer l = net->layers[i];
248 | if (l.type == CONVOLUTIONAL && l.batch_normalize) {
249 | denormalize_convolutional_layer(l);
250 | }
251 | if (l.type == CONNECTED && l.batch_normalize) {
252 | denormalize_connected_layer(l);
253 | }
254 | if (l.type == GRU && l.batch_normalize) {
255 | denormalize_connected_layer(*l.input_z_layer);
256 | denormalize_connected_layer(*l.input_r_layer);
257 | denormalize_connected_layer(*l.input_h_layer);
258 | denormalize_connected_layer(*l.state_z_layer);
259 | denormalize_connected_layer(*l.state_r_layer);
260 | denormalize_connected_layer(*l.state_h_layer);
261 | }
262 | }
263 | save_weights(net, outfile);
264 | }
265 |
266 | layer normalize_layer(layer l, int n)
267 | {
268 | int j;
269 | l.batch_normalize=1;
270 | l.scales = calloc(n, sizeof(float));
271 | for(j = 0; j < n; ++j){
272 | l.scales[j] = 1;
273 | }
274 | l.rolling_mean = calloc(n, sizeof(float));
275 | l.rolling_variance = calloc(n, sizeof(float));
276 | return l;
277 | }
278 |
279 | void normalize_net(char *cfgfile, char *weightfile, char *outfile)
280 | {
281 | gpu_index = -1;
282 | network *net = load_network(cfgfile, weightfile, 0);
283 | int i;
284 | for(i = 0; i < net->n; ++i){
285 | layer l = net->layers[i];
286 | if(l.type == CONVOLUTIONAL && !l.batch_normalize){
287 | net->layers[i] = normalize_layer(l, l.n);
288 | }
289 | if (l.type == CONNECTED && !l.batch_normalize) {
290 | net->layers[i] = normalize_layer(l, l.outputs);
291 | }
292 | if (l.type == GRU && l.batch_normalize) {
293 | *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
294 | *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
295 | *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
296 | *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
297 | *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
298 | *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
299 | net->layers[i].batch_normalize=1;
300 | }
301 | }
302 | save_weights(net, outfile);
303 | }
304 |
305 | void statistics_net(char *cfgfile, char *weightfile)
306 | {
307 | gpu_index = -1;
308 | network *net = load_network(cfgfile, weightfile, 0);
309 | int i;
310 | for (i = 0; i < net->n; ++i) {
311 | layer l = net->layers[i];
312 | if (l.type == CONNECTED && l.batch_normalize) {
313 | printf("Connected Layer %d\n", i);
314 | statistics_connected_layer(l);
315 | }
316 | if (l.type == GRU && l.batch_normalize) {
317 | printf("GRU Layer %d\n", i);
318 | printf("Input Z\n");
319 | statistics_connected_layer(*l.input_z_layer);
320 | printf("Input R\n");
321 | statistics_connected_layer(*l.input_r_layer);
322 | printf("Input H\n");
323 | statistics_connected_layer(*l.input_h_layer);
324 | printf("State Z\n");
325 | statistics_connected_layer(*l.state_z_layer);
326 | printf("State R\n");
327 | statistics_connected_layer(*l.state_r_layer);
328 | printf("State H\n");
329 | statistics_connected_layer(*l.state_h_layer);
330 | }
331 | printf("\n");
332 | }
333 | }
334 |
335 | void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
336 | {
337 | gpu_index = -1;
338 | network *net = load_network(cfgfile, weightfile, 0);
339 | int i;
340 | for (i = 0; i < net->n; ++i) {
341 | layer l = net->layers[i];
342 | if ((l.type == DECONVOLUTIONAL || l.type == CONVOLUTIONAL) && l.batch_normalize) {
343 | denormalize_convolutional_layer(l);
344 | net->layers[i].batch_normalize=0;
345 | }
346 | if (l.type == CONNECTED && l.batch_normalize) {
347 | denormalize_connected_layer(l);
348 | net->layers[i].batch_normalize=0;
349 | }
350 | if (l.type == GRU && l.batch_normalize) {
351 | denormalize_connected_layer(*l.input_z_layer);
352 | denormalize_connected_layer(*l.input_r_layer);
353 | denormalize_connected_layer(*l.input_h_layer);
354 | denormalize_connected_layer(*l.state_z_layer);
355 | denormalize_connected_layer(*l.state_r_layer);
356 | denormalize_connected_layer(*l.state_h_layer);
357 | l.input_z_layer->batch_normalize = 0;
358 | l.input_r_layer->batch_normalize = 0;
359 | l.input_h_layer->batch_normalize = 0;
360 | l.state_z_layer->batch_normalize = 0;
361 | l.state_r_layer->batch_normalize = 0;
362 | l.state_h_layer->batch_normalize = 0;
363 | net->layers[i].batch_normalize=0;
364 | }
365 | }
366 | save_weights(net, outfile);
367 | }
368 |
369 | void mkimg(char *cfgfile, char *weightfile, int h, int w, int num, char *prefix)
370 | {
371 | network *net = load_network(cfgfile, weightfile, 0);
372 | image *ims = get_weights(net->layers[0]);
373 | int n = net->layers[0].n;
374 | int z;
375 | for(z = 0; z < num; ++z){
376 | image im = make_image(h, w, 3);
377 | fill_image(im, .5);
378 | int i;
379 | for(i = 0; i < 100; ++i){
380 | image r = copy_image(ims[rand()%n]);
381 | rotate_image_cw(r, rand()%4);
382 | random_distort_image(r, 1, 1.5, 1.5);
383 | int dx = rand()%(w-r.w);
384 | int dy = rand()%(h-r.h);
385 | ghost_image(r, im, dx, dy);
386 | free_image(r);
387 | }
388 | char buff[256];
389 | sprintf(buff, "%s/gen_%d", prefix, z);
390 | save_image(im, buff);
391 | free_image(im);
392 | }
393 | }
394 |
395 | void visualize(char *cfgfile, char *weightfile)
396 | {
397 | network *net = load_network(cfgfile, weightfile, 0);
398 | visualize_network(net);
399 | #ifdef OPENCV
400 | cvWaitKey(0);
401 | #endif
402 | }
403 |
404 | int main(int argc, char **argv)
405 | {
406 | //test_resize("data/bad.jpg");
407 | //test_box();
408 | //test_convolutional_layer();
409 | if(argc < 2){
410 | fprintf(stderr, "usage: %s \n", argv[0]);
411 | return 0;
412 | }
413 | gpu_index = find_int_arg(argc, argv, "-i", 0);
414 | if(find_arg(argc, argv, "-nogpu")) {
415 | gpu_index = -1;
416 | }
417 |
418 | #ifndef GPU
419 | gpu_index = -1;
420 | #else
421 | if(gpu_index >= 0){
422 | cuda_set_device(gpu_index);
423 | }
424 | #endif
425 |
426 | if (0 == strcmp(argv[1], "average")){
427 | average(argc, argv);
428 | } else if (0 == strcmp(argv[1], "yolo")){
429 | run_yolo(argc, argv);
430 | } else if (0 == strcmp(argv[1], "super")){
431 | run_super(argc, argv);
432 | } else if (0 == strcmp(argv[1], "lsd")){
433 | run_lsd(argc, argv);
434 | } else if (0 == strcmp(argv[1], "detector")){
435 | run_detector(argc, argv);
436 | } else if (0 == strcmp(argv[1], "detect")){
437 | float thresh = find_float_arg(argc, argv, "-thresh", .5);
438 | char *filename = (argc > 4) ? argv[4]: 0;
439 | char *outfile = find_char_arg(argc, argv, "-out", 0);
440 | int fullscreen = find_arg(argc, argv, "-fullscreen");
441 | test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
442 | } else if (0 == strcmp(argv[1], "detect_folder")){
443 | float thresh = find_float_arg(argc, argv, "-thresh", .24); // default threshold is 0.24
444 | char *filename = (argc > 4) ? argv[4]: 0;
445 | char *output_folder = (argc > 5) ? argv[5]: 0;
446 | test_detector_folder("cfg/coco.data", argv[2], argv[3], filename, output_folder, thresh, .5);
447 | } else if (0 == strcmp(argv[1], "cifar")){
448 | run_cifar(argc, argv);
449 | } else if (0 == strcmp(argv[1], "go")){
450 | run_go(argc, argv);
451 | } else if (0 == strcmp(argv[1], "rnn")){
452 | run_char_rnn(argc, argv);
453 | } else if (0 == strcmp(argv[1], "coco")){
454 | run_coco(argc, argv);
455 | } else if (0 == strcmp(argv[1], "classify")){
456 | predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
457 | } else if (0 == strcmp(argv[1], "classifier")){
458 | run_classifier(argc, argv);
459 | } else if (0 == strcmp(argv[1], "regressor")){
460 | run_regressor(argc, argv);
461 | } else if (0 == strcmp(argv[1], "segmenter")){
462 | run_segmenter(argc, argv);
463 | } else if (0 == strcmp(argv[1], "art")){
464 | run_art(argc, argv);
465 | } else if (0 == strcmp(argv[1], "tag")){
466 | run_tag(argc, argv);
467 | } else if (0 == strcmp(argv[1], "3d")){
468 | composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
469 | } else if (0 == strcmp(argv[1], "test")){
470 | test_resize(argv[2]);
471 | } else if (0 == strcmp(argv[1], "captcha")){
472 | run_captcha(argc, argv);
473 | } else if (0 == strcmp(argv[1], "nightmare")){
474 | run_nightmare(argc, argv);
475 | } else if (0 == strcmp(argv[1], "rgbgr")){
476 | rgbgr_net(argv[2], argv[3], argv[4]);
477 | } else if (0 == strcmp(argv[1], "reset")){
478 | reset_normalize_net(argv[2], argv[3], argv[4]);
479 | } else if (0 == strcmp(argv[1], "denormalize")){
480 | denormalize_net(argv[2], argv[3], argv[4]);
481 | } else if (0 == strcmp(argv[1], "statistics")){
482 | statistics_net(argv[2], argv[3]);
483 | } else if (0 == strcmp(argv[1], "normalize")){
484 | normalize_net(argv[2], argv[3], argv[4]);
485 | } else if (0 == strcmp(argv[1], "rescale")){
486 | rescale_net(argv[2], argv[3], argv[4]);
487 | } else if (0 == strcmp(argv[1], "ops")){
488 | operations(argv[2]);
489 | } else if (0 == strcmp(argv[1], "speed")){
490 | speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
491 | } else if (0 == strcmp(argv[1], "oneoff")){
492 | oneoff(argv[2], argv[3], argv[4]);
493 | } else if (0 == strcmp(argv[1], "oneoff2")){
494 | oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
495 | } else if (0 == strcmp(argv[1], "print")){
496 | print_weights(argv[2], argv[3], atoi(argv[4]));
497 | } else if (0 == strcmp(argv[1], "partial")){
498 | partial(argv[2], argv[3], argv[4], atoi(argv[5]));
499 | } else if (0 == strcmp(argv[1], "average")){
500 | average(argc, argv);
501 | } else if (0 == strcmp(argv[1], "visualize")){
502 | visualize(argv[2], (argc > 3) ? argv[3] : 0);
503 | } else if (0 == strcmp(argv[1], "mkimg")){
504 | mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
505 | } else if (0 == strcmp(argv[1], "imtest")){
506 | test_resize(argv[2]);
507 | } else {
508 | fprintf(stderr, "Not an option: %s\n", argv[1]);
509 | }
510 | return 0;
511 | }
512 |
513 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov3/darknet.h:
--------------------------------------------------------------------------------
1 | #ifndef DARKNET_API
2 | #define DARKNET_API
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #define SECRET_NUM -1234
9 | extern int gpu_index;
10 |
11 | #ifdef GPU
12 | #define BLOCK 512
13 |
14 | #include "cuda_runtime.h"
15 | #include "curand.h"
16 | #include "cublas_v2.h"
17 |
18 | #ifdef CUDNN
19 | #include "cudnn.h"
20 | #endif
21 | #endif
22 |
23 | #ifndef __cplusplus
24 | #ifdef OPENCV
25 | #include "opencv2/highgui/highgui_c.h"
26 | #include "opencv2/imgproc/imgproc_c.h"
27 | #include "opencv2/core/version.hpp"
28 | #if CV_MAJOR_VERSION == 3
29 | #include "opencv2/videoio/videoio_c.h"
30 | #include "opencv2/imgcodecs/imgcodecs_c.h"
31 | #endif
32 | #endif
33 | #endif
34 |
/*
 * Result type of get_metadata(): a class count and an array of strings.
 * NOTE(review): presumably `names` holds `classes` entries — confirm in
 * the get_metadata() implementation.
 */
typedef struct{
    int classes;
    char **names;
} metadata;
39 |
40 | metadata get_metadata(char *file);
41 |
/*
 * Hierarchy description returned by read_tree() and referenced by
 * layer.softmax_tree, network.hierarchy and load_args.hierarchy.
 * NOTE(review): the per-node arrays (leaf, parent, child, group, name) are
 * presumably `n` long and the group arrays `groups` long — confirm in
 * read_tree().
 */
typedef struct{
    int *leaf;
    int n;
    int *parent;
    int *child;
    int *group;
    char **name;

    int groups;
    int *group_size;
    int *group_offset;
} tree;
54 | tree *read_tree(char *filename);
55 |
/* Activation selector stored in layer.activation. Enumerators are numbered from 0 in the order listed. */
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
} ACTIVATION;
59 |
/* Selector for a two-operand element operation (multiply, add, subtract, divide). */
typedef enum{
    MULT, ADD, SUB, DIV
} BINARY_ACTIVATION;
63 |
/*
 * Kind tag stored in layer.type. Code that walks a network switches on this
 * value (e.g. the *_net utilities in darknet.c test for CONVOLUTIONAL,
 * DECONVOLUTIONAL, CONNECTED and GRU). Enumerators are numbered from 0 in
 * the order listed, so inserting or reordering entries changes their values.
 */
typedef enum {
    CONVOLUTIONAL,
    DECONVOLUTIONAL,
    CONNECTED,
    MAXPOOL,
    SOFTMAX,
    DETECTION,
    DROPOUT,
    CROP,
    ROUTE,
    COST,
    NORMALIZATION,
    AVGPOOL,
    LOCAL,
    SHORTCUT,
    ACTIVE,
    RNN,
    GRU,
    LSTM,
    CRNN,
    BATCHNORM,
    NETWORK,
    XNOR,
    REGION,
    YOLO,
    REORG,
    UPSAMPLE,
    LOGXENT,
    L2NORM,
    BLANK
} LAYER_TYPE;
95 |
/* Cost selector stored in layer.cost_type. Enumerators are numbered from 0 in the order listed. */
typedef enum{
    SSE, MASKED, L1, SEG, SMOOTH,WGAN
} COST_TYPE;
99 |
/*
 * Argument bundle passed by value to a layer's update / update_gpu callback
 * (see the function pointers in struct layer).
 * NOTE(review): B1, B2, eps and t look like Adam optimizer parameters used
 * when `adam` is non-zero — confirm in the update implementations.
 */
typedef struct{
    int batch;
    float learning_rate;
    float momentum;
    float decay;
    int adam;
    float B1;
    float B2;
    float eps;
    int t;
} update_args;
111 |
112 | struct network;
113 | typedef struct network network;
114 |
115 | struct layer;
116 | typedef struct layer layer;
117 |
/*
 * One network layer of any LAYER_TYPE. A single "fat" struct carries the
 * fields of every layer kind; which fields are meaningful depends on `type`.
 * Layers are passed around by value in much of the API (e.g. the callbacks
 * below and the denormalize_* helpers), so copies share the pointed-to buffers.
 */
struct layer{
    LAYER_TYPE type;
    ACTIVATION activation;
    COST_TYPE cost_type;
    /* Per-layer callbacks; each receives the layer by value. */
    void (*forward) (struct layer, struct network);
    void (*backward) (struct layer, struct network);
    void (*update) (struct layer, update_args);
    void (*forward_gpu) (struct layer, struct network);
    void (*backward_gpu) (struct layer, struct network);
    void (*update_gpu) (struct layer, update_args);
    int batch_normalize;  /* non-zero when the layer is batch-normalized (tested/set by the *_net utilities in darknet.c) */
    int shortcut;
    int batch;
    int forced;
    int flipped;
    int inputs;
    int outputs;
    int nweights;
    int nbiases;
    int extra;
    int truths;
    int h,w,c;
    int out_h, out_w, out_c;
    int n;                /* filter count: print_weights() iterates n filters of size*size*c weights each */
    int max_boxes;
    int groups;
    int size;
    int side;
    int stride;
    int reverse;
    int flatten;
    int spatial;
    int pad;
    int sqrt;
    int flip;
    int index;
    int binary;
    int xnor;
    int steps;
    int hidden;
    int truth;
    float smooth;
    float dot;
    float angle;
    float jitter;
    float saturation;
    float exposure;
    float shift;
    float ratio;
    float learning_rate_scale;
    float clip;
    int softmax;
    int classes;
    int coords;
    int background;
    int rescore;
    int objectness;
    int joint;
    int noadjust;
    int reorg;
    int log;
    int tanh;
    int *mask;
    int total;

    float alpha;
    float beta;
    float kappa;

    float coord_scale;
    float object_scale;
    float noobject_scale;
    float mask_scale;
    float class_scale;
    int bias_match;
    int random;
    float ignore_thresh;
    float truth_thresh;
    float thresh;
    float focus;
    int classfix;
    int absolute;

    int onlyforward;
    int stopbackward;
    int dontload;
    int dontsave;
    int dontloadscales;

    float temperature;
    float probability;
    float scale;

    char * cweights;
    int * indexes;
    int * input_layers;
    int * input_sizes;
    int * map;
    float * rand;
    float * cost;
    float * state;
    float * prev_state;
    float * forgot_state;
    float * forgot_delta;
    float * state_delta;
    float * combine_cpu;
    float * combine_delta_cpu;

    float * concat;
    float * concat_delta;

    float * binary_weights;

    float * biases;
    float * bias_updates;

    /* Batch-norm buffers; normalize_layer() in darknet.c allocates scales,
     * rolling_mean and rolling_variance with one float per channel/output. */
    float * scales;
    float * scale_updates;

    float * weights;
    float * weight_updates;

    float * delta;
    float * output;
    float * loss;
    float * squared;
    float * norms;

    float * spatial_mean;
    float * mean;
    float * variance;

    float * mean_delta;
    float * variance_delta;

    float * rolling_mean;
    float * rolling_variance;

    float * x;
    float * x_norm;

    float * m;
    float * v;

    float * bias_m;
    float * bias_v;
    float * scale_m;
    float * scale_v;


    float *z_cpu;
    float *r_cpu;
    float *h_cpu;
    float * prev_state_cpu;

    float *temp_cpu;
    float *temp2_cpu;
    float *temp3_cpu;

    float *dh_cpu;
    float *hh_cpu;
    float *prev_cell_cpu;
    float *cell_cpu;
    float *f_cpu;
    float *i_cpu;
    float *g_cpu;
    float *o_cpu;
    float *c_cpu;
    float *dc_cpu;

    float * binary_input;

    /* Nested sub-layers owned by composite (recurrent) layer kinds. */
    struct layer *input_layer;
    struct layer *self_layer;
    struct layer *output_layer;

    struct layer *reset_layer;
    struct layer *update_layer;
    struct layer *state_layer;

    struct layer *input_gate_layer;
    struct layer *state_gate_layer;
    struct layer *input_save_layer;
    struct layer *state_save_layer;
    struct layer *input_state_layer;
    struct layer *state_state_layer;

    /* The six sub-layers the *_net utilities in darknet.c process for GRU layers. */
    struct layer *input_z_layer;
    struct layer *state_z_layer;

    struct layer *input_r_layer;
    struct layer *state_r_layer;

    struct layer *input_h_layer;
    struct layer *state_h_layer;

    struct layer *wz;
    struct layer *uz;
    struct layer *wr;
    struct layer *ur;
    struct layer *wh;
    struct layer *uh;
    struct layer *uo;
    struct layer *wo;
    struct layer *uf;
    struct layer *wf;
    struct layer *ui;
    struct layer *wi;
    struct layer *ug;
    struct layer *wg;

    tree *softmax_tree;

    size_t workspace_size;

    /* Everything below exists only in GPU builds, so the struct layout
     * differs between GPU and non-GPU compilations. */
#ifdef GPU
    int *indexes_gpu;

    float *z_gpu;
    float *r_gpu;
    float *h_gpu;

    float *temp_gpu;
    float *temp2_gpu;
    float *temp3_gpu;

    float *dh_gpu;
    float *hh_gpu;
    float *prev_cell_gpu;
    float *cell_gpu;
    float *f_gpu;
    float *i_gpu;
    float *g_gpu;
    float *o_gpu;
    float *c_gpu;
    float *dc_gpu;

    float *m_gpu;
    float *v_gpu;
    float *bias_m_gpu;
    float *scale_m_gpu;
    float *bias_v_gpu;
    float *scale_v_gpu;

    float * combine_gpu;
    float * combine_delta_gpu;

    float * prev_state_gpu;
    float * forgot_state_gpu;
    float * forgot_delta_gpu;
    float * state_gpu;
    float * state_delta_gpu;
    float * gate_gpu;
    float * gate_delta_gpu;
    float * save_gpu;
    float * save_delta_gpu;
    float * concat_gpu;
    float * concat_delta_gpu;

    float * binary_input_gpu;
    float * binary_weights_gpu;

    float * mean_gpu;
    float * variance_gpu;

    float * rolling_mean_gpu;
    float * rolling_variance_gpu;

    float * variance_delta_gpu;
    float * mean_delta_gpu;

    float * x_gpu;
    float * x_norm_gpu;
    float * weights_gpu;
    float * weight_updates_gpu;
    float * weight_change_gpu;

    float * biases_gpu;
    float * bias_updates_gpu;
    float * bias_change_gpu;

    float * scales_gpu;
    float * scale_updates_gpu;
    float * scale_change_gpu;

    float * output_gpu;
    float * loss_gpu;
    float * delta_gpu;
    float * rand_gpu;
    float * squared_gpu;
    float * norms_gpu;
#ifdef CUDNN
    cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
    cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
    cudnnTensorDescriptor_t normTensorDesc;
    cudnnFilterDescriptor_t weightDesc;
    cudnnFilterDescriptor_t dweightDesc;
    cudnnConvolutionDescriptor_t convDesc;
    cudnnConvolutionFwdAlgo_t fw_algo;
    cudnnConvolutionBwdDataAlgo_t bd_algo;
    cudnnConvolutionBwdFilterAlgo_t bf_algo;
#endif
#endif
};
422 |
423 | void free_layer(layer);
424 |
/* Learning-rate schedule selector stored in network.policy. Enumerators are numbered from 0 in the order listed. */
typedef enum {
    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
} learning_rate_policy;
428 |
/*
 * A whole network: an array of `n` layers plus training hyper-parameters,
 * input geometry and working buffers. Created by load_network() /
 * parse_network_cfg() and released with free_network().
 */
typedef struct network{
    int n;          /* number of entries in `layers` (callers loop i < net->n) */
    int batch;
    size_t *seen;
    int *t;
    float epoch;
    int subdivisions;
    layer *layers;
    float *output;
    learning_rate_policy policy;

    float learning_rate;
    float momentum;
    float decay;
    float gamma;
    float scale;
    float power;
    int time_steps;
    int step;
    int max_batches;
    float *scales;
    int *steps;
    int num_steps;
    int burn_in;

    int adam;
    float B1;
    float B2;
    float eps;

    int inputs;
    int outputs;
    int truths;
    int notruth;
    int h, w, c;
    int max_crop;
    int min_crop;
    float max_ratio;
    float min_ratio;
    int center;
    float angle;
    float aspect;
    float exposure;
    float saturation;
    float hue;
    int random;

    int gpu_index;
    tree *hierarchy;

    float *input;
    float *truth;
    float *delta;
    float *workspace;
    int train;
    int index;
    float *cost;
    float clip;

    /* Present only in GPU builds. */
#ifdef GPU
    float *input_gpu;
    float *truth_gpu;
    float *delta_gpu;
    float *output_gpu;
#endif

} network;
496 |
/*
 * Parameters of one image augmentation: a size, a scale, an angle (`rad`),
 * a translation (dx, dy) and an aspect factor.
 * NOTE(review): units (e.g. radians for `rad`, pixels for dx/dy) are
 * inferred from the names — confirm at the point of use.
 */
typedef struct {
    int w;
    int h;
    float scale;
    float rad;
    float dx;
    float dy;
    float aspect;
} augment_args;
506 |
/*
 * Float image: width, height, channel count and a pointer to the pixel
 * buffer. Passed by value throughout the API, so copies alias the same
 * `data`; use copy_image() for an independent copy and free_image() to
 * release one.
 * NOTE(review): the buffer presumably holds w*h*c floats — confirm the
 * memory layout in the image implementation.
 */
typedef struct {
    int w;
    int h;
    int c;
    float *data;
} image;
513 |
/*
 * Axis-aligned box given by a position (x, y) and a size (w, h).
 * NOTE(review): whether (x, y) is the center or a corner is not visible
 * here — confirm in the box implementation.
 */
typedef struct{
    float x, y, w, h;
} box;
517 |
/*
 * One candidate detection: a bounding box, an objectness score and
 * per-class data. Arrays of these are returned by get_network_boxes() and
 * released with free_detections().
 * NOTE(review): `prob` presumably has `classes` entries — confirm where
 * detections are allocated.
 */
typedef struct detection{
    box bbox;
    int classes;
    float *prob;
    float *mask;
    float objectness;
    int sort_class;
} detection;
526 |
/*
 * Float matrix stored as an array of row pointers (`vals`), with its
 * dimensions. Created by make_matrix(rows, cols), released by free_matrix().
 */
typedef struct matrix{
    int rows, cols;
    float **vals;
} matrix;
531 |
532 |
/*
 * A dataset batch: two matrices (X and y) plus optional box annotations.
 * Released with free_data().
 * NOTE(review): X presumably holds inputs and y targets, and `shallow`
 * presumably marks a copy that does not own its rows — confirm in the
 * data implementation.
 */
typedef struct{
    int w, h;
    matrix X;
    matrix y;
    int shallow;
    int *num_boxes;
    box **boxes;
} data;
541 |
/* Loader mode stored in load_args.type. Enumerators are numbered from 0 in the order listed. */
typedef enum {
    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA
} data_type;
545 |
/*
 * Argument bundle for the data loaders (load_data(), load_data_in_thread(),
 * load_data_blocking()); passed by value. get_base_args() builds one from a
 * network, and callers then fill in fields such as paths, n and coords
 * (see train_detector() in detector.c).
 * NOTE(review): `d`, `im` and `resized` look like output slots written by
 * the loader — confirm in the loader implementation.
 */
typedef struct load_args{
    int threads;
    char **paths;
    char *path;
    int n;
    int m;
    char **labels;
    int h;
    int w;
    int out_w;
    int out_h;
    int nh;
    int nw;
    int num_boxes;
    int min, max, size;
    int classes;
    int background;
    int scale;
    int center;
    int coords;
    float jitter;
    float angle;
    float aspect;
    float saturation;
    float exposure;
    float hue;
    data *d;
    image *im;
    image *resized;
    data_type type;
    tree *hierarchy;
} load_args;
578 |
/*
 * One labelled box as returned by read_boxes(): an id, a position/size
 * (x, y, w, h) and the four edges (left, right, top, bottom).
 * NOTE(review): `id` is presumably the class index — confirm in read_boxes().
 */
typedef struct{
    int id;
    float x,y,w,h;
    float left, right, top, bottom;
} box_label;
584 |
585 |
586 | network *load_network(char *cfg, char *weights, int clear);
587 | load_args get_base_args(network *net);
588 |
589 | void free_data(data d);
590 |
/* Element of a doubly linked list: an untyped payload plus next/prev links. */
typedef struct node{
    void *val;
    struct node *next;
    struct node *prev;
} node;
596 |
/*
 * Doubly linked list header: element count plus pointers to the first and
 * last node. Returned by read_cfg(), read_data_cfg() and get_paths();
 * converted with list_to_array() and released with free_list().
 */
typedef struct list{
    int size;
    node *front;
    node *back;
} list;
602 |
603 | pthread_t load_data(load_args args);
604 | list *read_data_cfg(char *filename);
605 | list *read_cfg(char *filename);
606 | unsigned char *read_file(char *filename);
607 | data resize_data(data orig, int w, int h);
608 | data *tile_data(data orig, int divs, int size);
609 | data select_data(data *orig, int *inds);
610 |
611 | void forward_network(network *net);
612 | void backward_network(network *net);
613 | void update_network(network *net);
614 |
615 |
616 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
617 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
618 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY);
619 | void scal_cpu(int N, float ALPHA, float *X, int INCX);
620 | void fill_cpu(int N, float ALPHA, float * X, int INCX);
621 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
622 | void softmax(float *input, int n, float temp, int stride, float *output);
623 |
624 | int best_3d_shift_r(image a, image b, int min, int max);
625 | #ifdef GPU
626 | void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
627 | void fill_gpu(int N, float ALPHA, float * X, int INCX);
628 | void scal_gpu(int N, float ALPHA, float * X, int INCX);
629 | void copy_gpu(int N, float * X, int INCX, float * Y, int INCY);
630 |
631 | void cuda_set_device(int n);
632 | void cuda_free(float *x_gpu);
633 | float *cuda_make_array(float *x, size_t n);
634 | void cuda_pull_array(float *x_gpu, float *x, size_t n);
635 | float cuda_mag_array(float *x_gpu, size_t n);
636 | void cuda_push_array(float *x_gpu, float *x, size_t n);
637 |
638 | void forward_network_gpu(network *net);
639 | void backward_network_gpu(network *net);
640 | void update_network_gpu(network *net);
641 |
642 | float train_networks(network **nets, int n, data d, int interval);
643 | void sync_nets(network **nets, int n, int interval);
644 | void harmless_update_network_gpu(network *net);
645 | #endif
646 | image get_label(image **characters, char *string, int size);
647 | void draw_label(image a, int r, int c, image label, const float *rgb);
648 | void save_image_png(image im, const char *name);
649 | void get_next_batch(data d, int n, int offset, float *X, float *y);
650 | void grayscale_image_3c(image im);
651 | void normalize_image(image p);
652 | void matrix_to_csv(matrix m);
653 | float train_network_sgd(network *net, data d, int n);
654 | void rgbgr_image(image im);
655 | data copy_data(data d);
656 | data concat_data(data d1, data d2);
657 | data load_cifar10_data(char *filename);
658 | float matrix_topk_accuracy(matrix truth, matrix guess, int k);
659 | void matrix_add_matrix(matrix from, matrix to);
660 | void scale_matrix(matrix m, float scale);
661 | matrix csv_to_matrix(char *filename);
662 | float *network_accuracies(network *net, data d, int n);
663 | float train_network_datum(network *net);
664 | image make_random_image(int w, int h, int c);
665 |
666 | void denormalize_connected_layer(layer l);
667 | void denormalize_convolutional_layer(layer l);
668 | void statistics_connected_layer(layer l);
669 | void rescale_weights(layer l, float scale, float trans);
670 | void rgbgr_weights(layer l);
671 | image *get_weights(layer l);
672 |
673 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen);
674 | void get_detection_detections(layer l, int w, int h, float thresh, detection *dets);
675 |
676 | char *option_find_str(list *l, char *key, char *def);
677 | int option_find_int(list *l, char *key, int def);
678 | int option_find_int_quiet(list *l, char *key, int def);
679 |
680 | network *parse_network_cfg(char *filename);
681 | void save_weights(network *net, char *filename);
682 | void load_weights(network *net, char *filename);
683 | void save_weights_upto(network *net, char *filename, int cutoff);
684 | void load_weights_upto(network *net, char *filename, int start, int cutoff);
685 |
686 | void zero_objectness(layer l);
687 | void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
688 | int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);
689 | void free_network(network *net);
690 | void set_batch_network(network *net, int b);
691 | void set_temp_network(network *net, float t);
692 | image load_image(char *filename, int w, int h, int c);
693 | image load_image_color(char *filename, int w, int h);
694 | image make_image(int w, int h, int c);
695 | image resize_image(image im, int w, int h);
696 | void censor_image(image im, int dx, int dy, int w, int h);
697 | image letterbox_image(image im, int w, int h);
698 | image crop_image(image im, int dx, int dy, int w, int h);
699 | image center_crop_image(image im, int w, int h);
700 | image resize_min(image im, int min);
701 | image resize_max(image im, int max);
702 | image threshold_image(image im, float thresh);
703 | image mask_to_rgb(image mask);
704 | int resize_network(network *net, int w, int h);
705 | void free_matrix(matrix m);
706 | void test_resize(char *filename);
707 | void save_image(image p, const char *name);
708 | void show_image(image p, const char *name);
709 | image copy_image(image p);
710 | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
711 | float get_current_rate(network *net);
712 | void composite_3d(char *f1, char *f2, char *out, int delta);
713 | data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
714 | size_t get_current_batch(network *net);
715 | void constrain_image(image im);
716 | image get_network_image_layer(network *net, int i);
717 | layer get_network_output_layer(network *net);
718 | void top_predictions(network *net, int n, int *index);
719 | void flip_image(image a);
720 | image float_to_image(int w, int h, int c, float *data);
721 | void ghost_image(image source, image dest, int dx, int dy);
722 | float network_accuracy(network *net, data d);
723 | void random_distort_image(image im, float hue, float saturation, float exposure);
724 | void fill_image(image m, float s);
725 | image grayscale_image(image im);
726 | void rotate_image_cw(image im, int times);
727 | double what_time_is_it_now();
728 | image rotate_image(image m, float rad);
729 | void visualize_network(network *net);
730 | float box_iou(box a, box b);
731 | data load_all_cifar10();
732 | box_label *read_boxes(char *filename, int *n);
733 | box float_to_box(float *f, int stride);
734 | void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
735 | void draw_save_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, FILE *save_txt);
736 | matrix network_predict_data(network *net, data test);
737 | image **load_alphabet();
738 | image get_network_image(network *net);
739 | float *network_predict(network *net, float *input);
740 |
741 | int network_width(network *net);
742 | int network_height(network *net);
743 | float *network_predict_image(network *net, image im);
744 | void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets);
745 | detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num);
746 | void free_detections(detection *dets, int n);
747 |
748 | void reset_network_state(network *net, int b);
749 |
750 | char **get_labels(char *filename);
751 | void do_nms_obj(detection *dets, int total, int classes, float thresh);
752 | void do_nms_sort(detection *dets, int total, int classes, float thresh);
753 |
754 | matrix make_matrix(int rows, int cols);
755 |
756 | #ifndef __cplusplus
757 | #ifdef OPENCV
758 | image get_image_from_stream(CvCapture *cap);
759 | #endif
760 | #endif
761 | void free_image(image m);
762 | float train_network(network *net, data d);
763 | pthread_t load_data_in_thread(load_args args);
764 | void load_data_blocking(load_args args);
765 | list *get_paths(char *filename);
766 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
767 | void change_leaves(tree *t, char *leaf_list);
768 |
769 | int find_int_arg(int argc, char **argv, char *arg, int def);
770 | float find_float_arg(int argc, char **argv, char *arg, float def);
771 | int find_arg(int argc, char* argv[], char *arg);
772 | char *find_char_arg(int argc, char **argv, char *arg, char *def);
773 | char *basecfg(char *cfgfile);
774 | void find_replace(char *str, char *orig, char *rep, char *output);
775 | void free_ptrs(void **ptrs, int n);
776 | char *fgetl(FILE *fp);
777 | void strip(char *s);
778 | float sec(clock_t clocks);
779 | void **list_to_array(list *l);
780 | void top_k(float *a, int n, int k, int *index);
781 | int *read_map(char *filename);
782 | void error(const char *s);
783 | int max_index(float *a, int n);
784 | int max_int_index(int *a, int n);
785 | int sample_array(float *a, int n);
786 | int *random_index_order(int min, int max);
787 | void free_list(list *l);
788 | float mse_array(float *a, int n);
789 | float variance_array(float *a, int n);
790 | float mag_array(float *a, int n);
791 | void scale_array(float *a, int n, float s);
792 | float mean_array(float *a, int n);
793 | float sum_array(float *a, int n);
794 | void normalize_array(float *a, int n);
795 | int *read_intlist(char *s, int *n, int d);
796 | size_t rand_size_t();
797 | float rand_normal();
798 | float rand_uniform(float min, float max);
799 |
800 | #endif
801 |
--------------------------------------------------------------------------------
/preprocessing/2D_object_detect/yolov3/detector.c:
--------------------------------------------------------------------------------
#include "darknet.h"

#include <unistd.h>
4 |
/* Lookup table indexed by the detector's class index; print_cocos() writes
 * coco_ids[j] as the "category_id" of class j. The table has 80 entries and
 * the ids are not contiguous (e.g. 12, 26, 29, 30 are absent). */
static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
6 |
7 |
8 | void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
9 | {
10 | list *options = read_data_cfg(datacfg);
11 | char *train_images = option_find_str(options, "train", "data/train.list");
12 | char *backup_directory = option_find_str(options, "backup", "/backup/");
13 |
14 | srand(time(0));
15 | char *base = basecfg(cfgfile);
16 | printf("%s\n", base);
17 | float avg_loss = -1;
18 | network **nets = calloc(ngpus, sizeof(network));
19 |
20 | srand(time(0));
21 | int seed = rand();
22 | int i;
23 | for(i = 0; i < ngpus; ++i){
24 | srand(seed);
25 | #ifdef GPU
26 | cuda_set_device(gpus[i]);
27 | #endif
28 | nets[i] = load_network(cfgfile, weightfile, clear);
29 | nets[i]->learning_rate *= ngpus;
30 | }
31 | srand(time(0));
32 | network *net = nets[0];
33 |
34 | int imgs = net->batch * net->subdivisions * ngpus;
35 | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
36 | data train, buffer;
37 |
38 | layer l = net->layers[net->n - 1];
39 |
40 | int classes = l.classes;
41 | float jitter = l.jitter;
42 |
43 | list *plist = get_paths(train_images);
44 | //int N = plist->size;
45 | char **paths = (char **)list_to_array(plist);
46 |
47 | load_args args = get_base_args(net);
48 | args.coords = l.coords;
49 | args.paths = paths;
50 | args.n = imgs;
51 | args.m = plist->size;
52 | args.classes = classes;
53 | args.jitter = jitter;
54 | args.num_boxes = l.max_boxes;
55 | args.d = &buffer;
56 | args.type = DETECTION_DATA;
57 | //args.type = INSTANCE_DATA;
58 | args.threads = 64;
59 |
60 | pthread_t load_thread = load_data(args);
61 | double time;
62 | int count = 0;
63 | //while(i*imgs < N*120){
64 | while(get_current_batch(net) < net->max_batches){
65 | if(l.random && count++%10 == 0){
66 | printf("Resizing\n");
67 | int dim = (rand() % 10 + 10) * 32;
68 | if (get_current_batch(net)+200 > net->max_batches) dim = 608;
69 | //int dim = (rand() % 4 + 16) * 32;
70 | printf("%d\n", dim);
71 | args.w = dim;
72 | args.h = dim;
73 |
74 | pthread_join(load_thread, 0);
75 | train = buffer;
76 | free_data(train);
77 | load_thread = load_data(args);
78 |
79 | #pragma omp parallel for
80 | for(i = 0; i < ngpus; ++i){
81 | resize_network(nets[i], dim, dim);
82 | }
83 | net = nets[0];
84 | }
85 | time=what_time_is_it_now();
86 | pthread_join(load_thread, 0);
87 | train = buffer;
88 | load_thread = load_data(args);
89 |
90 | /*
91 | int k;
92 | for(k = 0; k < l.max_boxes; ++k){
93 | box b = float_to_box(train.y.vals[10] + 1 + k*5);
94 | if(!b.x) break;
95 | printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
96 | }
97 | */
98 | /*
99 | int zz;
100 | for(zz = 0; zz < train.X.cols; ++zz){
101 | image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]);
102 | int k;
103 | for(k = 0; k < l.max_boxes; ++k){
104 | box b = float_to_box(train.y.vals[zz] + k*5, 1);
105 | printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
106 | draw_bbox(im, b, 1, 1,0,0);
107 | }
108 | show_image(im, "truth11");
109 | cvWaitKey(0);
110 | save_image(im, "truth11");
111 | }
112 | */
113 |
114 | printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
115 |
116 | time=what_time_is_it_now();
117 | float loss = 0;
118 | #ifdef GPU
119 | if(ngpus == 1){
120 | loss = train_network(net, train);
121 | } else {
122 | loss = train_networks(nets, ngpus, train, 4);
123 | }
124 | #else
125 | loss = train_network(net, train);
126 | #endif
127 | if (avg_loss < 0) avg_loss = loss;
128 | avg_loss = avg_loss*.9 + loss*.1;
129 |
130 | i = get_current_batch(net);
131 | printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
132 | if(i%100==0){
133 | #ifdef GPU
134 | if(ngpus != 1) sync_nets(nets, ngpus, 0);
135 | #endif
136 | char buff[256];
137 | sprintf(buff, "%s/%s.backup", backup_directory, base);
138 | save_weights(net, buff);
139 | }
140 | if(i%10000==0 || (i < 1000 && i%100 == 0)){
141 | #ifdef GPU
142 | if(ngpus != 1) sync_nets(nets, ngpus, 0);
143 | #endif
144 | char buff[256];
145 | sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
146 | save_weights(net, buff);
147 | }
148 | free_data(train);
149 | }
150 | #ifdef GPU
151 | if(ngpus != 1) sync_nets(nets, ngpus, 0);
152 | #endif
153 | char buff[256];
154 | sprintf(buff, "%s/%s_final.weights", backup_directory, base);
155 | save_weights(net, buff);
156 | }
157 |
158 |
/*
 * Extract the numeric image id from an image path.
 *
 * The id is parsed with atoi() from the text that follows the last '_' of
 * the file name (e.g. ".../COCO_val2014_000000000042.jpg" -> 42). When the
 * file name contains no '_', the number is read from the start of the file
 * name instead (e.g. "images/000123.jpg" -> 123).
 *
 * Only the final path component is searched for '_', so an underscore in a
 * directory name no longer affects the result, and a name without any '/'
 * or '_' no longer leads to pointer arithmetic on NULL.
 *
 * Returns 0 for a NULL filename or when no leading digits are found.
 */
static int get_coco_image_id(char *filename)
{
    if (!filename) return 0;

    /* Start of the last path component. */
    char *slash = strrchr(filename, '/');
    char *name = slash ? slash + 1 : filename;

    /* The id follows the last underscore of the file name, if there is one. */
    char *underscore = strrchr(name, '_');
    return atoi(underscore ? underscore + 1 : name);
}
166 |
167 | static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h)
168 | {
169 | int i, j;
170 | int image_id = get_coco_image_id(image_path);
171 | for(i = 0; i < num_boxes; ++i){
172 | float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
173 | float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
174 | float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
175 | float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
176 |
177 | if (xmin < 0) xmin = 0;
178 | if (ymin < 0) ymin = 0;
179 | if (xmax > w) xmax = w;
180 | if (ymax > h) ymax = h;
181 |
182 | float bx = xmin;
183 | float by = ymin;
184 | float bw = xmax - xmin;
185 | float bh = ymax - ymin;
186 |
187 | for(j = 0; j < classes; ++j){
188 | if (dets[i].prob[j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]);
189 | }
190 | }
191 | }
192 |
193 | void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h)
194 | {
195 | int i, j;
196 | for(i = 0; i < total; ++i){
197 | float xmin = dets[i].bbox.x - dets[i].bbox.w/2. + 1;
198 | float xmax = dets[i].bbox.x + dets[i].bbox.w/2. + 1;
199 | float ymin = dets[i].bbox.y - dets[i].bbox.h/2. + 1;
200 | float ymax = dets[i].bbox.y + dets[i].bbox.h/2. + 1;
201 |
202 | if (xmin < 1) xmin = 1;
203 | if (ymin < 1) ymin = 1;
204 | if (xmax > w) xmax = w;
205 | if (ymax > h) ymax = h;
206 |
207 | for(j = 0; j < classes; ++j){
208 | if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
209 | xmin, ymin, xmax, ymax);
210 | }
211 | }
212 | }
213 |
214 | void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h)
215 | {
216 | int i, j;
217 | for(i = 0; i < total; ++i){
218 | float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
219 | float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
220 | float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
221 | float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
222 |
223 | if (xmin < 0) xmin = 0;
224 | if (ymin < 0) ymin = 0;
225 | if (xmax > w) xmax = w;
226 | if (ymax > h) ymax = h;
227 |
228 | for(j = 0; j < classes; ++j){
229 | int class = j;
230 | if (dets[i].prob[class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, dets[i].prob[class],
231 | xmin, ymin, xmax, ymax);
232 | }
233 | }
234 | }
235 |
236 | void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
237 | {
238 | int j;
239 | list *options = read_data_cfg(datacfg);
240 | char *valid_images = option_find_str(options, "valid", "data/train.list");
241 | char *name_list = option_find_str(options, "names", "data/names.list");
242 | char *prefix = option_find_str(options, "results", "results");
243 | char **names = get_labels(name_list);
244 | char *mapf = option_find_str(options, "map", 0);
245 | int *map = 0;
246 | if (mapf) map = read_map(mapf);
247 |
248 | network *net = load_network(cfgfile, weightfile, 0);
249 | set_batch_network(net, 2);
250 | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
251 | srand(time(0));
252 |
253 | list *plist = get_paths(valid_images);
254 | char **paths = (char **)list_to_array(plist);
255 |
256 | layer l = net->layers[net->n-1];
257 | int classes = l.classes;
258 |
259 | char buff[1024];
260 | char *type = option_find_str(options, "eval", "voc");
261 | FILE *fp = 0;
262 | FILE **fps = 0;
263 | int coco = 0;
264 | int imagenet = 0;
265 | if(0==strcmp(type, "coco")){
266 | if(!outfile) outfile = "coco_results";
267 | snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
268 | fp = fopen(buff, "w");
269 | fprintf(fp, "[\n");
270 | coco = 1;
271 | } else if(0==strcmp(type, "imagenet")){
272 | if(!outfile) outfile = "imagenet-detection";
273 | snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
274 | fp = fopen(buff, "w");
275 | imagenet = 1;
276 | classes = 200;
277 | } else {
278 | if(!outfile) outfile = "comp4_det_test_";
279 | fps = calloc(classes, sizeof(FILE *));
280 | for(j = 0; j < classes; ++j){
281 | snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
282 | fps[j] = fopen(buff, "w");
283 | }
284 | }
285 |
286 | int m = plist->size;
287 | int i=0;
288 | int t;
289 |
290 | float thresh = .005;
291 | float nms = .45;
292 |
293 | int nthreads = 4;
294 | image *val = calloc(nthreads, sizeof(image));
295 | image *val_resized = calloc(nthreads, sizeof(image));
296 | image *buf = calloc(nthreads, sizeof(image));
297 | image *buf_resized = calloc(nthreads, sizeof(image));
298 | pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
299 |
300 | image input = make_image(net->w, net->h, net->c*2);
301 |
302 | load_args args = {0};
303 | args.w = net->w;
304 | args.h = net->h;
305 | //args.type = IMAGE_DATA;
306 | args.type = LETTERBOX_DATA;
307 |
308 | for(t = 0; t < nthreads; ++t){
309 | args.path = paths[i+t];
310 | args.im = &buf[t];
311 | args.resized = &buf_resized[t];
312 | thr[t] = load_data_in_thread(args);
313 | }
314 | double start = what_time_is_it_now();
315 | for(i = nthreads; i < m+nthreads; i += nthreads){
316 | fprintf(stderr, "%d\n", i);
317 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
318 | pthread_join(thr[t], 0);
319 | val[t] = buf[t];
320 | val_resized[t] = buf_resized[t];
321 | }
322 | for(t = 0; t < nthreads && i+t < m; ++t){
323 | args.path = paths[i+t];
324 | args.im = &buf[t];
325 | args.resized = &buf_resized[t];
326 | thr[t] = load_data_in_thread(args);
327 | }
328 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
329 | char *path = paths[i+t-nthreads];
330 | char *id = basecfg(path);
331 | copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1);
332 | flip_image(val_resized[t]);
333 | copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1);
334 |
335 | network_predict(net, input.data);
336 | int w = val[t].w;
337 | int h = val[t].h;
338 | int num = 0;
339 | detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num);
340 | if (nms) do_nms_sort(dets, num, classes, nms);
341 | if (coco){
342 | print_cocos(fp, path, dets, num, classes, w, h);
343 | } else if (imagenet){
344 | print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h);
345 | } else {
346 | print_detector_detections(fps, id, dets, num, classes, w, h);
347 | }
348 | free_detections(dets, num);
349 | free(id);
350 | free_image(val[t]);
351 | free_image(val_resized[t]);
352 | }
353 | }
354 | for(j = 0; j < classes; ++j){
355 | if(fps) fclose(fps[j]);
356 | }
357 | if(coco){
358 | fseek(fp, -2, SEEK_CUR);
359 | fprintf(fp, "\n]\n");
360 | fclose(fp);
361 | }
362 | fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
363 | }
364 |
365 |
366 | void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
367 | {
368 | int j;
369 | list *options = read_data_cfg(datacfg);
370 | char *valid_images = option_find_str(options, "valid", "data/train.list");
371 | char *name_list = option_find_str(options, "names", "data/names.list");
372 | char *prefix = option_find_str(options, "results", "results");
373 | char **names = get_labels(name_list);
374 | char *mapf = option_find_str(options, "map", 0);
375 | int *map = 0;
376 | if (mapf) map = read_map(mapf);
377 |
378 | network *net = load_network(cfgfile, weightfile, 0);
379 | set_batch_network(net, 1);
380 | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
381 | srand(time(0));
382 |
383 | list *plist = get_paths(valid_images);
384 | char **paths = (char **)list_to_array(plist);
385 |
386 | layer l = net->layers[net->n-1];
387 | int classes = l.classes;
388 |
389 | char buff[1024];
390 | char *type = option_find_str(options, "eval", "voc");
391 | FILE *fp = 0;
392 | FILE **fps = 0;
393 | int coco = 0;
394 | int imagenet = 0;
395 | if(0==strcmp(type, "coco")){
396 | if(!outfile) outfile = "coco_results";
397 | snprintf(buff, 1024, "%s/%s.json", prefix, outfile);
398 | fp = fopen(buff, "w");
399 | fprintf(fp, "[\n");
400 | coco = 1;
401 | } else if(0==strcmp(type, "imagenet")){
402 | if(!outfile) outfile = "imagenet-detection";
403 | snprintf(buff, 1024, "%s/%s.txt", prefix, outfile);
404 | fp = fopen(buff, "w");
405 | imagenet = 1;
406 | classes = 200;
407 | } else {
408 | if(!outfile) outfile = "comp4_det_test_";
409 | fps = calloc(classes, sizeof(FILE *));
410 | for(j = 0; j < classes; ++j){
411 | snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]);
412 | fps[j] = fopen(buff, "w");
413 | }
414 | }
415 |
416 |
417 | int m = plist->size;
418 | int i=0;
419 | int t;
420 |
421 | float thresh = .005;
422 | float nms = .45;
423 |
424 | int nthreads = 4;
425 | image *val = calloc(nthreads, sizeof(image));
426 | image *val_resized = calloc(nthreads, sizeof(image));
427 | image *buf = calloc(nthreads, sizeof(image));
428 | image *buf_resized = calloc(nthreads, sizeof(image));
429 | pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
430 |
431 | load_args args = {0};
432 | args.w = net->w;
433 | args.h = net->h;
434 | //args.type = IMAGE_DATA;
435 | args.type = LETTERBOX_DATA;
436 |
437 | for(t = 0; t < nthreads; ++t){
438 | args.path = paths[i+t];
439 | args.im = &buf[t];
440 | args.resized = &buf_resized[t];
441 | thr[t] = load_data_in_thread(args);
442 | }
443 | double start = what_time_is_it_now();
444 | for(i = nthreads; i < m+nthreads; i += nthreads){
445 | fprintf(stderr, "%d\n", i);
446 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
447 | pthread_join(thr[t], 0);
448 | val[t] = buf[t];
449 | val_resized[t] = buf_resized[t];
450 | }
451 | for(t = 0; t < nthreads && i+t < m; ++t){
452 | args.path = paths[i+t];
453 | args.im = &buf[t];
454 | args.resized = &buf_resized[t];
455 | thr[t] = load_data_in_thread(args);
456 | }
457 | for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
458 | char *path = paths[i+t-nthreads];
459 | char *id = basecfg(path);
460 | float *X = val_resized[t].data;
461 | network_predict(net, X);
462 | int w = val[t].w;
463 | int h = val[t].h;
464 | int nboxes = 0;
465 | detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes);
466 | if (nms) do_nms_sort(dets, nboxes, classes, nms);
467 | if (coco){
468 | print_cocos(fp, path, dets, nboxes, classes, w, h);
469 | } else if (imagenet){
470 | print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h);
471 | } else {
472 | print_detector_detections(fps, id, dets, nboxes, classes, w, h);
473 | }
474 | free_detections(dets, nboxes);
475 | free(id);
476 | free_image(val[t]);
477 | free_image(val_resized[t]);
478 | }
479 | }
480 | for(j = 0; j < classes; ++j){
481 | if(fps) fclose(fps[j]);
482 | }
483 | if(coco){
484 | fseek(fp, -2, SEEK_CUR);
485 | fprintf(fp, "\n]\n");
486 | fclose(fp);
487 | }
488 | fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
489 | }
490 |
491 | void validate_detector_recall(char *cfgfile, char *weightfile)
492 | {
493 | network *net = load_network(cfgfile, weightfile, 0);
494 | set_batch_network(net, 1);
495 | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
496 | srand(time(0));
497 |
498 | list *plist = get_paths("data/coco_val_5k.list");
499 | char **paths = (char **)list_to_array(plist);
500 |
501 | layer l = net->layers[net->n-1];
502 |
503 | int j, k;
504 |
505 | int m = plist->size;
506 | int i=0;
507 |
508 | float thresh = .001;
509 | float iou_thresh = .5;
510 | float nms = .4;
511 |
512 | int total = 0;
513 | int correct = 0;
514 | int proposals = 0;
515 | float avg_iou = 0;
516 |
517 | for(i = 0; i < m; ++i){
518 | char *path = paths[i];
519 | image orig = load_image_color(path, 0, 0);
520 | image sized = resize_image(orig, net->w, net->h);
521 | char *id = basecfg(path);
522 | network_predict(net, sized.data);
523 | int nboxes = 0;
524 | detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes);
525 | if (nms) do_nms_obj(dets, nboxes, 1, nms);
526 |
527 | char labelpath[4096];
528 | find_replace(path, "images", "labels", labelpath);
529 | find_replace(labelpath, "JPEGImages", "labels", labelpath);
530 | find_replace(labelpath, ".jpg", ".txt", labelpath);
531 | find_replace(labelpath, ".JPEG", ".txt", labelpath);
532 |
533 | int num_labels = 0;
534 | box_label *truth = read_boxes(labelpath, &num_labels);
535 | for(k = 0; k < nboxes; ++k){
536 | if(dets[k].objectness > thresh){
537 | ++proposals;
538 | }
539 | }
540 | for (j = 0; j < num_labels; ++j) {
541 | ++total;
542 | box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
543 | float best_iou = 0;
544 | for(k = 0; k < l.w*l.h*l.n; ++k){
545 | float iou = box_iou(dets[k].bbox, t);
546 | if(dets[k].objectness > thresh && iou > best_iou){
547 | best_iou = iou;
548 | }
549 | }
550 | avg_iou += best_iou;
551 | if(best_iou > iou_thresh){
552 | ++correct;
553 | }
554 | }
555 |
556 | fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
557 | free(id);
558 | free_image(orig);
559 | free_image(sized);
560 | }
561 | }
562 |
563 |
564 | void test_detector_folder(char *datacfg, char *cfgfile, char *weightfile, char *input_folder, char *output_folder, float thresh, float hier_thresh)
565 | {
566 | if( !(input_folder && output_folder) ){
567 | printf("Please Provide Image Folder");
568 | return;
569 | }
570 |
571 | list *options = read_data_cfg(datacfg);
572 | char *name_list = option_find_str(options, "names", "data/names.list");
573 | char **names = get_labels(name_list);
574 |
575 |
576 | image **alphabet = load_alphabet();
577 | network *net = load_network(cfgfile, weightfile, 0);
578 | set_batch_network(net, 1);
579 | srand(2222222);
580 | double time;
581 |
582 | char buff[256],buff2[256];
583 | char *input_img_name = buff;
584 | char *output_file = buff2;
585 | float nms=.45;
586 |
587 | FILE *fp;
588 | int save_result_txt=1;
589 |
590 |
591 | int img_counter=-1;
592 | while(1)
593 | {
594 | img_counter++;
595 | strncpy(input_img_name, input_folder, 256);
596 | char frame_index_c[256];
597 | // sprintf(frame_index_c,"/frame%04d.jpg",img_counter); // Important!!! change file name
598 | sprintf(frame_index_c,"/%04d_rgb_raw.jpg",img_counter); // format into 6 digit
599 | // sprintf(frame_index_c,"/%04d.png",img_counter);
600 |
601 | strcat(input_img_name,frame_index_c);
602 |
603 | if( access( input_img_name, F_OK ) == -1 ) {
604 | printf("Cannot find image %s \n",input_img_name);
605 | break;
606 | }
607 |
608 | strncpy(output_file, output_folder, 256);
609 | if (save_result_txt==1)
610 | {
611 | char frame_index_c3[256];
612 | sprintf(frame_index_c3,"_txts/%04d_yolo2_%.2f.txt",img_counter,thresh); // format into 6 digit
613 | char * result_file=strcat(output_file,frame_index_c3);
614 | // printf("save to txt: %s \n",result_file);
615 | fp = fopen(result_file,"w+");
616 | if (fp==NULL)
617 | {
618 | printf("Cannot save to file %s \n",result_file);
619 | break;
620 | }
621 | }
622 |
623 | strncpy(output_file, output_folder, 256);
624 | char frame_index_c2[256];
625 | sprintf(frame_index_c2,"/%04d_yolo2_%.2f",img_counter,thresh); // format into 6 digit
626 | strcat(output_file,frame_index_c2);
627 |
628 |
629 | image im = load_image_color(input_img_name,0,0);
630 | image sized = letterbox_image(im, net->w, net->h);
631 | layer l = net->layers[net->n-1];
632 |
633 | float *X = sized.data;
634 | time=what_time_is_it_now();
635 | network_predict(net, X);
636 | if (img_counter%10==0)
637 | printf("%s: Predicted in %f seconds.\n", input_img_name, what_time_is_it_now()-time);
638 | int nboxes = 0;
639 | detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
640 | if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
641 |
642 | if (save_result_txt==0)
643 | draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes); // if want to show classes, prob in terminal. See inside function.
644 | else
645 | draw_save_detections(im, dets, nboxes, thresh, names, alphabet, l.classes,fp);
646 |
647 | free_detections(dets, nboxes);
648 | save_image(im, output_file);
649 |
650 |
651 | free_image(im);
652 | free_image(sized);
653 |
654 | if (save_result_txt==1)
655 | fclose(fp);
656 | }
657 | }
658 |
659 |
660 | void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
661 | {
662 | list *options = read_data_cfg(datacfg);
663 | char *name_list = option_find_str(options, "names", "data/names.list");
664 | char **names = get_labels(name_list);
665 |
666 | image **alphabet = load_alphabet();
667 | network *net = load_network(cfgfile, weightfile, 0);
668 | set_batch_network(net, 1);
669 | srand(2222222);
670 | double time;
671 | char buff[256];
672 | char *input = buff;
673 | float nms=.45;
674 | while(1){
675 | if(filename){
676 | strncpy(input, filename, 256);
677 | } else {
678 | printf("Enter Image Path: ");
679 | fflush(stdout);
680 | input = fgets(input, 256, stdin);
681 | if(!input) return;
682 | strtok(input, "\n");
683 | }
684 | image im = load_image_color(input,0,0);
685 | image sized = letterbox_image(im, net->w, net->h);
686 | //image sized = resize_image(im, net->w, net->h);
687 | //image sized2 = resize_max(im, net->w);
688 | //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
689 | //resize_network(net, sized.w, sized.h);
690 | layer l = net->layers[net->n-1];
691 |
692 |
693 | float *X = sized.data;
694 | time=what_time_is_it_now();
695 | network_predict(net, X);
696 | printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
697 | int nboxes = 0;
698 | detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
699 | //printf("%d\n", nboxes);
700 | //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
701 | if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
702 | draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
703 | free_detections(dets, nboxes);
704 | if(outfile){
705 | save_image(im, outfile);
706 | }
707 | else{
708 | save_image(im, "predictions");
709 | #ifdef OPENCV
710 | cvNamedWindow("predictions", CV_WINDOW_NORMAL);
711 | if(fullscreen){
712 | cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
713 | }
714 | show_image(im, "predictions");
715 | cvWaitKey(0);
716 | cvDestroyAllWindows();
717 | #endif
718 | }
719 |
720 | free_image(im);
721 | free_image(sized);
722 | if (filename) break;
723 | }
724 | }
725 |
726 | /*
727 | void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
728 | {
729 | #ifdef OPENCV
730 | char *base = basecfg(cfgfile);
731 | network *net = load_network(cfgfile, weightfile, 0);
732 | set_batch_network(net, 1);
733 |
734 | srand(2222222);
735 | CvCapture * cap;
736 |
737 | int w = 1280;
738 | int h = 720;
739 |
740 | if(filename){
741 | cap = cvCaptureFromFile(filename);
742 | }else{
743 | cap = cvCaptureFromCAM(cam_index);
744 | }
745 |
746 | if(w){
747 | cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
748 | }
749 | if(h){
750 | cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
751 | }
752 |
753 | if(!cap) error("Couldn't connect to webcam.\n");
754 | cvNamedWindow(base, CV_WINDOW_NORMAL);
755 | cvResizeWindow(base, 512, 512);
756 | float fps = 0;
757 | int i;
758 | float nms = .45;
759 |
760 | while(1){
761 | image in = get_image_from_stream(cap);
762 | //image in_s = resize_image(in, net->w, net->h);
763 | image in_s = letterbox_image(in, net->w, net->h);
764 | layer l = net->layers[net->n-1];
765 |
766 | float *X = in_s.data;
767 | network_predict(net, X);
768 | int nboxes = 0;
769 | detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes);
770 | //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
771 | if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
772 |
773 | for(i = 0; i < nboxes; ++i){
774 | if(dets[i].prob[class] > thresh){
775 | box b = dets[i].bbox;
776 | int left = b.x-b.w/2.;
777 | int top = b.y-b.h/2.;
778 | censor_image(in, left, top, b.w, b.h);
779 | }
780 | }
781 | show_image(in, base);
782 | cvWaitKey(10);
783 | free_detections(dets, nboxes);
784 |
785 |
786 | free_image(in_s);
787 | free_image(in);
788 |
789 |
790 | float curr = 0;
791 | fps = .9*fps + .1*curr;
792 | for(i = 0; i < skip; ++i){
793 | image in = get_image_from_stream(cap);
794 | free_image(in);
795 | }
796 | }
797 | #endif
798 | }
799 |
800 | void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
801 | {
802 | #ifdef OPENCV
803 | char *base = basecfg(cfgfile);
804 | network *net = load_network(cfgfile, weightfile, 0);
805 | set_batch_network(net, 1);
806 |
807 | srand(2222222);
808 | CvCapture * cap;
809 |
810 | int w = 1280;
811 | int h = 720;
812 |
813 | if(filename){
814 | cap = cvCaptureFromFile(filename);
815 | }else{
816 | cap = cvCaptureFromCAM(cam_index);
817 | }
818 |
819 | if(w){
820 | cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
821 | }
822 | if(h){
823 | cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
824 | }
825 |
826 | if(!cap) error("Couldn't connect to webcam.\n");
827 | cvNamedWindow(base, CV_WINDOW_NORMAL);
828 | cvResizeWindow(base, 512, 512);
829 | float fps = 0;
830 | int i;
831 | int count = 0;
832 | float nms = .45;
833 |
834 | while(1){
835 | image in = get_image_from_stream(cap);
836 | //image in_s = resize_image(in, net->w, net->h);
837 | image in_s = letterbox_image(in, net->w, net->h);
838 | layer l = net->layers[net->n-1];
839 |
840 | show_image(in, base);
841 |
842 | int nboxes = 0;
843 | float *X = in_s.data;
844 | network_predict(net, X);
845 | detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes);
846 | //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
847 | if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
848 |
849 | for(i = 0; i < nboxes; ++i){
850 | if(dets[i].prob[class] > thresh){
851 | box b = dets[i].bbox;
852 | int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h;
853 | int dx = b.x*in.w-size/2.;
854 | int dy = b.y*in.h-size/2.;
855 | image bim = crop_image(in, dx, dy, size, size);
856 | char buff[2048];
857 | sprintf(buff, "results/extract/%07d", count);
858 | ++count;
859 | save_image(bim, buff);
860 | free_image(bim);
861 | }
862 | }
863 | free_detections(dets, nboxes);
864 |
865 |
866 | free_image(in_s);
867 | free_image(in);
868 |
869 |
870 | float curr = 0;
871 | fps = .9*fps + .1*curr;
872 | for(i = 0; i < skip; ++i){
873 | image in = get_image_from_stream(cap);
874 | free_image(in);
875 | }
876 | }
877 | #endif
878 | }
879 | */
880 |
881 | /*
882 | void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets)
883 | {
884 | network_predict_image(net, im);
885 | layer l = net->layers[net->n-1];
886 | int nboxes = num_boxes(net);
887 | fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets);
888 | if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
889 | }
890 | */
891 |
892 | void run_detector(int argc, char **argv)
893 | {
894 | char *prefix = find_char_arg(argc, argv, "-prefix", 0);
895 | float thresh = find_float_arg(argc, argv, "-thresh", .5);
896 | float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
897 | int cam_index = find_int_arg(argc, argv, "-c", 0);
898 | int frame_skip = find_int_arg(argc, argv, "-s", 0);
899 | int avg = find_int_arg(argc, argv, "-avg", 3);
900 | if(argc < 4){
901 | fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
902 | return;
903 | }
904 | char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
905 | char *outfile = find_char_arg(argc, argv, "-out", 0);
906 | int *gpus = 0;
907 | int gpu = 0;
908 | int ngpus = 0;
909 | if(gpu_list){
910 | printf("%s\n", gpu_list);
911 | int len = strlen(gpu_list);
912 | ngpus = 1;
913 | int i;
914 | for(i = 0; i < len; ++i){
915 | if (gpu_list[i] == ',') ++ngpus;
916 | }
917 | gpus = calloc(ngpus, sizeof(int));
918 | for(i = 0; i < ngpus; ++i){
919 | gpus[i] = atoi(gpu_list);
920 | gpu_list = strchr(gpu_list, ',')+1;
921 | }
922 | } else {
923 | gpu = gpu_index;
924 | gpus = &gpu;
925 | ngpus = 1;
926 | }
927 |
928 | int clear = find_arg(argc, argv, "-clear");
929 | int fullscreen = find_arg(argc, argv, "-fullscreen");
930 | int width = find_int_arg(argc, argv, "-w", 0);
931 | int height = find_int_arg(argc, argv, "-h", 0);
932 | int fps = find_int_arg(argc, argv, "-fps", 0);
933 | //int class = find_int_arg(argc, argv, "-class", 0);
934 |
935 | char *datacfg = argv[3];
936 | char *cfg = argv[4];
937 | char *weights = (argc > 5) ? argv[5] : 0;
938 | char *filename = (argc > 6) ? argv[6]: 0;
939 | if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
940 | else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
941 | else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
942 | else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
943 | else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
944 | else if(0==strcmp(argv[2], "demo")) {
945 | list *options = read_data_cfg(datacfg);
946 | int classes = option_find_int(options, "classes", 20);
947 | char *name_list = option_find_str(options, "names", "data/names.list");
948 | char **names = get_labels(name_list);
949 | demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
950 | }
951 | //else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
952 | //else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
953 | }
954 |
--------------------------------------------------------------------------------
/preprocessing/README.md:
--------------------------------------------------------------------------------
1 |
2 | We explain the preprocessing steps in more detail here. Feel free to contact us if you have problems running them.
3 |
4 |
5 | ### 2D object detection
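6 | We use [Yolo](https://pjreddie.com/darknet/yolo/) to detect 2D object bounding boxes; it can be replaced by other detectors. We added functions that batch-predict all images in a folder and save the results as images and txt files. See the modified code under ```2D_object_detect```. You might need to change the RGB file name pattern in ```test_detector_folder``` of detector.c (originally noted as line 495; the line number differs between the Yolo versions, so look for the ```sprintf``` that fills ```frame_index_c```).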
7 |
8 |
9 | Create two folders to store the resulting images and txt files: ```yolov2_obj``` and ```yolov2_obj_txts```. After compiling Yolo, run the command:
10 |
11 | ```bash
12 | ./darknet detect_folder cfg/yolo.cfg trained_model/yolo.weights path/to/rgb path/to/yolov2_obj -thresh 0.25
13 | ```
14 |
15 | Make sure the cfg and weights files match the Yolo version you are using.
16 |
--------------------------------------------------------------------------------
/utils/VP_support_edge_infos.m:
--------------------------------------------------------------------------------
1 | function all_vp_bound_edge_angles = VP_support_edge_infos(VPs,edge_mid_pts,edge_angles,vp_support_angle_thres)
2 | % For each of the three vanishing points (VP), find the two outermost image edges supported by it.
3 | %
4 | % Inputs:
5 | %   VPs                     3*2, one VP [x y] per row. VPs(3,1) may be nan, meaning VP 3 is at vertical infinity.
6 | %   edge_mid_pts            n*2, middle point [x y] of every detected edge.
7 | %   edge_angles             n*1, angle of every edge in radians. They are compared with angles normalized by
8 | %                           normalize_to_pi, so presumably lie in [-pi/2, pi/2] -- TODO confirm with the caller.
9 | %   vp_support_angle_thres  1*2 thresholds in DEGREES: (1) is used for VP 1 and VP 2, (2) for VP 3.
10 | % Output:
11 | %   all_vp_bound_edge_angles  3*2. Row k holds the angles (values copied from edge_angles) of the two boundary
12 | %                           edges supported by VP k: [low top] for VP 1 and 2, [left right] for VP 3.
13 | %                           An entry is nan if no supporting edge was found.
14 |
15 | all_vp_bound_edge_angles = nan(3,2);
16 | if (size(edge_mid_pts,1)==0) % no edges at all: every entry stays nan
17 |     return;
18 | end
19 |
20 | vp12_edge_angle_thre = vp_support_angle_thres(1);
21 | vp3_edge_angle_thre = vp_support_angle_thres(2);
22 |
23 | % angle of the ray [vp -> edge middle pt] for every edge, as returned by atan2
24 | vp1_ray_angle = atan2(edge_mid_pts(:,2)-VPs(1,2), edge_mid_pts(:,1)-VPs(1,1));
25 | vp2_ray_angle = atan2(edge_mid_pts(:,2)-VPs(2,2), edge_mid_pts(:,1)-VPs(2,1));
26 | if ~isnan(VPs(3,1))
27 |     vp3_ray_angle = atan2(edge_mid_pts(:,2)-VPs(3,2), edge_mid_pts(:,1)-VPs(3,1));
28 | else % nan because vp_3 is vertically infinity: use a vertical ray for every edge
29 |     vp3_ray_angle = pi/2*ones(size(edge_angles));
30 | end
31 |
32 | % Pick the VP supported boundary edges. Image y goes down, so a larger ray angle means further clockwise.
33 | % TODO (kept from the original author): which of min/max is "low"/"top" should depend on whether vp1 is
34 | % on the left or on the right; it is currently fixed.
35 | [vp1_top_edge_angle, vp1_low_edge_angle] = outermost_supported_edge_angles(vp1_ray_angle, edge_angles, vp12_edge_angle_thre); % vp1: top = min ray angle, low = max
36 | [vp2_low_edge_angle, vp2_top_edge_angle] = outermost_supported_edge_angles(vp2_ray_angle, edge_angles, vp12_edge_angle_thre); % vp2: low = min ray angle, top = max
37 | [vp3_left_edge_angle, vp3_right_edge_angle] = outermost_supported_edge_angles(vp3_ray_angle, edge_angles, vp3_edge_angle_thre); % vp3: left = min ray angle, right = max
38 |
39 | all_vp_bound_edge_angles = [vp1_low_edge_angle vp1_top_edge_angle;
40 |                             vp2_low_edge_angle vp2_top_edge_angle;
41 |                             vp3_left_edge_angle vp3_right_edge_angle];
42 |
43 |
44 | function [angle_at_min_ray, angle_at_max_ray] = outermost_supported_edge_angles(ray_angles_raw, edge_angles, angle_thre_deg)
45 | % Among the edges supported by one VP, return the edge angle at the smallest and at the largest ray angle.
46 | % ray_angles_raw: n*1 angle of [vp -> edge middle pt]. angle_thre_deg: support threshold in degrees.
47 | % Both outputs are nan when no edge is supported (e.g. the threshold is too strict).
48 |
49 | angle_at_min_ray = nan;
50 | angle_at_max_ray = nan;
51 |
52 | % an edge is supported by (passes through) the VP if its direction is close to the ray direction
53 | ray_angles_norm = normalize_to_pi(ray_angles_raw,true); % [-pi/2 pi/2]
54 | angle_diff = abs(edge_angles-ray_angles_norm); % [0 pi]
55 | angle_diff = min([angle_diff,pi-angle_diff],[],2); % directions are equal modulo pi
56 | inlier_edge_id = find(angle_diff < angle_thre_deg/180*pi);
57 | if isempty(inlier_edge_id)
58 |     return;
59 | end
60 |
61 | % remove the -pi/pi jump before ranking the rays
62 | ray_angles_inlier_shift = smooth_jump_angles(ray_angles_raw(inlier_edge_id,:));
63 | [~,min_ray_id] = min(ray_angles_inlier_shift);
64 | [~,max_ray_id] = max(ray_angles_inlier_shift);
65 | angle_at_min_ray = edge_angles(inlier_edge_id(min_ray_id));
66 | angle_at_max_ray = edge_angles(inlier_edge_id(max_ray_id));
--------------------------------------------------------------------------------
/utils/box_edge_alignment_angle_error.m:
--------------------------------------------------------------------------------
1 | function total_angle_diff = box_edge_alignment_angle_error(all_vp_bound_edge_angles,vps_box_edge_pt_ids,box_corners_2d)
2 | % Score a cuboid proposal by the angle difference between its projected edges and the VP aligned image edges.
3 | %
4 | % all_vp_bound_edge_angles  one row per VP with the angles of the VP aligned image edges (3*2); nan if not found.
5 | % vps_box_edge_pt_ids       one row per VP describing two box edges [e1_1 e1_2 e2_1 e2_2] as column indices
6 | %                           into box_corners_2d. A row whose first entry is not positive has no edges to compare.
7 | % box_corners_2d            2*8 corner pixels, each column is [x;y].
8 | % total_angle_diff          sum (radians) over all VPs: for a VP with usable image edges, the smallest angle
9 | %                           difference of each of its two box edges; otherwise one constant penalty.
10 |
11 | total_angle_diff = 0;
12 | not_found_penalty = 30/180*pi*2; % constant cost for a VP without supporting lines: 30 degree for each of its two box edges
13 | num_vps = size(vps_box_edge_pt_ids,1);
14 | for vp_id = 1:num_vps
15 |     row_angles = all_vp_bound_edge_angles(vp_id,:);
16 |     valid_angles = row_angles(~isnan(row_angles)); % row vector 1*2, 1*1 or 1*0
17 |
18 |     can_compare = (size(valid_angles,2)>0) && (vps_box_edge_pt_ids(vp_id,1)>0);
19 |     if ~can_compare
20 |         total_angle_diff = total_angle_diff+not_found_penalty;
21 |         continue;
22 |     end
23 |
24 |     % for each of the two box edges, take the closest of the boundary image edges
25 |     for ee_id = 1:2
26 |         pt_ids = vps_box_edge_pt_ids(vp_id,2*ee_id-1:2*ee_id);
27 |         edge_ends = box_corners_2d(:, pt_ids); % [ x1 x2;y1 y2 ]
28 |         delta_y = edge_ends(2,2)-edge_ends(2,1);
29 |         delta_x = edge_ends(1,2)-edge_ends(1,1);
30 |         box_edge_angle = normalize_to_pi( atan2(delta_y, delta_x),true ); % [-pi/2 pi/2]
31 |         diffs = abs(box_edge_angle - valid_angles);
32 |         closest_diff = min(min( [diffs;pi-diffs],[],2)); % line directions are equal modulo pi
33 |         total_angle_diff = total_angle_diff+closest_diff;
34 |     end
35 | end
--------------------------------------------------------------------------------
/utils/box_edge_sum_dists.m:
--------------------------------------------------------------------------------
1 | function sum_dist = box_edge_sum_dists(dist_map,box_corners_2d,edge_pt_ids, reweight_edge_distance)
2 | % Sum up dist_map values sampled along a set of cuboid edges.
3 | %
4 | % dist_map                2D matrix read as dist_map(y,x): row = pixel y, column = pixel x.
5 | % box_corners_2d          2*N corner pixels, each column is [x;y].
6 | % edge_pt_ids             n*2, each row stores the column indices (into box_corners_2d) of an edge's two end points.
7 | % reweight_edge_distance  optional, default false. If true, samples of edge rows 5-6 are scaled by 3/2 and
8 | %                         samples of edge row 7 by 2, to compensate for a configuration with fewer visible
9 | %                         edges, e.g. the edge list [1 2;2 3;3 4;4 1;2 6;3 5;5 6].
10 | % Every edge contributes 11 samples (both end points plus 9 evenly spaced points in between), rounded to
11 | % integer pixels. Sample points are not clipped, so they must fall inside dist_map.
12 |
13 | if (nargin<4)
14 |     reweight_edge_distance=false;
15 | end
16 |
17 | sum_dist = 0;
18 | num_edges = size(edge_pt_ids,1);
19 | for edge_id = 1:num_edges
20 |     pt_a = box_corners_2d(:, edge_pt_ids(edge_id,1));
21 |     pt_b = box_corners_2d(:, edge_pt_ids(edge_id,2));
22 |
23 |     % decide once per edge which re-weighting applies
24 |     scale_by_three_halves = false;
25 |     scale_by_two = false;
26 |     if (reweight_edge_distance)
27 |         scale_by_three_halves = (5<=edge_id) && (edge_id<=6);
28 |         scale_by_two = (7==edge_id);
29 |     end
30 |
31 |     for step = 0:1:10
32 |         frac_a = step/10.0;
33 |         pixel = round(frac_a*pt_a+(1-frac_a)*pt_b);
34 |         sample_dist = dist_map(pixel(2),pixel(1));
35 |         if (scale_by_three_halves)
36 |             sample_dist = sample_dist*3/2;
37 |         end
38 |         if (scale_by_two)
39 |             sample_dist = sample_dist*2;
40 |         end
41 |         sum_dist = sum_dist+sample_dist;
42 |     end
43 | end
44 | end
--------------------------------------------------------------------------------
/utils/box_edge_sum_dists2.m:
--------------------------------------------------------------------------------
1 | function mean_dist = box_edge_sum_dists2(dist_map,box_corners_2d,edge_pt_ids, reweight_edge_distance)
2 | % Mean of dist_map values sampled along a set of cuboid edges.
3 | % Compared to box_edge_sum_dists(), longer edges get more samples (about one every 10 pixels) instead of a fixed number.
4 | %
5 | % dist_map                2D matrix read as dist_map(y,x): row = pixel y, column = pixel x.
6 | % box_corners_2d          2*N corner pixels, each column is [x;y].
7 | % edge_pt_ids             n*2, each row stores the column indices (into box_corners_2d) of an edge's two end points.
8 | % reweight_edge_distance  optional, default false. If true, samples of edge rows 5-6 are scaled by 3/2 and
9 | %                         samples of edge row 7 by 2, to compensate for a configuration with fewer visible
10 | %                         edges, e.g. the edge list [1 2;2 3;3 4;4 1;2 6;3 5;5 6].
11 | % mean_dist               (sum of the possibly re-weighted samples) / (number of samples).
12 | %                         NOTE: if no edge is long enough to get a sample this is 0/0 = nan.
13 |
14 | if (nargin<4)
15 |     reweight_edge_distance=false;
16 | end
17 |
18 | sum_dist = 0;
19 | total_pt_num = 0;
20 | num_edges = size(edge_pt_ids,1);
21 | for edge_id = 1:num_edges
22 |     pt_a = box_corners_2d(:, edge_pt_ids(edge_id,1));
23 |     pt_b = box_corners_2d(:, edge_pt_ids(edge_id,2));
24 |
25 |     % one sample for every 10 pixels of edge length
26 |     sample_num = round(norm(pt_b-pt_a)/10.0);
27 |     total_pt_num = total_pt_num+sample_num;
28 |
29 |     % decide once per edge which re-weighting applies
30 |     scale_by_three_halves = false;
31 |     scale_by_two = false;
32 |     if (reweight_edge_distance)
33 |         scale_by_three_halves = (5<=edge_id) && (edge_id<=6);
34 |         scale_by_two = (7==edge_id);
35 |     end
36 |
37 |     for step = 1:sample_num
38 |         frac_a = step/sample_num;
39 |         pixel = round(frac_a*pt_a+(1-frac_a)*pt_b);
40 |         sample_dist = dist_map(pixel(2),pixel(1));
41 |         if (scale_by_three_halves)
42 |             sample_dist = sample_dist*3/2;
43 |         end
44 |         if (scale_by_two)
45 |             sample_dist = sample_dist*2;
46 |         end
47 |         sum_dist = sum_dist+sample_dist;
48 |     end
49 | end
50 | mean_dist = sum_dist/total_pt_num;
51 | end
--------------------------------------------------------------------------------
/utils/change_2d_corner_to_3d_object.m:
--------------------------------------------------------------------------------
1 | function sample_obj = change_2d_corner_to_3d_object(box_corners_2d_float,configs, ground_plane_sensor, transToWolrd, invK,projectionMatrix,sanity_check)
2 | % Unproject the image corners of a cuboid proposal onto the 3D ground plane and build the cuboid struct.
3 | %
4 | % Inputs:
5 | %   box_corners_2d_float  2*8 image pixels [x;y], in the corner order used during proposal generation.
6 | %                         Columns 5:8 are unprojected onto the ground plane, column 2 is used for the height.
7 | %   configs               [configuration_id, vp position (1 = left, 2 = right), yaw].
8 | %   ground_plane_sensor   ground plane equation in sensor frame, passed to plane_hits_3d.
9 | %   transToWolrd          4*4 camera pose, passed to plane_hits_3d.
10 | %   invK                  inverse of the calibration matrix.
11 | %   projectionMatrix      only used by the sanity check (passed to projectToImage).
12 | %   sanity_check          if true, re-project the 3D corners and require them to equal the rounded 2D corners.
13 | % Output:
14 | %   sample_obj            struct with fields pos, rotY, scale (half length/width/height), box_config_type,
15 | %                         mymodel, box_corners_2d (2*8 integer pixels in the universal cuboid corner order)
16 | %                         and box_corners_3d_world.
17 | % Errors if configs(2) is neither 1 nor 2, or if the sanity check fails.
18 |
19 | obj_gnd_pt_world_3d = plane_hits_3d(transToWolrd,invK,ground_plane_sensor,box_corners_2d_float(:,5:8)); % 3*n each column is a 3D point
20 |
21 | length_half = norm(obj_gnd_pt_world_3d(1:2,1)-obj_gnd_pt_world_3d(1:2,4))/2; % along object x direction, corner 5-8
22 | width_half = norm(obj_gnd_pt_world_3d(1:2,1)-obj_gnd_pt_world_3d(1:2,2))/2;  % along object y direction, corner 5-6
23 |
24 | % to compute the height, unproject corner 2 onto the vertical plane through ground corners 5-6
25 | partwall_plane_world = get_wall_plane_equation([obj_gnd_pt_world_3d(:,1)' obj_gnd_pt_world_3d(:,2)']);
26 | partwall_plane_sensor = partwall_plane_world*transToWolrd; % wall plane in sensor frame
27 | obj_top_pt_world_3d = plane_hits_3d(transToWolrd,invK,partwall_plane_sensor,box_corners_2d_float(:,2));
28 | height_half = obj_top_pt_world_3d(3,1)/2;
29 |
30 | mean_obj_x = mean(obj_gnd_pt_world_3d(1,:));
31 | mean_obj_y = mean(obj_gnd_pt_world_3d(2,:));
32 |
33 | vp_position = configs(2); % position of vp1 (or of vp2 for configuration 5): 1 = left, 2 = right
34 | yaw_esti = configs(3);
35 | sample_obj.pos = [mean_obj_x,mean_obj_y,height_half];
36 | sample_obj.rotY = yaw_esti;
37 | sample_obj.scale = [length_half,width_half,height_half];
38 | sample_obj.box_config_type = configs(1:2);
39 |
40 | % IMPORTANT!!! the corner order during generation is different from the final universal cuboid struct
41 | cuboid_to_boxstructIds = [];
42 | if (configs(1)==5)
43 |     if (vp_position==1)      % vp2 on left
44 |         cuboid_to_boxstructIds=[7 8 5 6 1 4 3 2];
45 |     elseif (vp_position==2)  % vp2 on right
46 |         cuboid_to_boxstructIds=[8 7 6 5 4 1 2 3];
47 |     end
48 | else
49 |     if (vp_position==1)      % vp1 on left, for all other configurations
50 |         cuboid_to_boxstructIds=[6 5 8 7 2 3 4 1];
51 |     elseif (vp_position==2)  % vp1 on right, for all other configurations
52 |         cuboid_to_boxstructIds=[5 6 7 8 3 2 1 4];
53 |     end
54 | end
55 | if isempty(cuboid_to_boxstructIds)
56 |     % previously this case failed later with an "undefined variable" error
57 |     error('change_2d_corner_to_3d_object: configs(2) must be 1 (left) or 2 (right).');
58 | end
59 |
60 | box_corners_2d_int = round(box_corners_2d_float);
61 | sample_obj.mymodel=1;
62 | sample_obj.box_corners_2d = box_corners_2d_int(:,cuboid_to_boxstructIds); % 2*N x;y
63 | sample_obj.box_corners_3d_world = compute3D_BoxCorner(sample_obj); % ground corners should match obj_gnd_pt_world_3d, with the universal corner indexing
64 |
65 | % check if the cuboid corners, position, indexing are correct
66 | if (sanity_check)
67 |     box_corners_2d_method_2 = round(projectToImage(sample_obj.box_corners_3d_world,projectionMatrix)); % should match box_corners_2d exactly!!
68 |     % isequal compares ALL entries. The previous "if any(A ~= B)" on matrices was only true when every
69 |     % column contained a mismatch, so most failures went undetected.
70 |     if (~isequal(sample_obj.box_corners_2d, box_corners_2d_method_2))
71 |         msg = 'Change to 3D box Sanity Check Failed !!!!';
72 |         % print both corner sets before raising (this was unreachable after error() before)
73 |         disp('re-projected box corners:'); disp(box_corners_2d_method_2);
74 |         disp('expected box corners:'); disp(sample_obj.box_corners_2d);
75 |         error(msg);
76 |     end
77 | end
--------------------------------------------------------------------------------
/utils/common_utils/bbox_overlap_ratio.m:
--------------------------------------------------------------------------------
1 | function [overlap_1,overlap_2]= bbox_overlap_ratio(bboxA,bboxB)
2 | % Overlap ratio of two boxes, each given as 1*4 [x y width height].
3 | % overlap_1 = intersection area / area of bboxA,  overlap_2 = intersection area / area of bboxB.
4 |
5 | shared_area = rectint(bboxA,bboxB);
6 |
7 | width_a = bboxA(3);  height_a = bboxA(4);
8 | width_b = bboxB(3);  height_b = bboxB(4);
9 |
10 | overlap_1 = shared_area/width_a/height_a;
11 | overlap_2 = shared_area/width_b/height_b;
--------------------------------------------------------------------------------
/utils/common_utils/check_inside_box.m:
--------------------------------------------------------------------------------
1 | function whether_inside = check_inside_box( pt, box_left_top, box_right_bottom)
2 | % Check whether a point lies inside an axis aligned rectangle (the border counts as inside).
3 | % All three inputs are 2D points [x y].
4 |
5 | whether_inside = false;
6 | if ~(box_left_top(1)<=pt(1))       % left of the box
7 |     return;
8 | end
9 | if ~(pt(1)<=box_right_bottom(1))   % right of the box
10 |     return;
11 | end
12 | if ~(box_left_top(2)<=pt(2))       % above the box
13 |     return;
14 | end
15 | if ~(pt(2)<=box_right_bottom(2))   % below the box
16 |     return;
17 | end
18 | whether_inside = true;
--------------------------------------------------------------------------------
/utils/common_utils/getVanishingPoints.m:
--------------------------------------------------------------------------------
1 | function [vp_1,vp_2,vp_3] = getVanishingPoints(Kalib, invR, yaw)
2 | % Vanishing points (each returned as a row) of the three axes of an object that lies on the ground.
3 | % World frame is on the ground (xy on ground, z vertical up); the object has zero roll and pitch.
4 | % Kalib: calibration matrix. invR: world to camera rotation. yaw: object yaw.
5 |
6 | world_dir_to_image = Kalib*invR;
7 |
8 | object_x_axis = [cos(yaw) sin(yaw) 0]';
9 | object_y_axis = [-sin(yaw) cos(yaw) 0]';
10 | object_z_axis = [0 0 1]';
11 |
12 | vp_1 = (homo_to_real_coord(world_dir_to_image*object_x_axis))'; % for object x axis
13 | vp_2 = (homo_to_real_coord(world_dir_to_image*object_y_axis))'; % for object y axis
14 | vp_3 = (homo_to_real_coord(world_dir_to_image*object_z_axis))'; % for object z axis
--------------------------------------------------------------------------------
/utils/common_utils/get_wall_plane_equation.m:
--------------------------------------------------------------------------------
1 | function plane_equation = get_wall_plane_equation(ground_seg3d_line_world)
2 | % Plane equation [nx ny nz d] (world frame) of the wall standing vertically on a ground line segment.
3 | % input: 1*6 wall line segment on the 3D ground [x1 y1 z1 x2 y2 z2], z1=z2=0.
4 | % The sign is chosen so that d >= 0, i.e. the normal points towards the side containing the world origin.
5 |
6 | seg_start = ground_seg3d_line_world(1,1:3);
7 | seg_end = ground_seg3d_line_world(1,4:6);
8 |
9 | % the wall normal is perpendicular to both the segment and the ground normal [0,0,1]
10 | wall_normal = cross(seg_start-seg_end,[0,0,1]);
11 | wall_normal = wall_normal/norm(wall_normal);
12 | dist = -wall_normal*seg_start';
13 |
14 | if (dist<0) % flip the whole equation
15 |     wall_normal = -wall_normal;
16 |     dist = -dist;
17 | end
18 | plane_equation = [wall_normal dist];
--------------------------------------------------------------------------------
/utils/common_utils/normalize_to_pi.m:
--------------------------------------------------------------------------------
1 | function new_angle = normalize_to_pi(angle,whether_radian)
2 | % Map line angles from [-180,180] to [-90,90] by adding/subtracting 180 (a line has no direction).
3 | %
4 | % angle           scalar, vector or matrix of angles; every element is normalized.
5 | % whether_radian  true: angle is given and returned in radians ([-pi,pi] -> [-pi/2,pi/2]);
6 | %                 false: angle is given and returned in degrees.
7 | % Values exactly at +-90 degree are left unchanged.
8 |
9 | if (whether_radian)
10 |     angle = angle/pi*180; % work in degrees internally
11 | end
12 |
13 | % vectorized over all elements. The previous loop ran over 1:length(angle) and therefore skipped
14 | % part of the elements of a matrix input.
15 | new_angle = angle;
16 | above = angle>90;
17 | below = angle<-90;
18 | new_angle(above) = angle(above)-180;
19 | new_angle(below) = angle(below)+180;
20 |
21 | if (whether_radian)
22 |     new_angle = new_angle/180*pi;
23 | end
24 | end
--------------------------------------------------------------------------------
/utils/common_utils/plane_hits_3d.m:
--------------------------------------------------------------------------------
1 | function pts_3d_world=plane_hits_3d(transToworld,invK,plane_sensor,pixels)
2 | % Intersect pixel rays with a 3D plane and return the hit points in world frame.
3 | % The rays start at the camera center and pass through the pixels.
4 | % transToworld: 4*4 camera pose.  invK: inverse of the calibration matrix.
5 | % plane_sensor: 1*4 plane equation in sensor frame.
6 | % pixels: 2*n, each column is a pt [x;y], x is horizontal, y is vertical.
7 | % output: 3*n points in world frame.
8 |
9 | num_pixels = size(pixels,2);
10 | pixels_homo = [pixels;ones(1,num_pixels)];
11 | rays_sensor = invK*pixels_homo; % 3*n
12 |
13 | hits_sensor = ray_plane_interact(rays_sensor,plane_sensor);
14 |
15 | % move the hit points from sensor frame to world frame
16 | hits_world_homo = transToworld*real_to_homo_coord(hits_sensor);
17 | pts_3d_world = homo_to_real_coord(hits_world_homo);
18 | end
--------------------------------------------------------------------------------
/utils/common_utils/ray_plane_interact.m:
--------------------------------------------------------------------------------
1 | function [intersections,frac]=ray_plane_interact(rays,plane)
2 | % Intersect rays through the origin with a plane; everything is in the same (sensor) frame.
3 | % rays:  3*n, each column is the end point of a ray vector starting from the origin.
4 | % plane: 1*4 plane equation [a b c d].
5 | % frac:  1*n scale factor of every ray, intersections = frac .* rays (3*n).
6 | % A ray parallel to the plane gives a zero denominator, i.e. Inf/NaN entries.
7 |
8 | plane_normal = plane(1:3);
9 | plane_offset = plane(4);
10 |
11 | denom = plane_normal*rays;          % 1*n
12 | frac = (-plane_offset)./denom;      % 1*n
13 | intersections = frac([1 1 1],:).*rays; % 3*n, frac replicated to all three rows
14 | end
--------------------------------------------------------------------------------
/utils/common_utils/smooth_jump_angles.m:
--------------------------------------------------------------------------------
1 | function new_angles = smooth_jump_angles(raw_angles)
2 | % Remove the jump between -pi and pi so that the angles change smoothly, which makes it easy to
3 | % find the outmost rays with min/max.
4 | %
5 | % The first element is used as the base angle: every element that differs from it by more than pi
6 | % is shifted by 2*pi towards it. (Assumes all angles lie within [-pi pi] around the base.)
7 | % Works element-wise on inputs of any shape; an empty input is returned unchanged.
8 |
9 | new_angles = raw_angles;
10 | if isempty(raw_angles)
11 |     return;
12 | end
13 |
14 | % vectorized over all elements. The previous loop ran over 1:length(raw_angles) and therefore
15 | % skipped part of the elements of a matrix input.
16 | offset_to_base = raw_angles-raw_angles(1);
17 | too_low = offset_to_base<-pi;
18 | too_high = offset_to_base>pi;
19 | new_angles(too_low) = raw_angles(too_low)+2*pi;
20 | new_angles(too_high) = raw_angles(too_high)-2*pi;
21 | end
--------------------------------------------------------------------------------
/utils/cuboid_utils/compute3D_BoxCorner.m:
--------------------------------------------------------------------------------
1 | function [corners_3d_world] = compute3D_BoxCorner(object)
2 | % Compute the 8 corners (3*8, world frame) of a cuboid struct.
3 | % Corner indexing (columns of the output):
4 | %       8 ———— 5
5 | %      /|     /|
6 | %     7 ———— 6 |
7 | %     | 4 ---|-1
8 | %     |/     |/
9 | %     3 ———— 2
10 | % cuboid frame definition: x y at bottom center. x rightward, y innerward, z upward.
11 | % this cuboid frame and indexing/ordering is unified across all configurations!
12 |
13 | % unit corners in the cuboid frame, one column per corner [x;y;z].
14 | % [-1 1] is the boundary because the object scale is half length/width/height!
15 | corners_body = [ 1  1 -1 -1  1  1 -1 -1;
16 |                  1 -1 -1  1  1 -1 -1  1;
17 |                 -1 -1 -1 -1  1  1  1  1];
18 |
19 | cuboid_to_world = similarityTransformation(object); % [R*s t';0 0 0 1]
20 | corners_3d_world = homo_to_real_coord(cuboid_to_world*real_to_homo_coord(corners_body));
21 |
22 | end
--------------------------------------------------------------------------------
/utils/cuboid_utils/get_cuboid_draw_edge_markers.m:
--------------------------------------------------------------------------------
1 | function [edge_markers, line_marker_type] = get_cuboid_draw_edge_markers(box_config_type, final_universal_object)
2 | % Edges of a cuboid together with the line style used to draw each of them.
3 | % box_config_type         [configuration_id, vp_1_on_left_or_right], a field of the cuboid struct.
4 | % final_universal_object  optional, default true: corner ids of the final saved cuboid struct;
5 | %                         false: corner ids used only during the cuboid generation process.
6 | % edge_markers            each row [ edge_start_pt_id, edge_end_pt_id, edge_marker_type_id ],
7 | %                         hidden edges first so that they are drawn first.
8 | % line_marker_type        cell array of plot styles indexed by edge_marker_type_id.
9 |
10 | if (nargin==1)
11 |     final_universal_object = true;
12 | end
13 | line_marker_type={'r','r--','g','g--','b','b--'};
14 |
15 | [visible_edge_pts, hidden_edge_pts] = get_object_edge_visibility(box_config_type, final_universal_object);
16 | edge_markers = [hidden_edge_pts; visible_edge_pts];
17 |
18 | % marker type id of every edge, in the row order of edge_markers
19 | config_id = box_config_type(1);
20 | marker_type_ids = [];
21 | if (final_universal_object)
22 |     switch config_id
23 |         case 1
24 |             if (box_config_type(2)==1)
25 |                 marker_type_ids = [4 2 6 3 1 5 5 5 3 1 3 1];
26 |             else
27 |                 marker_type_ids = [2 4 6 3 1 5 5 5 3 1 3 1];
28 |             end
29 |         case 2
30 |             marker_type_ids = [2 4 2 6 6 3 5 5 3 1 3 1];
31 |         case 5
32 |             marker_type_ids = [2 4 2 2 4 2 6 6 3 5 5 3];
33 |     end
34 | else
35 |     if (config_id==1)
36 |         marker_type_ids = [4 2 6 1 3 1 3 5 5 5 1 3];
37 |     else
38 |         marker_type_ids = [4 2 6 6 2 1 3 1 3 5 5 3];
39 |     end
40 | end
41 |
42 | % for an unknown configuration id no third column is appended
43 | if ~isempty(marker_type_ids)
44 |     edge_markers = [edge_markers marker_type_ids'];
45 | end
46 | end
--------------------------------------------------------------------------------
/utils/cuboid_utils/get_object_edge_visibility.m:
--------------------------------------------------------------------------------
1 | function [visible_edge_pts,hidden_edge_pts] = get_object_edge_visibility(box_config_type, final_universal_object)
2 | % Visible and hidden edges of a cuboid for a given configuration.
3 | % box_config_type         [configuration_id, vp_1_on_left_or_right], a field of the cuboid struct.
4 | % final_universal_object  optional, default true: corner ids of the final saved cuboid struct;
5 | %                         false: corner ids used only during the cuboid generation process.
6 | % Outputs: n*2 each, every row is an edge's start and end pt id.
7 | % For the final struct only configuration ids 1, 2 and 5 are known; other ids leave the outputs unassigned.
8 |
9 | if (nargin==1)
10 |     final_universal_object = true;
11 | end
12 |
13 | config_id = box_config_type(1);
14 | if (~final_universal_object)
15 |     % 2D box corner ids used only during cuboid generation
16 |     if (config_id==1)
17 |         visible_edge_pts = [1 2;2 3;3 4;4 1;2 6;3 5;4 8;5 8;5 6];
18 |         hidden_edge_pts = [7 8;7 6;7 1];
19 |     else
20 |         visible_edge_pts = [1 2;2 3;3 4;4 1;2 6;3 5;5 6];
21 |         hidden_edge_pts = [7 8;7 6;7 1;8 4;8 5];
22 |     end
23 |     return;
24 | end
25 |
26 | % corner ids of the final saved cuboid struct
27 | switch config_id
28 |     case 1
29 |         if (box_config_type(2)==1)
30 |             visible_edge_pts = [1 2;2 3;2 6;1 5;3 7;5 6;6 7;7 8;8 5];
31 |             hidden_edge_pts = [3 4;4 1;4 8];
32 |         else
33 |             visible_edge_pts = [1 2;1 4;2 6;1 5;4 8;5 6;6 7;7 8;8 5];
34 |             hidden_edge_pts = [2 3;3 4;3 7];
35 |         end
36 |     case 2
37 |         visible_edge_pts = [1 2;2 6;1 5;5 6;6 7;7 8;8 5];
38 |         hidden_edge_pts = [2 3;3 4;4 1;3 7;4 8];
39 |     case 5
40 |         visible_edge_pts = [1 2;2 6;1 5;5 6];
41 |         hidden_edge_pts = [6 7;7 8;8 5;2 3;3 4;4 1;3 7;4 8];
42 | end
--------------------------------------------------------------------------------
/utils/cuboid_utils/similarityTransformation.m:
--------------------------------------------------------------------------------
1 | function Tr = similarityTransformation(object)
2 | % Build the 4*4 similarity transform [R*S t;0 0 0 1] from cuboid frame to world frame.
3 | % object.rotY  object yaw angle (rotation about the z axis).
4 | % object.scale three scale factors, placed on the diagonal of S.
5 | % object.pos   translation, either a row or a column vector with 3 entries.
6 |
7 | cos_yaw = cos(object.rotY);
8 | sin_yaw = sin(object.rotY);
9 | R = [cos_yaw -sin_yaw 0; sin_yaw cos_yaw 0; 0 0 1];
10 |
11 | % scale matrix
12 | S = diag(object.scale);
13 |
14 | translation = object.pos;
15 | if (size(translation,2)==3) % row vec, turn it into a column
16 |     translation = translation';
17 | end
18 | Tr = [R*S translation; 0 0 0 1];
--------------------------------------------------------------------------------
/utils/draw_utils/get_id_color.m:
--------------------------------------------------------------------------------
1 | function colors = get_id_color(i)
2 | % Color name for object id i; the palette is cycled through for ids beyond its length.
3 | palette={'red','green','blue','cyan','magenta','black','red','green','blue','cyan'}; % yellow is not clear to show
4 | palette_size = length(palette);
5 | slot = mod(i-1,palette_size)+1; % 1-based wrap around: id palette_size maps to the last entry, not to 0
6 | colors = palette{slot};
--------------------------------------------------------------------------------
/utils/draw_utils/plot_image_with_cuboids.m:
--------------------------------------------------------------------------------
1 | function plot_image_with_cuboids(cuboid, simple_color)
2 | % Plot a cuboid onto the existing figure; an image should already be plotted.
3 | % cuboid        struct, needs the fields box_config_type and box_corners_2d (2*8 pixels [x;y]).
4 | % simple_color  optional, default false. If true only plain red/green lines are used instead of the
5 | %               styles returned by get_cuboid_draw_edge_markers.
6 |
7 | if nargin<2
8 |     simple_color = false;
9 | end
10 |
11 | hold on;
12 |
13 | % each row of edge_markers: edge_start_id, edge_end_id, edge_marker_type_id
14 | [edge_markers,line_marker_type] = get_cuboid_draw_edge_markers(cuboid.box_config_type, true);
15 | if (simple_color)
16 |     line_marker_type={'r','g','r','g','r','g'};
17 | end
18 |
19 | corners = cuboid.box_corners_2d;
20 | num_edges = size(edge_markers,1);
21 | for edge_id=1:num_edges
22 |     end_pt_ids = edge_markers(edge_id,1:2);
23 |     line_style = line_marker_type{edge_markers(edge_id,3)};
24 |     plot(corners(1,end_pt_ids),corners(2,end_pt_ids),line_style,'Linewidth',3.0);
25 | end
--------------------------------------------------------------------------------
/utils/draw_utils/plot_image_with_edges.m:
--------------------------------------------------------------------------------
1 | function plot_image_with_edges(figure_num, rgb_img, edges, title_string, edge_color, whether_new_imiage, pause_debug)
2 | % Draw an rgb image and line segments on top of it.
3 | % figure_num          figure to draw into.
4 | % rgb_img             image, only shown if whether_new_imiage is true.
5 | % edges               n*4 matrix, each row [x1 y1 x2 y2].
6 | % title_string        figure title, skipped if empty.
7 | % edge_color          plot style used for all edges (when pause_debug is false).
8 | % whether_new_imiage  if false, the previous image should already be drawn!
9 | % pause_debug         optional, default false. If true, every edge gets its own color from get_id_color
10 | %                     and the function waits for a key press after each edge.
11 |
12 | if (nargin<7)
13 |     pause_debug=false;
14 | end
15 |
16 | figure(figure_num);
17 | if (whether_new_imiage)
18 |     imshow(rgb_img);
19 | end
20 | if (size(title_string,2)>0)
21 |     title(title_string);
22 | end
23 | hold on;
24 |
25 | for i=1:size(edges,1)
26 |     if (pause_debug)
27 |         line_style = get_id_color(i);
28 |     else
29 |         line_style = edge_color;
30 |     end
31 |     plot([edges(i,1) edges(i,3)],[edges(i,2) edges(i,4)],line_style,'Linewidth',2.5);
32 |     if (pause_debug)
33 |         pause();
34 |     end
35 | end
36 |
37 | pause(0.5);
38 | end
--------------------------------------------------------------------------------
/utils/draw_utils/save_figure_to_img.m:
--------------------------------------------------------------------------------
1 | function save_figure_to_img(figure_num, title_name, saved_img_name, high_reso)
2 | % Set the title of a figure and save the figure to an image file.
3 | % figure_num      figure to save.
4 | % title_name      title set on the figure before saving.
5 | % saved_img_name  output file name without suffix (print adds the suffix automatically).
6 | % high_reso       optional, default false. true: save as png, false: save as jpg.
7 | % To save a larger image, set fig = figure(figure_num);fig.Position=[100 220 1127 840]; before calling this function.
8 |
9 | if (nargin<4)
10 |     high_reso = false;
11 | end
12 |
13 | output_mode = '-djpe'; % jpg
14 | if (high_reso)
15 |     output_mode = '-dpng'; % png
16 | end
17 |
18 | figure(figure_num);
19 | title(title_name);
20 |
21 | fig = gcf;
22 | fig.PaperPositionMode = 'auto'; % saved image has the same size as the figure
23 | print(saved_img_name,output_mode,'-r0');
24 | end
--------------------------------------------------------------------------------
/utils/draw_utils/subtightplot.m:
--------------------------------------------------------------------------------
1 | function h=subtightplot(m,n,p,gap,marg_h,marg_w,varargin)
2 | % Copyright (c) 2012, Felipe G. Nievinski
3 | % Copyright (c) 2010, Pekka Kumpulainen
4 | % Copyright (c) 2011, Nikolay S. All rights reserved.
5 | % Functional purpose: A wrapper function for the Matlab function subplot. Adds the ability to define the gap
6 | % between neighbouring subplots. Unfortunately the Matlab subplot function lacks this functionality, and the
7 | % gap between subplots can reach 40% of the figure area, which is pretty lavish.
8 | %
9 | % Input arguments (defaults exist):
10 | %   m,n,p   grid rows, grid columns and subplot index, as in subplot. p may be a vector to merge cells.
11 | %   gap     two elements vector [vertical,horizontal] defining the gap between neighbouring axes.
12 | %           A scalar is used for both. Default value is 0.01. Note this value will cause titles, legends
13 | %           and labels to collide with the subplots, while presenting relatively large axes.
14 | %   marg_h  margins in height in normalized units (0...1)
15 | %           or [lower upper] for different lower and upper margins. Default 0.05.
16 | %   marg_w  margins in width in normalized units (0...1)
17 | %           or [left right] for different left and right margins. Default: same as marg_h.
18 | %   varargin  additional arguments passed on to subplot.
19 | %
20 | % Output arguments: same as subplot - none, or axes handle according to function call.
21 | %
22 | % Issues & Comments: Note that if additional elements are used in order to be passed to subplot, the gap
23 | % parameter must be defined. For the default gap value use an empty element - [].
24 | %
25 | % Usage example: h=subtightplot(2,3,1:2,[0.5,0.2])
26 |
27 | if (nargin<4) || isempty(gap), gap=0.01; end
28 | if (nargin<5) || isempty(marg_h), marg_h=0.05; end
29 | % marg_w is the 6th argument: the check must be nargin<6, otherwise a call with exactly 5 arguments
30 | % reads the undefined variable marg_w
31 | if (nargin<6) || isempty(marg_w), marg_w=marg_h; end
32 | if isscalar(gap), gap(2)=gap; end
33 | if isscalar(marg_h), marg_h(2)=marg_h; end
34 | if isscalar(marg_w), marg_w(2)=marg_w; end
35 | gap_vert = gap(1);
36 | gap_horz = gap(2);
37 | marg_lower = marg_h(1);
38 | marg_upper = marg_h(2);
39 | marg_left = marg_w(1);
40 | marg_right = marg_w(2);
41 |
42 | % note n and m are switched as Matlab indexing is column-wise, while subplot indexing is row-wise :(
43 | [subplot_col,subplot_row]=ind2sub([n,m],p);
44 |
45 | % note subplot supports vector p inputs - so a merged subplot of higher dimensions will be created
46 | subplot_cols=1+max(subplot_col)-min(subplot_col); % number of column elements in merged subplot
47 | subplot_rows=1+max(subplot_row)-min(subplot_row); % number of row elements in merged subplot
48 |
49 | % single subplot dimensions:
50 | height=(1-(marg_lower+marg_upper)-(m-1)*gap_vert)/m;
51 | width =(1-(marg_left+marg_right)-(n-1)*gap_horz)/n;
52 |
53 | % merged subplot dimensions:
54 | merged_height=subplot_rows*( height+gap_vert )- gap_vert;
55 | merged_width= subplot_cols*( width +gap_horz )- gap_horz;
56 |
57 | % merged subplot position:
58 | merged_bottom=(m-max(subplot_row))*(height+gap_vert) +marg_lower;
59 | merged_left=(min(subplot_col)-1)*(width+gap_horz) +marg_left;
60 | pos_vec=[merged_left merged_bottom merged_width merged_height];
61 |
62 | % subplot(m,n,p,varargin{:},'Position',pos_vec) doesn't work, as subplot tends to ignore 'position'
63 | % when the same m,n,p is utilized; so only the position is passed
64 | h=subplot('Position',pos_vec,varargin{:});
65 |
66 | if (nargout < 1), clear h; end
67 |
68 | end
--------------------------------------------------------------------------------
/utils/fuse_normalize_scores.m:
--------------------------------------------------------------------------------
1 | function [combined_error,all_delete_inds] = fuse_normalize_scores(dist_error, angle_error,weight_vp_angle,whether_normalize)
2 | % Fuse and normalize two kinds of cuboid proposal error and reject the worst proposals.
3 | %
4 | % Inputs (one row per proposal, smaller error is better):
5 | %   dist_error         distance error
6 | %   angle_error        angle error
7 | %   weight_vp_angle    weight of the angle error relative to the distance error
8 | %   whether_normalize  if true, rescale each error to [0,1] before fusing
9 | % Outputs:
10 | %   combined_error     fused error of the proposals that survive the rejection step
11 | %   all_delete_inds    row indexes (into the inputs) of the rejected proposals
12 |
13 | combined_error = [];
14 | all_delete_inds = [];
15 | if (size(dist_error,1)==0)
16 |     return;
17 | end
18 |
19 | % step 1. keep the top 2/3 of the angle model and the top 2/3 of the distance model, reject the rest.
20 | if (size(dist_error,1)>4)   % at least five to perform delete operations otherwise there are too few...
21 |     [~, dist_sorted_inds] = sort(dist_error,'ascend');
22 |     [angle_sorted_score, angle_sorted_inds] = sort(angle_error,'ascend');
23 |     dist_delete_inds = dist_sorted_inds(round(size(dist_sorted_inds,1)/3*2):end);   % delete worst 1/3
24 |     angle_delete_inds = [];
25 |     delete_start_ind = round(size(angle_sorted_inds,1)/3*2);
26 |     % the angle error is capped at a maximum, so many cuboids may share that value and the
27 |     % ranking is then meaningless: only reject by angle if the error really increases at the cut.
28 |     if ( angle_sorted_score(delete_start_ind)>angle_sorted_score(delete_start_ind-1) )
29 |         angle_delete_inds = angle_sorted_inds(delete_start_ind:end);
30 |     end
31 |     all_delete_inds = union(dist_delete_inds,angle_delete_inds);
32 |     dist_error(all_delete_inds,:)=[];
33 |     angle_error(all_delete_inds,:)=[];
34 | end
35 |
36 | % step 2: fuse them (step 1 may have rejected every proposal).
37 | if (size(dist_error,1)>0)
38 |     dist_normalized = dist_error;
39 |     angle_normalized = angle_error;   % was dist_error: the angle term was silently replaced by the distance term
40 |
41 |     if (whether_normalize) && (size(dist_error,1)>1)
42 |         dist_score_range = max(dist_error)-min(dist_error);
43 |         angle_score_range = max(angle_error)-min(angle_error);
44 |         if (dist_score_range>0)    % all distances equal: keep raw values instead of dividing by zero
45 |             dist_normalized = (dist_error-min(dist_error))/dist_score_range;
46 |         end
47 |         if (angle_score_range>0)   % angle error might all be same
48 |             angle_normalized = (angle_error-min(angle_error))/angle_score_range;
49 |         end
50 |     end
51 |     combined_error = (dist_normalized + weight_vp_angle*angle_normalized)/(1+weight_vp_angle);
52 | end
53 | end
--------------------------------------------------------------------------------
/utils/geometry_util/EulerZYX_to_Rot.m:
--------------------------------------------------------------------------------
1 | function R = EulerZYX_to_Rot(euler_angle)
2 | % ZYX Euler angles [roll, pitch, yaw] (radians) --> 3*3 rotation matrix
3 |
4 | % sines and cosines of the three angles
5 | sr = sin(euler_angle(1));   cr = cos(euler_angle(1));   % roll
6 | sp = sin(euler_angle(2));   cp = cos(euler_angle(2));   % pitch
7 | sy = sin(euler_angle(3));   cy = cos(euler_angle(3));   % yaw
8 |
9 | % assemble the matrix one row at a time
10 | top_row    = [cp * cy, (sr * sp * cy) - (cr * sy), (cr * sp * cy) + (sr * sy)];
11 | middle_row = [cp * sy, (sr * sp * sy) + (cr * cy), (cr * sp * sy) - (sr * cy)];
12 | bottom_row = [-sp, sr * cp, cr * cp];
13 |
14 | R = [top_row; middle_row; bottom_row];
--------------------------------------------------------------------------------
/utils/geometry_util/EulerZYX_to_quat.m:
--------------------------------------------------------------------------------
1 | function quat = EulerZYX_to_quat(euler_angle)
2 | % ZYX Euler angles [roll, pitch, yaw] (radians) --> quaternion [qx qy qz qw]
3 |
4 | % the quaternion is built from the sines/cosines of the half angles
5 | half_angle = euler_angle(1:3)*0.5;
6 | s = sin(half_angle);   % s(1)=roll, s(2)=pitch, s(3)=yaw
7 | c = cos(half_angle);
8 |
9 | qw = c(1)*c(2)*c(3) + s(1)*s(2)*s(3);
10 | qx = s(1)*c(2)*c(3) - c(1)*s(2)*s(3);
11 | qy = c(1)*s(2)*c(3) + s(1)*c(2)*s(3);
12 | qz = c(1)*c(2)*s(3) - s(1)*s(2)*c(3);
13 |
14 | quat = [qx qy qz qw];
15 | end
--------------------------------------------------------------------------------
/utils/geometry_util/PoseQuat_to_Mat.m:
--------------------------------------------------------------------------------
1 | function T = PoseQuat_to_Mat(pose_quat_v)
2 | % pose vector [x y z qx qy qz qw] --> 4*4 transformation matrix
3 | % (rotation block from quat_to_Rot, translation in the last column)
4 |
5 | position = pose_quat_v(1:3);
6 | orientation = pose_quat_v(4:7);
7 |
8 | T = eye(4);
9 | T(1:3,4) = position;
10 | T(1:3,1:3) = quat_to_Rot(orientation);
--------------------------------------------------------------------------------
/utils/geometry_util/Rot_to_EulerZYX.m:
--------------------------------------------------------------------------------
1 | function [ euler ] = Rot_to_EulerZYX( R )
2 | % rotation matrix --> [roll; pitch; yaw] (3*1 column, radians, ZYX convention)
3 | %
4 | % Close to pitch = +-pi/2 (gimbal lock) roll and yaw cannot be separated:
5 | % roll is then fixed to 0 and the whole remaining rotation is returned as yaw.
6 |
7 | % clamp against numerical drift so that asin never returns a complex number
8 | % (written with comparisons so that a NaN entry still propagates as NaN)
9 | sin_pitch = -R(3,1);
10 | if (sin_pitch > 1)
11 |     sin_pitch = 1;
12 | elseif (sin_pitch < -1)
13 |     sin_pitch = -1;
14 | end
15 | pitch = asin(sin_pitch);
16 |
17 | if (abs(pitch - pi/2) < 1.0e-3)
18 |     % R(2,3)-R(1,2) = 2*sin(yaw-roll), R(1,3)+R(2,2) = 2*cos(yaw-roll)
19 |     roll = 0.0;
20 |     yaw = atan2(R(2,3) - R(1,2), R(1,3) + R(2,2)) + roll;
21 | elseif (abs(pitch + pi/2) < 1.0e-3)
22 |     % here the combination used above cancels to atan2(0,0); the valid one is
23 |     % -R(1,2)-R(2,3) = 2*sin(yaw+roll), R(2,2)-R(1,3) = 2*cos(yaw+roll)
24 |     roll = 0.0;
25 |     yaw = atan2(-R(1,2) - R(2,3), R(2,2) - R(1,3)) - roll;
26 | else
27 |     roll = atan2(R(3,2), R(3,3));
28 |     yaw = atan2(R(2,1), R(1,1));
29 | end
30 |
31 | euler = [roll;pitch;yaw];
32 |
33 | end
--------------------------------------------------------------------------------
/utils/geometry_util/Rot_to_quat.m:
--------------------------------------------------------------------------------
1 | function [ quat ] = Rot_to_quat( R )
2 | % rotation matrix --> [qx qy qz qw] (1*4 row, qw >= 0)
3 | %
4 | % When the trace is positive qw is the largest component and is used as the divisor.
5 | % Otherwise (rotation angle towards 180 degrees, qw close to 0) the largest diagonal entry
6 | % selects the divisor, so the result stays finite. See
7 | % http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
8 |
9 | quat = zeros(1,4);
10 | if (R(1,1) + R(2,2) + R(3,3) > 0)
11 |     quat(4) = 0.5 * sqrt(1 + R(1,1) + R(2,2) + R(3,3));
12 |     quat(1) = (R(3,2)-R(2,3))/(4*quat(4));
13 |     quat(2) = (R(1,3)-R(3,1))/(4*quat(4));
14 |     quat(3) = (R(2,1)-R(1,2))/(4*quat(4));
15 | elseif (R(1,1) >= R(2,2)) && (R(1,1) >= R(3,3))
16 |     s = 2 * sqrt(1 + R(1,1) - R(2,2) - R(3,3));   % s = 4*qx
17 |     quat(4) = (R(3,2)-R(2,3))/s;
18 |     quat(1) = 0.25*s;
19 |     quat(2) = (R(1,2)+R(2,1))/s;
20 |     quat(3) = (R(1,3)+R(3,1))/s;
21 | elseif (R(2,2) >= R(3,3))
22 |     s = 2 * sqrt(1 + R(2,2) - R(1,1) - R(3,3));   % s = 4*qy
23 |     quat(4) = (R(1,3)-R(3,1))/s;
24 |     quat(1) = (R(1,2)+R(2,1))/s;
25 |     quat(2) = 0.25*s;
26 |     quat(3) = (R(2,3)+R(3,2))/s;
27 | else
28 |     s = 2 * sqrt(1 + R(3,3) - R(1,1) - R(2,2));   % s = 4*qz
29 |     quat(4) = (R(2,1)-R(1,2))/s;
30 |     quat(1) = (R(1,3)+R(3,1))/s;
31 |     quat(2) = (R(2,3)+R(3,2))/s;
32 |     quat(3) = 0.25*s;
33 | end
34 |
35 | % q and -q describe the same rotation; keep qw >= 0 as the trace-based formula always did
36 | if (quat(4) < 0)
37 |     quat = -quat;
38 | end
39 |
40 | end
--------------------------------------------------------------------------------
/utils/geometry_util/exptwist.m:
--------------------------------------------------------------------------------
1 | function T = exptwist( twist )
2 | % exponential map of a twist (row or column vector) to a transformation matrix
3 | %   twist(1:3)  rotation vector omega (its norm is the rotation angle in radians)
4 | %   twist(4:6)  translational part upsilon, only read when twist has 6 elements
5 | % Returns the 4*4 transformation matrix for a 6-element twist, otherwise the 3*3 rotation matrix.
6 |
7 | omega = twist(1:3);
8 | has_translation = (length(twist)==6);
9 |
10 | theta = norm(omega);
11 | Omega = skew_matrix(omega);
12 | Omega2 = Omega*Omega;
13 |
14 | % R = I + a*Omega + b*Omega^2   (Rodrigues formula)
15 | % V = I + b*Omega + c*Omega^2   (maps upsilon to the translation)
16 | if (theta<0.00001)
17 |     % limits for theta -> 0; avoids 0/0. (The previous code used b = 1 and V = R,
18 |     % which has the wrong first/second order terms.)
19 |     a = 1;
20 |     b = 0.5;
21 |     c = 1/6;
22 | else
23 |     a = sin(theta)/theta;
24 |     b = (1-cos(theta))/(theta*theta);
25 |     c = (theta-sin(theta))/(theta^3);
26 | end
27 | R = (eye(3) + a*Omega + b*Omega2);
28 |
29 | if (has_translation)
30 |     upsilon = twist(4:6);
31 |     if (size(upsilon,1)==1)   % row input: make it a column for the product below
32 |         upsilon = upsilon';
33 |     end
34 |     V = (eye(3) + b*Omega + c*Omega2);
35 |     T = eye(4);
36 |     T(1:3,1:3) = R;
37 |     T(1:3,4) = V*upsilon;
38 | else
39 |     T = R;
40 | end
41 | end
--------------------------------------------------------------------------------
/utils/geometry_util/homo_to_real_coord.m:
--------------------------------------------------------------------------------
1 | function x=homo_to_real_coord(pts_homo)
2 | % homogeneous coordinates to real coordinates, one point per column: d*n --> (d-1)*n
3 |
4 | scale_row = pts_homo(end,:);          % last row holds the homogeneous scale of each point
5 | x = bsxfun(@rdivide, pts_homo(1:end-1,:), scale_row);
--------------------------------------------------------------------------------
/utils/geometry_util/quat_to_EulerZYX.m:
--------------------------------------------------------------------------------
1 | function [euler] = quat_to_EulerZYX(quat)
2 | % quaternion [qx qy qz qw] --> ZYX Euler angles [roll, pitch, yaw] (1*3 row, radians)
3 |
4 | qx = quat(1);
5 | qy = quat(2);
6 | qz = quat(3);
7 | qw = quat(4);
8 |
9 | % sin(pitch); a quaternion that is not exactly unit length can push this slightly
10 | % outside [-1,1], where asin would return a complex number, so clamp it first
11 | % (written with comparisons so that a NaN input still propagates as NaN)
12 | sin_pitch = 2*(qw*qy-qz*qx);
13 | if (sin_pitch > 1)
14 |     sin_pitch = 1;
15 | elseif (sin_pitch < -1)
16 |     sin_pitch = -1;
17 | end
18 |
19 | roll = atan2(2*(qw*qx+qy*qz), 1-2*(qx*qx+qy*qy));
20 | pitch = asin(sin_pitch);
21 | yaw = atan2(2*(qw*qz+qx*qy), 1-2*(qy*qy+qz*qz));
22 |
23 | euler = [roll, pitch, yaw];
24 | end
--------------------------------------------------------------------------------
/utils/geometry_util/quat_to_Rot.m:
--------------------------------------------------------------------------------
1 | function rot = quat_to_Rot(quat)
2 | % quaternion [qx qy qz qw] --> 3*3 rotation matrix
3 |
4 | qx = quat(1);   qy = quat(2);   qz = quat(3);   qw = quat(4);
5 |
6 | % diagonal entries
7 | r11 = 1 - 2*qy^2 - 2*qz^2;
8 | r22 = 1 - 2*qx^2 - 2*qz^2;
9 | r33 = 1 - 2*qx^2 - 2*qy^2;
10 |
11 | % off-diagonal entries, written as mirrored pairs
12 | r12 = 2*qx*qy - 2*qz*qw;    r21 = 2*qx*qy + 2*qz*qw;
13 | r13 = 2*qx*qz + 2*qy*qw;    r31 = 2*qx*qz - 2*qy*qw;
14 | r23 = 2*qy*qz - 2*qx*qw;    r32 = 2*qy*qz + 2*qx*qw;
15 |
16 | rot = [r11, r12, r13;
17 |        r21, r22, r23;
18 |        r31, r32, r33];
19 |
20 | end
--------------------------------------------------------------------------------
/utils/geometry_util/real_to_homo_coord.m:
--------------------------------------------------------------------------------
1 | function x=real_to_homo_coord(pts)
2 | % real coordinates to homogeneous coordinates, one point per column: d*n --> (d+1)*n
3 |
4 | num_pts = size(pts,2);
5 | unit_row = ones(1,num_pts);   % homogeneous scale 1 for every point
6 | x = vertcat(pts, unit_row);
--------------------------------------------------------------------------------
/utils/geometry_util/skew_matrix.m:
--------------------------------------------------------------------------------
1 | function [ cp ] = skew_matrix( vec )
2 | % 3*3 skew-symmetric (cross-product) matrix of a 3-vector
3 |
4 | a = vec(1);
5 | b = vec(2);
6 | c = vec(3);
7 |
8 | cp = [ 0, -c,  b;
9 |        c,  0, -a;
10 |       -b,  a,  0];
11 |
12 | end
--------------------------------------------------------------------------------
/utils/line_utils/align_left_right_edges.m:
--------------------------------------------------------------------------------
1 | function all_lines_raw = align_left_right_edges(all_lines_raw)
2 | % reorder the endpoints of every edge so that it runs from left to right (x1 <= x2)
3 | % all_lines_raw: n*4, each row [x1 y1 x2 y2]
4 |
5 | if isempty(all_lines_raw)
6 |     return;   % nothing to reorder
7 | end
8 |
9 | % rows whose end point lies left of their start point get their two endpoints swapped
10 | runs_backwards = all_lines_raw(:,3) < all_lines_raw(:,1);
11 | all_lines_raw(runs_backwards,1:4) = all_lines_raw(runs_backwards,[3 4 1 2]);
12 | end
--------------------------------------------------------------------------------
/utils/line_utils/lineSegmentIntersect.m:
--------------------------------------------------------------------------------
1 | function result_struct = lineSegmentIntersect(XY1,XY2, infinite_line)
2 | %LINESEGMENTINTERSECT Pairwise intersections between two sets of 2D line segments.
3 | %   result_struct = LINESEGMENTINTERSECT(XY1,XY2) intersects every segment of XY1 with
4 | %   every segment of XY2.
5 | %   result_struct = LINESEGMENTINTERSECT(XY1,XY2,true) treats the inputs as infinite
6 | %   lines instead of bounded segments.
7 | %
8 | %   XY1 and XY2 are N1x4 and N2x4 matrices. Rows correspond to line segments.
9 | %   Each row is of the form [x1 y1 x2 y2] where (x1,y1) is the start point and
10 | %   (x2,y2) is the end point of a line segment:
11 | %
12 | %       Line Segment
13 | %       o--------------------------------o
14 | %       ^                                ^
15 | %    (x1,y1)                          (x2,y2)
16 | %
17 | %   A row of XY2 whose x1 is NaN is interpreted as a vertical line through its
18 | %   (x2,y2) (see the HACK note below); all other values must be finite.
19 | %
20 | %   result_struct has the fields:
21 | %
22 | %   'intAdjacencyMatrix' : N1xN2 indicator matrix where the entry (i,j) is 1 if
23 | %       line segments XY1(i,:) and XY2(j,:) intersect. It is the scalar 1 when
24 | %       infinite_line is true.
25 | %
26 | %   'intMatrixX', 'intMatrixY' : N1xN2 matrices with the X / Y coordinate of the
27 | %       intersection point of XY1(i,:) and XY2(j,:), multiplied by the indicator
28 | %       above (so pairs that do not intersect read 0). Parallel pairs have a zero
29 | %       denominator and give non-finite entries.
30 | %
31 | %   'u_a' : N1xN2, position of the intersection along XY1(i,:) (0 = start point,
32 | %       1 = end point, outside [0,1] = on the extension of the segment).
33 | %
34 | %   'u_b' : N1xN2, the same along XY2(j,:).
35 |
36 | % Version: 1.00, April 03, 2010
37 | % Version: 1.10, April 10, 2010
38 | % Author: U. Murat Erdem
39 |
40 | % CHANGELOG:
41 | %
42 | % Ver. 1.00:
43 | % -Initial release.
44 | %
45 | % Ver. 1.10:
46 | % - Changed the input parameters. Now the function accepts two sets of line
47 | % segments. The intersection analysis is done between these sets and not in
48 | % the same set.
49 | % - Changed and added fields of the output.
50 | % - Performance tweaks.
51 | %
52 | % Local modifications: optional infinite_line argument, NaN start point handling,
53 | % reduced output (u_a / u_b instead of the normalized distance and parallel /
54 | % coincident matrices).
55 |
56 | % The math behind is given in:
57 | % http://local.wasp.uwa.edu.au/~pbourke/geometry/lineline2d/
58 |
59 | %%% Argument check.
60 | %-------------------------------------------------------------------------------
61 |
62 | % HACK, in box_proposal.m first pt maybe VP3, might be nan, represents a vertical line.
63 | % Handled per row; the previous linear indexing (XY2(1:2), XY2(3), XY2(4)) was only
64 | % correct for a single-row XY2.
65 | if (size(XY2,2)==4)
66 |     nan_start = isnan(XY2(:,1));
67 |     XY2(nan_start,1:2) = [XY2(nan_start,3), XY2(nan_start,4)+1000];
68 | end
69 |
70 | validateattributes(XY1,{'numeric'},{'2d','finite'});
71 | validateattributes(XY2,{'numeric'},{'2d','finite'});
72 |
73 | [n_rows_1,n_cols_1] = size(XY1);
74 | [n_rows_2,n_cols_2] = size(XY2);
75 |
76 | if n_cols_1 ~= 4 || n_cols_2 ~= 4
77 |     error('Arguments must be a Nx4 matrices.');
78 | end
79 |
80 | %%% Prepare matrices for vectorized computation of line intersection points.
81 | %-------------------------------------------------------------------------------
82 | % entry (i,j) of every matrix below refers to the pair XY1(i,:), XY2(j,:)
83 | X1 = repmat(XY1(:,1),1,n_rows_2);
84 | X2 = repmat(XY1(:,3),1,n_rows_2);
85 | Y1 = repmat(XY1(:,2),1,n_rows_2);
86 | Y2 = repmat(XY1(:,4),1,n_rows_2);
87 |
88 | XY2 = XY2';
89 |
90 | X3 = repmat(XY2(1,:),n_rows_1,1);
91 | X4 = repmat(XY2(3,:),n_rows_1,1);
92 | Y3 = repmat(XY2(2,:),n_rows_1,1);
93 | Y4 = repmat(XY2(4,:),n_rows_1,1);
94 |
95 | X4_X3 = (X4-X3);
96 | Y1_Y3 = (Y1-Y3);
97 | Y4_Y3 = (Y4-Y3);
98 | X1_X3 = (X1-X3);
99 | X2_X1 = (X2-X1);
100 | Y2_Y1 = (Y2-Y1);
101 |
102 | numerator_a = X4_X3 .* Y1_Y3 - Y4_Y3 .* X1_X3;
103 | numerator_b = X2_X1 .* Y1_Y3 - Y2_Y1 .* X1_X3;
104 | denominator = Y4_Y3 .* X2_X1 - X4_X3 .* Y2_Y1;   % zero for parallel pairs
105 |
106 | u_a = numerator_a ./ denominator;
107 | u_b = numerator_b ./ denominator;
108 |
109 | % Find the adjacency matrix A of intersecting lines.
110 | INT_X = X1+X2_X1.*u_a;
111 | INT_Y = Y1+Y2_Y1.*u_a;
112 | INT_B = (u_a >= 0) & (u_a <= 1) & (u_b >= 0) & (u_b <= 1);
113 | if (nargin==3)
114 |     if (infinite_line)
115 |         INT_B=1;   % infinite lines: accept every pair
116 |     end
117 | end
118 |
119 | % Arrange output.
120 | result_struct.intAdjacencyMatrix = INT_B;
121 | result_struct.intMatrixX = INT_X .* INT_B;   % intersection x coordinate
122 | result_struct.intMatrixY = INT_Y .* INT_B;   % intersection y coordinate
123 | result_struct.u_a=u_a;   % >1 extended
124 | result_struct.u_b=u_b;   % >1 extended
125 |
126 | end
--------------------------------------------------------------------------------
/utils/line_utils/merge_break_lines_v2.m:
--------------------------------------------------------------------------------
1 | function all_lines_merge = merge_break_lines_v2(all_lines,pre_merge_dist_thre,pre_merge_angle_thre)
2 | % merge short edges into long. edges n*4 each edge should start from left to right! this code mainly from c++ line_lbd
3 | % thresholds for merging: merge_dist_thre in pixel: endpoint distance of two line segments
4 | % merge_angle_thre in degrees: angle between two lines.
5 |
6 | % ----- ---- two broken lines into one line
7 |
8 | % ------------ close but offsetted lines not considered here, see merge_break_proj_lines.m
9 | % ------------
10 |
11 |
12 | all_lines_merge = all_lines;
13 | can_force_merge = 1;
14 | counter = 0;
15 | total_line_number = size(all_lines,1);
16 | if (size(all_lines,1)>0)
17 | while ( (can_force_merge==1) && (counter<500))
18 | counter=counter+1;
19 | can_force_merge=0;
20 | line_vector=all_lines_merge(:,3:4)-all_lines_merge(:,1:2); % to compute the angles
21 | for seg1 = 1:total_line_number-1
22 | for seg2 = (seg1+1):total_line_number
23 | angle1= normalize_to_pi(atan2(line_vector(seg1,2),line_vector(seg1,1))/pi*180,false); % -90 ~ 90
24 | angle2= normalize_to_pi(atan2(line_vector(seg2,2),line_vector(seg2,1))/pi*180,false); % -90 ~ 90
25 | angle_diff = min(abs(angle1-angle2),180-abs(angle1-angle2));
26 | if (angle_diff < pre_merge_angle_thre)
27 | dist_1ed_to_2 = norm(all_lines_merge(seg1,3:4)-all_lines_merge(seg2,1:2)); % one segment's end is close to another's begin
28 | dist_2ed_to_1 = norm(all_lines_merge(seg2,3:4)-all_lines_merge(seg1,1:2));
29 |
30 | if ( (dist_1ed_to_2 < pre_merge_dist_thre) || (dist_2ed_to_1 < pre_merge_dist_thre))
31 | if all_lines_merge(seg1,1)all_lines_merge(seg2,3)
37 | merge_end = all_lines_merge(seg1,3:4);
38 | else
39 | merge_end = all_lines_merge(seg2,3:4);
40 | end
41 | merged_angle = normalize_to_pi(atan2(merge_end(2)-merge_start(2),merge_end(1)-merge_start(1))/pi*180,false);
42 | merge_angle_diff = min(abs(angle1-merged_angle),180-abs(angle1-merged_angle));
43 | if (merge_angle_diff0)
28 | while ( (can_force_merge==1) && (counter<500))
29 | counter=counter+1;
30 | can_force_merge=0;
31 | line_vector = all_lines_merge(:,3:4)-all_lines_merge(:,1:2); % to compute the angles
32 | for seg1 = 1:size(all_lines_merge,1)
33 | angle1= normalize_to_pi(atan2(line_vector(seg1,2),line_vector(seg1,1))/pi*180,false); % -90 ~ 90
34 | for seg2 = (seg1+1):size( all_lines_merge,1)
35 | angle2= normalize_to_pi(atan2(line_vector(seg2,2),line_vector(seg2,1))/pi*180,false); % -90 ~ 90
36 | angle_diff = min(abs(angle1-angle2),180-abs(angle1-angle2));
37 | if (angle_diff < pre_merge_angle_thre)
38 | [dist_1_bg_to_2,proj_1_bg_to_2]= point_distproj_line(all_lines_merge(seg2,1:2), all_lines_merge(seg2,3:4), all_lines_merge(seg1,1:2));
39 | [dist_1_ed_to_2,proj_1_ed_to_2]= point_distproj_line(all_lines_merge(seg2,1:2), all_lines_merge(seg2,3:4), all_lines_merge(seg1,3:4));
40 | [dist_2_bg_to_1,proj_2_bg_to_1]= point_distproj_line(all_lines_merge(seg1,1:2), all_lines_merge(seg1,3:4), all_lines_merge(seg2,1:2));
41 | [dist_2_ed_to_1,proj_2_ed_to_1]= point_distproj_line(all_lines_merge(seg1,1:2), all_lines_merge(seg1,3:4), all_lines_merge(seg2,3:4));
42 | if ( (dist_1_bg_to_2pre_proj_cover_thre) || (covering_2_to_1>pre_proj_cover_thre)) %at least one line is being overlapped larged delete one which is being overlapped more
47 | if (covering_1_to_2>covering_2_to_1)
48 | to_delete_ind=seg2;
49 | else
50 | to_delete_ind=seg1;
51 | end
52 | all_lines_merge(to_delete_ind,:) = [];
53 | can_force_merge=1;
54 | break;
55 | else % two lines have small overlap with each other, check their end point distance if close, then merge merge broken lines
56 | dist_1end_to_2bg = norm(all_lines_merge(seg1,3:4)-all_lines_merge(seg2,1:2));
57 | dist_2end_to_1bg = norm(all_lines_merge(seg2,3:4)-all_lines_merge(seg1,1:2));
58 | if ( (dist_1end_to_2bg < pre_merge_dist_thre) || (dist_2end_to_1bg < pre_merge_dist_thre))
59 | if all_lines_merge(seg1,1)all_lines_merge(seg2,3)
65 | merge_end = all_lines_merge(seg1,3:4);
66 | else
67 | merge_end = all_lines_merge(seg2,3:4);
68 | end
69 | merged_angle = normalize_to_pi(atan2(merge_end(2)-merge_start(2),merge_end(1)-merge_start(1))/pi*180,false);
70 | merge_angle_diff = min(abs(angle1-merged_angle),180-abs(angle1-merged_angle));
71 | if (merge_angle_diff1) % cut into [0 1]
16 | t=1;
17 | end
18 | if (t<0)
19 | t=0;
20 | end
21 | proj_percent=t;
22 | end
--------------------------------------------------------------------------------
/utils/line_utils/remove_short_lines.m:
--------------------------------------------------------------------------------
1 | function all_lines_long = remove_short_lines(all_lines,edge_length_threshold,angle_threshold)
2 | % keep only the long lines and, when angle_threshold is given, drop the near vertical ones
3 | %   all_lines              n*4, each row [x1 y1 x2 y2]
4 | %   edge_length_threshold  lines must be strictly longer than this to be kept
5 | %   angle_threshold        optional; minimum angle difference (degrees) to a vertical line.
6 | %                          too vertical lines are not assumed to be wall edges.
7 |
8 | if (size(all_lines,1)==0)
9 |     all_lines_long=[];
10 |     return;
11 | end
12 |
13 | seg_vec = all_lines(:,3:4)-all_lines(:,1:2);
14 | seg_angle_deg = normalize_to_pi(atan2(seg_vec(:,2),seg_vec(:,1))/pi*180,false);   % -90 ~ 90 degree
15 |
16 | % compare squared lengths, no square root needed
17 | keep_mask = sum(seg_vec.*seg_vec,2) > edge_length_threshold*edge_length_threshold;
18 |
19 | if (nargin>2)
20 |     dev_from_90 = abs(seg_angle_deg-90);
21 |     vertical_diff = min([dev_from_90,180-dev_from_90],[],2);   % angle diff wrt vertical line
22 |     keep_mask = keep_mask & (vertical_diff>angle_threshold);
23 | end
24 |
25 | all_lines_long = all_lines(keep_mask,:);
26 | end
--------------------------------------------------------------------------------
/utils/line_utils/seg_hit_boundary.m:
--------------------------------------------------------------------------------
1 | function hit_pt = seg_hit_boundary(line_1, line_segment2)
2 | % intersection of a ray with an axis-aligned line segment (e.g. a rectangle boundary).
3 | %   line_1         1*4 [x1 y1 x2 y2]; an infinite directional line starting at (x1,y1)
4 | %                  and pointing towards (x2,y2)
5 | %   line_segment2  1*4 [x1 y1 x2 y2]; a horizontal or vertical segment (not infinite),
6 | %                  endpoints may be given in either order
7 | %   hit_pt         1*2 float intersection point, or [-1 -1] if not found
8 | % a simplified version of lineSegmentIntersect
9 |
10 | % HACK, in box_proposal.m first pt of line_1 maybe VP3, might be nan. in this case, line_1 means a vertical line
11 | if isnan(line_1(1))
12 |     line_1(1:2) = [line_1(3) line_1(4)+1000];
13 | end
14 |
15 | pt_start = line_1(1:2);
16 | direc = line_1(3:4)-pt_start;
17 |
18 | boundary_bgn = line_segment2(1:2);
19 | boundary_end = line_segment2(3:4);
20 |
21 | hit_pt=[-1 -1];
22 | % line equation is (p_u,p_v)+lambda*(delta_u,delta_v) parameterized by lambda.
23 | % fixed_ax is the coordinate that is constant along the boundary (2: horizontal edge,
24 | % 1: vertical edge), free_ax the one that varies along it.
25 | for fixed_ax = [2 1]
26 |     if (boundary_bgn(fixed_ax)~=boundary_end(fixed_ax))
27 |         continue;
28 |     end
29 |     free_ax = 3-fixed_ax;
30 |     % a ray parallel to the edge gives a non-finite lambd and fails the tests below
31 |     lambd = (boundary_bgn(fixed_ax)-pt_start(fixed_ax))/direc(fixed_ax);
32 |     if (lambd>=0)   % along ray direction
33 |         hit_pt_tmp = pt_start+lambd*direc;
34 |         % min/max so that the endpoint order of the boundary does not matter
35 |         seg_lo = min(boundary_bgn(free_ax),boundary_end(free_ax));
36 |         seg_hi = max(boundary_bgn(free_ax),boundary_end(free_ax));
37 |         if (seg_lo<=hit_pt_tmp(free_ax)) && (hit_pt_tmp(free_ax)<=seg_hi)   % inside the segment
38 |             hit_pt = hit_pt_tmp;
39 |             hit_pt(fixed_ax) = boundary_bgn(fixed_ax);   % floor operations might have un-expected things
40 |         end
41 |     end
42 | end
43 |
44 | end
--------------------------------------------------------------------------------