├── .gitignore
├── .vscode
│   └── settings.json
├── CMakeLists.txt
├── LICENSE
├── README.md
├── facenetModels
│   └── README.md
├── imgs
│   └── README.md
├── mtCNNModels
│   └── README.md
├── src
│   ├── baseEngine.cpp
│   ├── baseEngine.h
│   ├── common.cpp
│   ├── common.h
│   ├── faceNet.cpp
│   ├── faceNet.h
│   ├── main.cpp
│   ├── mtcnn.cpp
│   ├── mtcnn.h
│   ├── network.cpp
│   ├── network.h
│   ├── onet_rt.cpp
│   ├── onet_rt.h
│   ├── pBox.h
│   ├── pnet_rt.cpp
│   ├── pnet_rt.h
│   ├── rnet_rt.cpp
│   ├── rnet_rt.h
│   ├── videoStreamer.cpp
│   └── videoStreamer.h
└── step01_pb_to_uff.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # build files (matches any file or directory named "build")
2 | build
3 |
4 | # CLion specific: IDE metadata and its per-configuration build directories
5 | .idea
6 | cmake-build-debug
7 | cmake-build-release
8 |
9 | # images for recognition: ignore everything in imgs/ except its README
10 | imgs/*
11 | !imgs/README.md
12 |
13 | # machine learning models: ignore the model files, keep only the READMEs
14 | mtCNNModels/*
15 | !mtCNNModels/README.md
16 | facenetModels/*
17 | !facenetModels/README.md
18 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "files.associations": {
3 | "cmath": "cpp",
4 | "chrono": "cpp"
5 | }
6 | }
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Build script for the face_recogition_tensorRT executable.
2 | #
3 | # Requires OpenCV, the CUDA toolkit (via the FindCUDA module) and TensorRT.
4 | # TensorRT is located by hand below; pass -DTENSORRT_ROOT=<dir> (and optionally
5 | # -DTENSORRT_BUILD=<dir>) when it is not installed in a default system location.
6 | cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
7 |
8 | # NOTE: the spelling "recogition" is kept as-is because it is the name of the
9 | # executable this script produces.
10 | set(PROJECT_NAME face_recogition_tensorRT)
11 | project(${PROJECT_NAME})
12 |
13 | set(CMAKE_CXX_STANDARD 11)
14 |
15 | # OpenCV
16 | find_package(OpenCV REQUIRED)
17 |
18 | # CUDA: cuda_add_executable() used below is provided by FindCUDA, so the
19 | # package is mandatory rather than optional.
20 | find_package(CUDA REQUIRED)
21 | message(STATUS "CUDA version: ${CUDA_VERSION}")
22 |
23 | # nvcc flags: optimise, and generate device code for compute capability 8.7
24 | # only. Adjust the -gencode pair when targeting a different GPU.
25 | list(APPEND CUDA_NVCC_FLAGS
26 |     -O3
27 |     -gencode arch=compute_87,code=sm_87
28 | )
29 |
30 | # TensorRT
31 | message(STATUS "CUDA_TOOLKIT_ROOT_DIR = ${CUDA_TOOLKIT_ROOT_DIR}")
32 |
33 | # Locations searched first for TensorRT, and the library sub-directories tried
34 | # beneath each of them.
35 | set(trt_hints ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR})
36 | set(trt_lib_suffixes lib lib64 lib/x64 lib/aarch64-linux-gnu)
37 |
38 | # One lookup is enough: find_path() caches its result, so repeating the call
39 | # with the same variable for other headers would never search again.
40 | # Presumably all TensorRT headers live next to NvInfer.h - verify for custom
41 | # installs.
42 | find_path(TENSORRT_INCLUDE_DIR
43 |     NAMES NvInfer.h
44 |     HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} /usr/include
45 |     PATH_SUFFIXES include)
46 | message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
47 |
48 | find_library(TENSORRT_LIBRARY_INFER nvinfer
49 |     HINTS ${trt_hints}
50 |     PATH_SUFFIXES ${trt_lib_suffixes})
51 | find_library(TENSORRT_LIBRARY_INFER_PLUGIN nvinfer_plugin
52 |     HINTS ${trt_hints}
53 |     PATH_SUFFIXES ${trt_lib_suffixes})
54 | # Looked up for reference only; it is not part of TENSORRT_LIBRARY and is
55 | # therefore not linked.
56 | find_library(TENSORRT_LIBRARY_CAFFE_PARSER nvcaffe_parser
57 |     HINTS ${trt_hints}
58 |     PATH_SUFFIXES ${trt_lib_suffixes})
59 | find_library(TENSORRT_LIBRARY_PARSER nvparsers
60 |     HINTS ${trt_hints}
61 |     PATH_SUFFIXES ${trt_lib_suffixes})
62 |
63 | # Check every linked library on its own: a combined list can look "found" even
64 | # when one entry in the middle of it is <VAR>-NOTFOUND.
65 | include(FindPackageHandleStandardArgs)
66 | find_package_handle_standard_args(
67 |     TENSORRT DEFAULT_MSG
68 |     TENSORRT_INCLUDE_DIR
69 |     TENSORRT_LIBRARY_INFER
70 |     TENSORRT_LIBRARY_INFER_PLUGIN
71 |     TENSORRT_LIBRARY_PARSER)
72 | if(NOT TENSORRT_FOUND)
73 |     # FATAL_ERROR stops configuration; a bare "ERROR" is not a message mode and
74 |     # would merely be printed as text.
75 |     message(FATAL_ERROR
76 |         "Cannot find TensorRT library.")
77 | endif()
78 |
79 | set(TENSORRT_LIBRARY
80 |     ${TENSORRT_LIBRARY_INFER}
81 |     ${TENSORRT_LIBRARY_INFER_PLUGIN}
82 |     ${TENSORRT_LIBRARY_PARSER})
83 | message(STATUS "TENSORRT_LIBRARY = ${TENSORRT_LIBRARY}")
84 |
85 | # Every source file found in ./src becomes part of the executable.
86 | aux_source_directory(./src DIR_SRCS)
87 | message(STATUS "DIR_SRCS = ${DIR_SRCS}")
88 | cuda_add_executable(${PROJECT_NAME} ${DIR_SRCS})
89 |
90 | # Make the located TensorRT (and OpenCV) headers visible to the compiler, so
91 | # installs outside the default include path also build.
92 | target_include_directories(${PROJECT_NAME} PRIVATE
93 |     ${TENSORRT_INCLUDE_DIR}
94 |     ${OpenCV_INCLUDE_DIRS})
95 |
96 | # The keyword-less signature is used deliberately: cuda_add_executable() links
97 | # the CUDA libraries with the plain signature by default, and CMake rejects
98 | # mixing plain and PRIVATE/PUBLIC signatures on one target.
99 | target_link_libraries(${PROJECT_NAME} ${TENSORRT_LIBRARY})
100 | target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
101 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Face Recognition for NVIDIA Jetson AGX Orin using TensorRT
2 | - This project is based on the implementation of this repo:
3 | [Face Recognition for NVIDIA Jetson (Nano) using TensorRT](https://github.com/nwesem/mtcnn_facenet_cpp_tensorRT). The original author is no longer updating that project, and much of its content does not work with newer JetPack versions and Jetson devices, so I have modified it slightly to make face recognition work on the Jetson AGX Orin.
4 | - Face recognition with [Google FaceNet](https://arxiv.org/abs/1503.03832) architecture and retrained model by David Sandberg ([github.com/davidsandberg/facenet](https://github.com/davidsandberg/facenet)) using TensorRT and OpenCV.
5 | - Moreover, this project uses an adapted version of [PKUZHOU's implementation](https://github.com/PKUZHOU/MTCNN_FaceDetection_TensorRT)
6 | of the mtCNN for face detection. More info below.
7 |
8 | ## Hardware
9 | - Nvidia Jetson AGX Orin DVK
10 | - Logitech C922 Pro HD Stream Webcam
11 |
12 | If you want to use a CSI camera instead of USB Camera, set the boolean _isCSICam_ to true in [main.cpp](./src/main.cpp).
13 |
14 |
15 | ## Dependencies
16 | - JetPack 5.1
17 | - CUDA 11.4.19 + cuDNN 8.6.0
18 | - TensorRT 8.5.2
19 | - OpenCV 4.5.4
20 | - Tensorflow 2.11
21 |
22 |
23 | ## Installation
24 |
25 | #### 1. Install Tensorflow
26 | The following steps install TensorFlow for JetPack 5.1. They were copied from the official [NVIDIA documentation](https://docs.nvidia.com/deeplearning/frameworks/install-tf-jetson-platform/index.html). I'm assuming you don't need to install it in a virtual environment; if you do, please refer to the documentation linked above. If you are not installing this on a Jetson, please refer to the official TensorFlow documentation.
27 |
28 | ```bash
29 | # Install system packages required by TensorFlow:
30 | sudo apt update
31 | sudo apt install libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev zip libjpeg8-dev liblapack-dev libblas-dev gfortran
32 |
33 | # Install and upgrade pip3
34 | sudo apt install python3-pip
35 | sudo python3 -m pip install --upgrade pip
36 | sudo pip3 install -U testresources setuptools==65.5.0
37 |
38 | # Install the Python package dependencies
39 | sudo pip3 install -U numpy==1.22 future==0.18.2 mock==3.0.5 keras_preprocessing==1.1.2 keras_applications==1.0.8 gast==0.4.0 protobuf pybind11 cython pkgconfig packaging h5py==3.6.0
40 |
41 | # Install TensorFlow using the pip3 command. This command will install the latest version of TensorFlow compatible with JetPack 5.1.
42 | sudo pip3 install --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v51 tensorflow==2.11.0+nv23.01
43 | ```
44 |
45 |
46 | #### 3. Prune and freeze TensorFlow model or get frozen model in the link
47 | The inputs to the original model are an input tensor consisting of a
48 | single or multiple faces and a phase train tensor telling all batch
49 | normalisation layers that model is not in train mode. Batch
50 | normalisation uses a switch layer to decide if the model is currently
51 | trained or just used for inference. This switch layer cannot be
52 | processed in TensorRT which is why it needs to be removed. Apparently
53 | this can be done using freeze_graph from TensorFlow, but here is a link
54 | to model where the phase train tensor has already been removed from the
55 | saved model
56 | [github.com/apollo-time/facenet/raw/master/model/resnet/facenet.pb](https://github.com/apollo-time/facenet/raw/master/model/resnet/facenet.pb)
57 |
58 | #### 4. Convert frozen protobuf (.pb) model to UFF
59 | Use the convert-to-uff tool which is installed with tensorflow
60 | installation to convert the *.pb model to *.uff. The script will replace
61 | unsupported layers with custom layers implemented by
62 | [github.com/r7vme/tensorrt_l2norm_helper](https://github.com/r7vme/tensorrt_l2norm_helper).
63 | Please check the file for the user defined values and update them if
64 | needed. Do not worry if there are a few warnings about the
65 | TRT_L2NORM_HELPER plugin.
66 | ```bash
67 | cd path/to/project
68 | python3 step01_pb_to_uff.py
69 | ```
70 | You should now have a facenet.uff file in the [facenetModels folder](./facenetModels) which will be used as the input model to TensorRT.
71 |
72 |
73 | #### 5. Get mtCNN models
74 | This repo uses an [implementation by PKUZHOU](https://github.com/PKUZHOU/MTCNN_FaceDetection_TensorRT)
75 | of the [multi-task Cascaded Convolutional Neural Network (mtCNN)](https://arxiv.org/pdf/1604.02878.pdf)
76 | for face detection. The original implementation was adapted to return the bounding boxes such that it
77 | can be used as input to my FaceNet TensorRT implementation.
78 | You will need all models from the repo in the [mtCNNModels](./mtCNNModels) folder so please do this
79 | to download them:
80 | ```bash
81 | # go to one above project,
82 | cd path/to/project/..
83 | # clone PKUZHOUs repo,
84 | git clone https://github.com/PKUZHOU/MTCNN_FaceDetection_TensorRT
85 | # and move models into mtCNNModels folder
86 | mv MTCNN_FaceDetection_TensorRT/det* path/to/project/mtCNNModels
87 | ```
88 | After doing so you should have the following files in your [mtCNNModels](./mtCNNModels) folder:
89 | * det1_relu.caffemodel
90 | * det1_relu.prototxt
91 | * det2_relu.caffemodel
92 | * det2_relu.prototxt
93 | * det3_relu.caffemodel
94 | * det3_relu.prototxt
95 | * README.md
96 |
97 | Done! You are now ready to build the project.
98 |
99 | #### 6. Build the project
100 | ```bash
101 | mkdir build && cd build
102 | cmake -DCMAKE_BUILD_TYPE=Release ..
103 | make -j$(nproc)
104 | ```
105 | If you are **not** building on a Jetson platform, set the paths to your CUDA and TensorRT installations
106 | using _-DCUDA_TOOLKIT_ROOT_DIR=path/to/cuda_ and _-DTENSORRT_ROOT=path/to/tensorRT_.
107 |
108 | ## NOTE
109 | **.uff and .engine files are GPU specific**, so if you want to run
110 | this project on a different GPU or on another machine, always start over
111 | at step **3.** above.
112 |
113 | ## Usage
114 | Put images of people in the imgs folder. Please only use images that contain one face.
115 | **NEW FEATURE**: You can now add faces while the algorithm is running. When you see
116 | the OpenCV GUI, press "**N**" on your keyboard to add a new face. The camera input will stop until
117 | you have opened your terminal and put in the name of the person you want to add.
118 | ```bash
119 | ./face_recogition_tensorRT
120 | ```
121 | Press "**Q**" to quit and to show the stats (fps).
122 |
123 | _NOTE:_ This step might take a while when done the first time. TensorRT
124 | now parses and serializes the model from .uff to a runtime engine
125 | (.engine file).
126 |
127 | ## Performance
128 | Performance on **NVIDIA Jetson AGX Orin**
129 | * ~24ms for face detection using mtCNN
130 | * ~4ms per face for facenet inference
131 | * **Total:** ~30fps
132 |
133 | ## License
134 | Please respect all licenses of OpenCV and the data the machine learning models (mtCNN and Google FaceNet)
135 | were trained on.
136 |
137 |
--------------------------------------------------------------------------------
/facenetModels/README.md:
--------------------------------------------------------------------------------
1 | # faceNet models folder
2 | add parsed facenet.uff to this folder
3 |
--------------------------------------------------------------------------------
/imgs/README.md:
--------------------------------------------------------------------------------
1 | # Image directory
2 | This folder contains images of people you would like to recognize. Each
3 | picture should be named *class_name*.jpg.
--------------------------------------------------------------------------------
/mtCNNModels/README.md:
--------------------------------------------------------------------------------
1 | # mtCNN models
2 | add all models of mtCNN to this folder
3 |
--------------------------------------------------------------------------------
/src/baseEngine.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-5-4.
3 | //
4 |
5 | #include "baseEngine.h"
6 |
7 | int baseEngine::det1_relu_counter = 1; // shared by all instances; caffeToGIEModel() appends it to "det1_relu" engine file names and then increments it
8 | // Stores the model file paths and blob names; no engine is built or loaded here.
9 | baseEngine::baseEngine(const char * prototxt,const char* model,const char* input_name,const char*location_name,
10 | const char* prob_name, const char *point_name) :
11 | prototxt(prototxt), // copied into a string member
12 | model(model), // copied into a string member; caffeToGIEModel() derives the .engine path from it
13 | INPUT_BLOB_NAME(input_name), // the blob names are kept as raw pointers, so the caller's strings must outlive this object
14 | OUTPUT_LOCATION_NAME(location_name),
15 | OUTPUT_PROB_NAME(prob_name),
16 | OUTPUT_POINT_NAME(point_name)
17 | {
18 | };
19 | baseEngine::~baseEngine() { // only calls shutdownProtobufLibrary(); `context` is not released here
20 | shutdownProtobufLibrary();
21 | }
22 |
23 | void baseEngine::init(int row,int col) { // base implementation is empty; declared virtual in baseEngine.h
24 |
25 | }
26 | void baseEngine::caffeToGIEModel(const std::string &deployFile, // name for caffe prototxt
27 |                                  const std::string &modelFile, // name for model
28 |                                  const std::vector<std::string> &outputs, // network outputs
29 |                                  unsigned int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with)
30 |                                  IHostMemory *&gieModelStream) // output buffer for the GIE model (never written by this implementation)
31 | {   // Sets `context` from a cached ".engine" plan next to `model` if one exists; otherwise parses the Caffe model, builds the engine and caches its plan.
32 |     size_t lastIdx = model.find_last_of(".");
33 |     string enginePath = model.substr(0, lastIdx);
34 |     if(enginePath.find("det1_relu") != std::string::npos) {
35 |         enginePath.append(std::to_string(det1_relu_counter)); // each det1_relu call gets its own numbered plan file
36 |         enginePath.append(".engine");
37 |         det1_relu_counter++;
38 |     }
39 |     else {
40 |         enginePath.append(".engine");
41 |     }
42 |     std::cout << "rawName = " << enginePath << std::endl;
43 |     if(fileExists(enginePath)) {
44 |         std::vector<char> trtModelStream_;
45 |         size_t size{ 0 };
46 |
47 |         std::ifstream file(enginePath, std::ios::binary);
48 |         if (file.good())
49 |         {
50 |             file.seekg(0, file.end);
51 |             size = file.tellg();
52 |             file.seekg(0, file.beg);
53 |             trtModelStream_.resize(size);
54 |             std::cout << "size" << trtModelStream_.size() << std::endl;
55 |             file.read(trtModelStream_.data(), size);
56 |             file.close();
57 |         }
58 |         std::cout << "size" << size;
59 |         IRuntime* runtime = createInferRuntime(gLogger);
60 |         assert(runtime != nullptr);
61 |         ICudaEngine *engine = runtime->deserializeCudaEngine(trtModelStream_.data(), size, nullptr);
62 |         assert(engine);
63 |         context = engine->createExecutionContext();
64 |         std::cout << std::endl;
65 |     }
66 |     else {
67 |         // create the builder
68 |         IBuilder *builder = createInferBuilder(gLogger);
69 |         IBuilderConfig* config = builder->createBuilderConfig();
70 |
71 |         // parse the caffe model to populate the network, then set the outputs
72 |         INetworkDefinition *network = builder->createNetworkV2(0U);
73 |         ICaffeParser *parser = createCaffeParser();
74 |
75 |         const IBlobNameToTensor *blobNameToTensor = parser->parse(deployFile.c_str(),
76 |                                                                   modelFile.c_str(),
77 |                                                                   *network,
78 |                                                                   nvinfer1::DataType::kHALF);
79 |         assert(blobNameToTensor != nullptr); // a failed parse would otherwise be dereferenced just below
80 |         for (auto &s : outputs) // specify which tensors are outputs
81 |             network->markOutput(*blobNameToTensor->find(s.c_str()));
82 |
83 |         // Build the engine
84 |         builder->setMaxBatchSize(maxBatchSize);
85 |         config->setMaxWorkspaceSize(1 << 25);
86 |         ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
87 |         assert(engine);
88 |
89 |         context = engine->createExecutionContext();
90 |
91 |         // Serialize engine (the plan is raw bytes, so the file is opened in binary mode)
92 |         ofstream planFile;
93 |         planFile.open(enginePath, std::ios::binary);
94 |         IHostMemory *serializedEngine = engine->serialize();
95 |         planFile.write((char *) serializedEngine->data(), serializedEngine->size());
96 |         planFile.close();
97 |         serializedEngine->destroy(); // the plan is on disk now; release the host copy
98 |         config->destroy();           // only needed while building
99 |         // we don't need the network any more, and we can destroy the parser
100 |         network->destroy();
101 |         parser->destroy();
102 |         builder->destroy();
103 |     }
104 | }
--------------------------------------------------------------------------------
/src/baseEngine.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-5-4.
3 | //
4 | #include "common.h"
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include "NvInfer.h"
13 | #include "NvCaffeParser.h"
14 | #ifndef MAIN_BASEENGINE_H
15 | #define MAIN_BASEENGINE_H
16 | using namespace nvinfer1;
17 | using namespace nvcaffeparser1;
18 | using namespace std;
19 | // Base class holding the Caffe model paths, blob names, logger and execution context
20 | // used to build or load one TensorRT engine (see caffeToGIEModel).
21 | class baseEngine {
22 | public:
23 |     static int det1_relu_counter; // numeric suffix for "det1_relu" engine files, shared by all instances
24 |     baseEngine(const char *prototxt,const char*model,const char*out_name,
25 |                const char*location_name,const char*prob_name,const char *point_name = NULL);
26 |     virtual ~baseEngine();
27 |     virtual void caffeToGIEModel(const std::string& deployFile, // name for caffe prototxt
28 |                                  const std::string& modelFile, // name for model
29 |                                  const std::vector<std::string>& outputs, // network outputs
30 |                                  unsigned int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with)
31 |                                  IHostMemory *&gieModelStream); // output buffer for the GIE model
32 |     virtual void init(int row,int col);
33 |     friend class Pnet;
34 |     const string prototxt; // path of the Caffe deploy file
35 |     const string model ;   // path of the Caffe weights; the .engine cache path is derived from it
36 |     const char *INPUT_BLOB_NAME;
37 |     const char *OUTPUT_PROB_NAME;
38 |     const char *OUTPUT_LOCATION_NAME;
39 |     const char *OUTPUT_POINT_NAME;
40 |     Logger gLogger;
41 |     IExecutionContext *context = nullptr; // set by caffeToGIEModel(); null until then instead of indeterminate
42 | };
43 |
44 |
45 | #endif //MAIN_BASEENGINE_H
46 |
--------------------------------------------------------------------------------
/src/common.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-4-30.
3 | //
4 |
5 | #include "common.h"
6 |
7 |
8 | void* safeCudaMalloc(size_t memSize)
9 | {
10 | void* deviceMem;
11 | CHECK(cudaMalloc(&deviceMem, memSize));
12 | if (deviceMem == nullptr)
13 | {
14 | std::cerr << "Out of memory" << std::endl;
15 | exit(1);
16 | }
17 | return deviceMem;
18 | }
19 | // Returns, for bindings 0..nbBindings-1 of `engine`, the pair
20 | // (element count = volume(binding dimensions) * batchSize, binding data type).
21 | std::vector<std::pair<int64_t, nvinfer1::DataType>>
22 | calculateBindingBufferSizes(const nvinfer1::ICudaEngine& engine, int nbBindings, int batchSize)
23 | {
24 |     std::vector<std::pair<int64_t, nvinfer1::DataType>> sizes;
25 |     for (int i = 0; i < nbBindings; ++i)
26 |     {
27 |         nvinfer1::Dims dims = engine.getBindingDimensions(i);
28 |         nvinfer1::DataType dtype = engine.getBindingDataType(i);
29 |
30 |         int64_t eltCount = volume(dims) * batchSize; // counts elements, not bytes
31 |         sizes.push_back(std::make_pair(eltCount, dtype));
32 |     }
33 |
34 |     return sizes;
35 | }
36 | // Product of all d.nbDims extents stored in d.d (1 when there are no dimensions).
37 | // NOTE(review): declared inline in common.h but defined only in this file — confirm no other file calls it.
38 | inline int64_t volume(const nvinfer1::Dims& d)
39 | {
40 |     int64_t product = 1;
41 |     for (int axis = d.nbDims - 1; axis >= 0; --axis)
42 |         product *= d.d[axis];
43 |     return product;
44 | }
45 | // Appends to `paths` one entry (file name, imagesPath + "/" + file name) for every non-directory
46 | // entry of imagesPath except "README.md". An unreadable directory is reported on stderr and adds nothing.
47 | void getFilePaths(std::string imagesPath, std::vector<struct Paths>& paths) {
48 |     std::cout << "Parsing Directory: " << imagesPath << std::endl;
49 |     DIR *dir = opendir(imagesPath.c_str());
50 |     if (dir == NULL) {
51 |         std::cerr << "Could not open directory: " << imagesPath << std::endl;
52 |         return;
53 |     }
54 |     for (struct dirent *entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
55 |         const std::string fileName(entry->d_name);
56 |         if (entry->d_type == DT_DIR || fileName == "README.md") continue; // skip sub-directories and the placeholder README
57 |         struct Paths tempPaths;
58 |         tempPaths.fileName = fileName;
59 |         tempPaths.absPath = imagesPath + "/" + fileName;
60 |         paths.push_back(tempPaths);
61 |     }
62 |     closedir(dir);
63 | }
64 |
65 | // Reads the image at inputFilePath into `image`, then resizes it in place to videoFrameWidth x videoFrameHeight.
66 | void loadInputImage(std::string inputFilePath, cv::Mat& image, int videoFrameWidth, int videoFrameHeight) {
67 |     const cv::Size frameSize(videoFrameWidth, videoFrameHeight);
68 |     image = cv::imread(inputFilePath.c_str());
69 |     cv::resize(image, image, frameSize);
70 | }
--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-4-30.
3 | //
4 |
5 | #ifndef _TRT_COMMON_H_
6 | #define _TRT_COMMON_H_
7 | #include "NvInfer.h"
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | // Aborts with a diagnostic when `status` is non-zero. The argument is evaluated exactly once, and the message is flushed before abort().
20 | #define CHECK(status) \
21 | { auto check_status_ = (status); \
22 | if (check_status_ != 0) \
23 | { \
24 | std::cout << "Cuda failure: " << check_status_ << std::endl; \
25 | abort(); \
26 | } \
27 | }
28 |
29 |
30 | // Logger for GIE (TensorRT) messages: writes each one to std::cerr, prefixed with its severity.
31 | class Logger : public nvinfer1::ILogger
32 | {
33 | public:
34 |     void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
35 |     {
36 |         // No severity is filtered out; anything other than the four levels
37 |         // named below is labelled UNKNOWN.
38 |         const char* label = "UNKNOWN: ";
39 |         if (severity == Severity::kINTERNAL_ERROR)
40 |             label = "INTERNAL_ERROR: ";
41 |         else if (severity == Severity::kERROR)
42 |             label = "ERROR: ";
43 |         else if (severity == Severity::kWARNING)
44 |             label = "WARNING: ";
45 |         else if (severity == Severity::kINFO)
46 |             label = "INFO: ";
47 |         std::cerr << label << msg << std::endl;
48 |     }
49 | };
50 |
51 | struct Paths { // one file found by getFilePaths()
52 | std::string absPath; // directory passed to getFilePaths() + "/" + fileName (absolute only if that directory is)
53 | std::string fileName; // bare directory-entry name
54 | };
55 |
56 | struct KnownID { // NOTE(review): presumably one enrolled identity; its users are not visible from this header — confirm
57 | std::string className;
58 | int classNumber;
59 | std::vector embeddedFace; // NOTE(review): the element type is missing in this copy of the file — restore it from the original source
60 | };
61 | // Reports whether the file `name` can be opened for reading.
62 | inline bool fileExists(const std::string &name) {
63 |     // The temporary stream is closed again as soon as the expression ends.
64 |     return std::ifstream(name.c_str()).good();
65 | }
66 |
67 | void* safeCudaMalloc(size_t memSize); // device allocation that aborts/exits instead of returning null
68 | inline int64_t volume(const nvinfer1::Dims& d); // NOTE(review): declared inline here but defined only in common.cpp
69 | std::vector>
70 | calculateBindingBufferSizes(const nvinfer1::ICudaEngine& engine, int nbBindings, int batchSize); // NOTE(review): the return type's template arguments are missing in this copy of the file
71 | void getFilePaths(std::string imagesPath, std::vector& paths); // appends the non-directory entries of imagesPath (except README.md) to `paths`
72 | void loadInputImage(std::string inputFilePath, cv::Mat& image, int videoFrameWidth, int videoFrameHeight); // reads the image and resizes it to the given frame size
73 |
74 | #endif // _TRT_COMMON_H_
75 |
--------------------------------------------------------------------------------
/src/faceNet.cpp:
--------------------------------------------------------------------------------
1 | #include "faceNet.h"
2 | #include
3 | #include
4 |
5 | int FaceNetClassifier::m_classCount = 0; // static member shared by all FaceNetClassifier instances; starts at 0
6 | // Copies the configuration into members, pre-reserves the face and embedding vectors, then builds or loads the TensorRT engine.
7 | FaceNetClassifier::FaceNetClassifier
8 | (Logger gLogger, DataType dtype, const string uffFile, const string engineFile, int batchSize, bool serializeEngine,
9 | float knownPersonThreshold, int maxFacesPerScene, int frameWidth, int frameHeight) {
10 |
11 | m_INPUT_C = static_cast(3); // network input is fixed at 3 x 160 x 160, independent of the frame size
12 | m_INPUT_H = static_cast(160);
13 | m_INPUT_W = static_cast(160);
14 | m_frameWidth = static_cast(frameWidth);
15 | m_frameHeight = static_cast(frameHeight);
16 | m_gLogger = gLogger; // Logger is passed by value, so this stores a copy
17 | m_dtype = dtype;
18 | m_uffFile = static_cast(uffFile);
19 | m_engineFile = static_cast(engineFile);
20 | m_batchSize = batchSize;
21 | m_serializeEngine = serializeEngine;
22 | m_maxFacesPerScene = maxFacesPerScene;
23 | m_croppedFaces.reserve(maxFacesPerScene); // reserve only; the vector stays empty
24 | m_embeddings.reserve(128); // reserve only; the vector stays empty
25 | m_knownPersonThresh = knownPersonThreshold;
26 |
27 | // load engine from .engine file or create new engine
28 | this->createOrLoadEngine(); // needs m_engineFile, m_uffFile, m_dtype, m_batchSize and m_serializeEngine, all set above
29 | }
30 | // Sets m_engine from the serialized plan at m_engineFile when that file exists; otherwise builds it from the UFF
31 | // model (writing the plan to m_engineFile if m_serializeEngine is set). Always ends by creating m_context.
32 | void FaceNetClassifier::createOrLoadEngine() {
33 |     if(fileExists(m_engineFile)) {
34 |         std::vector<char> trtModelStream_;
35 |         size_t size{ 0 };
36 |
37 |         std::ifstream file(m_engineFile, std::ios::binary);
38 |         if (file.good())
39 |         {
40 |             file.seekg(0, file.end);
41 |             size = file.tellg();
42 |             file.seekg(0, file.beg);
43 |             trtModelStream_.resize(size);
44 |             std::cout << "size" << trtModelStream_.size() << std::endl;
45 |             file.read(trtModelStream_.data(), size);
46 |             file.close();
47 |         }
48 |         // std::cout << "size" << size;
49 |         IRuntime* runtime = createInferRuntime(m_gLogger);
50 |         assert(runtime != nullptr);
51 |         m_engine = runtime->deserializeCudaEngine(trtModelStream_.data(), size, nullptr);
52 |         std::cout << std::endl;
53 |     }
54 |     else {
55 |         IBuilder *builder = createInferBuilder(m_gLogger);
56 |         IBuilderConfig* config = builder->createBuilderConfig();
57 |         INetworkDefinition *network = builder->createNetworkV2(0U);
58 |         IUffParser *parser = createUffParser();
59 |         parser->registerInput("input", Dims3(160, 160, 3), UffInputOrder::kNHWC);
60 |         parser->registerOutput("Bottleneck/BatchNorm/batchnorm/add_1");
61 |
62 |         if (!parser->parse(m_uffFile.c_str(), *network, m_dtype))
63 |         {
64 |             cout << "Failed to parse UFF\n";
65 |             config->destroy(); // release everything created above before reporting the failure
66 |             builder->destroy();
67 |             parser->destroy();
68 |             network->destroy();
69 |             throw std::exception();
70 |         }
71 |         /* build engine */
72 |         if (m_dtype == DataType::kHALF)
73 |         {
74 |             config->setFlag(BuilderFlag::kFP16);
75 |         }
76 |         else if (m_dtype == DataType::kINT8) {
77 |             config->setFlag(BuilderFlag::kINT8);
78 |             // ToDo
79 |             //builder->setInt8Calibrator()
80 |         }
81 |         builder->setMaxBatchSize(m_batchSize);
82 |         config->setMaxWorkspaceSize(1<<30);
83 |         // strict will force selected datatype, even when another was faster
84 |         //builder->setStrictTypeConstraints(true);
85 |         // Disable DLA, because many layers are still not supported
86 |         // and this causes additional latency.
87 |         //builder->allowGPUFallback(true);
88 |         //builder->setDefaultDeviceType(DeviceType::kDLA);
89 |         //builder->setDLACore(1);
90 |         m_engine = builder->buildEngineWithConfig(*network, *config);
91 |
92 |         /* serialize engine and write to file (raw bytes, hence binary mode) */
93 |         if(m_serializeEngine) {
94 |             ofstream planFile(m_engineFile, std::ios::binary);
95 |             IHostMemory *serializedEngine = m_engine->serialize();
96 |             planFile.write((char *) serializedEngine->data(), serializedEngine->size());
97 |             planFile.close();
98 |             serializedEngine->destroy(); // the plan is on disk now; release the host copy
99 |         }
100 |         /* break down */
101 |         config->destroy();
102 |         builder->destroy();
103 |         parser->destroy();
104 |         network->destroy();
105 |     }
106 |     m_context = m_engine->createExecutionContext();
107 | }
108 |
109 | // Crops every box flagged `exist` out of `frame`, resizes the crop to 160x160 and appends it, with the box corners, to m_croppedFaces.
110 | void FaceNetClassifier::getCroppedFacesAndAlign(cv::Mat frame, std::vector outputBbox) {
111 | for(vector::iterator it=outputBbox.begin(); it!=outputBbox.end();it++){
112 | if((*it).exist){
113 | cv::Rect facePos(cv::Point((*it).y1, (*it).x1), cv::Point((*it).y2, (*it).x2)); // the box's y* fields feed Point.x and its x* fields feed Point.y
114 | cv::Mat tempCrop = frame(facePos); // NOTE(review): no bounds check against the frame here — confirm the boxes are clamped upstream
115 | struct CroppedFace currFace;
116 | cv::resize(tempCrop, currFace.faceMat, cv::Size(160, 160), 0, 0, cv::INTER_CUBIC);
117 | currFace.x1 = it->x1; // corners are copied as-is, without the swap used for the crop above
118 | currFace.y1 = it->y1;
119 | currFace.x2 = it->x2;
120 | currFace.y2 = it->y2;
121 | m_croppedFaces.push_back(currFace);
122 | }
123 | }
124 | //ToDo align (no alignment is performed yet, despite the function name)
125 | }
126 |
127 | void FaceNetClassifier::preprocessFaces() {
128 |     // preprocess according to facenet training and flatten for input to runtime engine:
129 |     // every cropped face is standardised to (pixel - mean) / stddev, where mean and stddev are
130 |     // taken over all values of all channels, and ends up as a CV_32FC3 matrix.
131 |     for (size_t i = 0; i < m_croppedFaces.size(); i++) {
132 |         cv::Mat &face = m_croppedFaces[i].faceMat;
133 |         cv::cvtColor(face, face, cv::COLOR_RGB2BGR);  // swap the red and blue channels
134 |
135 |         // mean and std: view the image as a single channel so one value covers all channels
136 |         cv::Mat mean3;
137 |         cv::Mat stddev3;
138 |         cv::meanStdDev(face.reshape(1, face.rows * 3), mean3, stddev3);
139 |         const double mean_pxl = mean3.at<double>(0);
140 |
141 |         // A perfectly flat crop has stddev 0 and would turn into inf/NaN below, so clamp the
142 |         // divisor to 1/sqrt(number of values) (the lower bound facenet's prewhiten() applies).
143 |         const double valueCount = static_cast<double>(face.total() * face.channels());
144 |         const double stddev_pxl = std::max(stddev3.at<double>(0), 1.0 / std::sqrt(valueCount));
145 |
146 |         // do the arithmetic in double precision, then hand float data to the engine
147 |         cv::Mat image2;
148 |         face.convertTo(image2, CV_64F);
149 |         image2 -= cv::Scalar(mean_pxl, mean_pxl, mean_pxl, 0);
150 |         image2 /= stddev_pxl;
151 |
152 |         image2.convertTo(face, CV_32F);
153 |     }
154 | }
155 |
156 |
157 | void FaceNetClassifier::doInference(float* inputData, float* output) {
158 |     // Runs one synchronous inference: copies m_batchSize 3x160x160 float inputs to the GPU,
159 |     // executes the engine and copies m_batchSize 128-float embeddings back into `output`.
160 |     const size_t inputBytes = sizeof(float) * 3 * 160 * 160 * m_batchSize;
161 |     const size_t outputBytes = sizeof(float) * 128 * m_batchSize;
162 |     const int inputIndex = m_engine->getBindingIndex("input");
163 |     const int outputIndex = m_engine->getBindingIndex("Bottleneck/BatchNorm/batchnorm/add_1");
164 |     if (inputIndex < 0 || inputIndex > 1 || outputIndex < 0 || outputIndex > 1) {
165 |         // an unknown binding name yields -1, and buffers[] below only has two slots
166 |         cout << "FaceNet engine does not expose the expected input/output bindings\n";
167 |         throw std::exception();
168 |     }
169 |     void* buffers[2];
170 |     CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
171 |     CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));
172 |     cudaStream_t stream;
173 |     CHECK(cudaStreamCreate(&stream));
174 |     // copy data to GPU and execute
175 |     CHECK(cudaMemcpyAsync(buffers[inputIndex], inputData, inputBytes, cudaMemcpyHostToDevice, stream));
176 |     m_context->enqueue(m_batchSize, &buffers[0], stream, nullptr);
177 |     CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes, cudaMemcpyDeviceToHost, stream));
178 |     CHECK(cudaStreamSynchronize(stream));  // `output` is only complete once the stream has drained
179 |     CHECK(cudaStreamDestroy(stream));  // Release the stream and the buffers
180 |     CHECK(cudaFree(buffers[inputIndex])); CHECK(cudaFree(buffers[outputIndex]));
181 | }
182 |
183 |
184 | void FaceNetClassifier::forwardAddFace(cv::Mat image, std::vector<struct Bbox> outputBbox,
185 |                                        const string className) {
186 |     // Enrols a known person: embeds the first face cropped from `image` and stores the
187 |     // 128-float embedding with `className` in m_knownFaces. Without a face nothing is added.
188 |     getCroppedFacesAndAlign(image, outputBbox);
189 |     if(!m_croppedFaces.empty()) {
190 |         preprocessFaces();
191 |         doInference((float*)m_croppedFaces[0].faceMat.ptr(0), m_output);
192 |         struct KnownID person;
193 |         person.className = className;
194 |         person.classNumber = m_classCount;
195 |         person.embeddedFace.insert(person.embeddedFace.begin(), m_output, m_output+128);
196 |         m_knownFaces.push_back(person);
197 |         m_classCount++;
198 |     }
199 |     m_croppedFaces.clear();  // the crops are only needed for this one enrolment
200 | }
201 |
202 | void FaceNetClassifier::forward(cv::Mat frame, std::vector<struct Bbox> outputBbox) {
203 |     getCroppedFacesAndAlign(frame, outputBbox); // ToDo align faces according to points
204 |     preprocessFaces();
205 |     for (size_t i = 0; i < m_croppedFaces.size(); i++) {  // appends one 128-float embedding per cropped face
206 |         doInference((float*)m_croppedFaces[i].faceMat.ptr(0), m_output);
207 |         m_embeddings.insert(m_embeddings.end(), m_output, m_output+128);
208 |     }
209 | }
210 |
211 | void FaceNetClassifier::featureMatching(cv::Mat &image) {
212 |     // Matches every embedding collected by forward() against the known faces and annotates
213 |     // `image`: a red box around each face plus the best match's name, or "New Person" when no
214 |     // known face lies within m_knownPersonThresh.
215 |     const size_t faceCount = std::min(m_embeddings.size() / 128, m_croppedFaces.size());
216 |     for (size_t i = 0; i < faceCount; i++) {
217 |         // The embedding of face i is the same for every comparison, so slice it out once.
218 |         const std::vector<float> currEmbedding(m_embeddings.begin() + i * 128,
219 |                                                m_embeddings.begin() + (i + 1) * 128);
220 |         double minDistance = 10.* m_knownPersonThresh;
221 |         int winner = -1;
222 |         for (size_t j = 0; j < m_knownFaces.size(); j++) {
223 |             const float currDistance = vectors_distance(currEmbedding, m_knownFaces[j].embeddedFace);
224 |             if (currDistance < minDistance) {
225 |                 minDistance = currDistance;
226 |                 winner = static_cast<int>(j);
227 |             }
228 |         }
229 |
230 |         const CroppedFace &face = m_croppedFaces[i];
231 |         const float fontScaler = static_cast<float>(face.x2 - face.x1)/static_cast<float>(m_frameWidth);
232 |         // x holds the row and y the column of a box, hence the swapped cv::Point arguments.
233 |         cv::rectangle(image, cv::Point(face.y1, face.x1), cv::Point(face.y2, face.x2),
234 |                       cv::Scalar(0,0,255), 2,8,0);
235 |         // winner is still -1 when no known face beat the initial minDistance; never index with it.
236 |         const bool known = (winner >= 0) && (minDistance <= m_knownPersonThresh);
237 |         const string label = known ? m_knownFaces[winner].className : string("New Person");
238 |         cv::putText(image, label, cv::Point(face.y1+2, face.x2-3),
239 |                     cv::FONT_HERSHEY_DUPLEX, 0.1 + 2*fontScaler, cv::Scalar(0,0,255,255), 1);
240 |     }
241 | }
242 |
243 | void FaceNetClassifier::addNewFace(cv::Mat &image, std::vector<struct Bbox> outputBbox) {
244 |     // Interactive enrolment: reads a name from stdin, registers the face in `image` under that
245 |     // name via forwardAddFace() and saves the frame as ../imgs/<name>.jpg.
246 |     std::cout << "Adding new person...\nPlease make sure there is only one face in the current frame.\n"
247 |               << "What's your name? ";
248 |     string newName;
249 |     std::cin >> newName;
250 |     std::cout << "Hi " << newName << ", you will be added to the database.\n";
251 |     forwardAddFace(image, outputBbox, newName);
252 |     const string filePath = "../imgs/" + newName + ".jpg";
253 |     cv::imwrite(filePath, image);
254 | }
255 |
256 | void FaceNetClassifier::resetVariables() { // drops the per-frame state so the next frame starts clean
257 | m_embeddings.clear(); // embeddings appended by forward()
258 | m_croppedFaces.clear(); // crops appended by getCroppedFacesAndAlign()
259 | }
260 |
261 | FaceNetClassifier::~FaceNetClassifier() {
262 | // NOTE: the destructor is intentionally empty, so m_engine and m_context are never released.
263 | // The cleanup below is disabled because it leads to a segfault
264 | // (NOTE(review): possibly the engine must outlive its context; confirm the required order):
265 | // this->m_engine->destroy(); this->m_context->destroy();
266 | }
267 |
268 | std::vector<float> l2Normalize(const std::vector<float>& vec) {
269 |     // Returns `vec` scaled to unit Euclidean length. A vector whose norm is zero (all zeros
270 |     // or empty) is returned unchanged instead of being divided by zero.
271 |     float norm = 0.0;
272 |     for (const auto& element : vec) norm += element * element;
273 |     norm = std::sqrt(norm);
274 |     std::vector<float> normalizedVec(vec);
275 |     if (norm > 0.0f) {
276 |         for (auto& element : normalizedVec) element /= norm;
277 |     }
278 |     return normalizedVec;
279 | }
280 |
281 | // HELPER FUNCTIONS
282 | // Computes the Euclidean distance between the L2-normalised versions of two std::vectors.
283 | float vectors_distance(const std::vector<float>& aa, const std::vector<float>& bb) {
284 |     // Only the dimensions both vectors have are compared, so a shorter `bb` is never overrun.
285 |     const std::vector<float> a = l2Normalize(aa);
286 |     const std::vector<float> b = l2Normalize(bb);
287 |     const std::size_t dims = std::min(a.size(), b.size());
288 |     float loopSum = 0.;
289 |     for (std::size_t i = 0; i < dims; ++i) {
290 |         const float diff = a[i] - b[i];
291 |         loopSum += diff * diff;
292 |     }
293 |     return std::sqrt(loopSum);
294 | }
295 |
296 |
297 |
298 | inline unsigned int elementSize(nvinfer1::DataType t)
299 | {
300 |     // Byte width of a single element of the given TensorRT data type.
301 |     const bool fourBytes = (t == nvinfer1::DataType::kFLOAT) || (t == nvinfer1::DataType::kINT32);
302 |     if (fourBytes)
303 |         return 4;  // kINT32 has the same width as kFLOAT
304 |     if (t == nvinfer1::DataType::kHALF)
305 |         return 2;
306 |     if (t == nvinfer1::DataType::kINT8)
307 |         return 1;
308 |     assert(0);  // any other data type is not handled here
309 |     return 0;
310 | }
311 |
--------------------------------------------------------------------------------
/src/faceNet.h:
--------------------------------------------------------------------------------
1 | #ifndef FACE_RECOGNITION_FACENET_H
2 | #define FACE_RECOGNITION_FACENET_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 | #include
16 | #include
17 | #include
18 | #include "common.h"
19 | #include "pBox.h"
20 |
21 | using namespace nvinfer1;
22 | using namespace nvuffparser;
23 |
24 | struct CroppedFace { // a face cut out of a frame together with the box it was cut from
25 | cv::Mat faceMat; // the crop; getCroppedFacesAndAlign() stores it resized to 160x160
26 | int x1, y1, x2, y2; // box corners copied from the detector's Bbox (x is used as row, y as column)
27 | };
28 |
29 |
30 | class FaceNetClassifier  // FaceNet embedder on a TensorRT engine plus nearest-neighbour matching against known faces
31 | {
32 | public:
33 |     FaceNetClassifier(Logger gLogger, DataType dtype, const string uffFile, const string engineFile, int batchSize,
34 |             bool serializeEngine, float knownPersonThreshold, int maxFacesPerScene, int frameWidth, int frameHeight);
35 |     ~FaceNetClassifier();
36 |
37 |     void createOrLoadEngine();                                                    // sets up m_engine and m_context
38 |     void getCroppedFacesAndAlign(cv::Mat frame, std::vector<struct Bbox> outputBbox);  // fills m_croppedFaces with 160x160 crops
39 |     void preprocessFaces();                                                       // standardises every crop in m_croppedFaces
40 |     void doInference(float* inputData, float* output);                            // one engine run: input image -> 128 floats
41 |     void forwardAddFace(cv::Mat image, std::vector<struct Bbox> outputBbox, const string className);  // enrols a known face
42 |     void forward(cv::Mat image, std::vector<struct Bbox> outputBbox);             // appends one embedding per face to m_embeddings
43 |     void featureMatching(cv::Mat &image);                                         // labels and draws the faces of the current frame
44 |     void addNewFace(cv::Mat &image, std::vector<struct Bbox> outputBbox);         // asks for a name on stdin and enrols the face
45 |     void resetVariables();                                                        // clears the per-frame state
46 |
47 | private:
48 |     static int m_classCount;              // running class number handed to newly enrolled faces
49 |     int m_INPUT_C;
50 |     int m_INPUT_H;
51 |     int m_INPUT_W;
52 |     int m_frameWidth, m_frameHeight;
53 |     Logger m_gLogger;
54 |     DataType m_dtype;                     // precision the engine is built with
55 |     string m_uffFile;
56 |     string m_engineFile;                  // file the serialized engine is written to
57 |     int m_batchSize;
58 |     bool m_serializeEngine;
59 |     int m_maxFacesPerScene;
60 |     ICudaEngine *m_engine;
61 |     IExecutionContext *m_context;
62 |     float m_output[128];                  // embedding produced by the latest doInference() call
63 |     std::vector<float> m_embeddings;      // 128 floats per face of the current frame, back to back
64 |     std::vector<struct KnownID> m_knownFaces;
65 |     // crops of the current frame, in the same order as the embeddings in m_embeddings
66 |     std::vector<struct CroppedFace> m_croppedFaces;
67 |     float m_knownPersonThresh;            // largest distance that still counts as a known person
68 | };
69 |
70 | float vectors_distance(const std::vector<float>& a, const std::vector<float>& b);  // Euclidean distance of the L2-normalised vectors
71 | inline unsigned int elementSize(nvinfer1::DataType t);  // size in bytes of one element of type t
72 |
73 | #endif //FACE_RECOGNITION_FACENET_H
74 |
--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "faceNet.h"
8 | #include "videoStreamer.h"
9 | #include "network.h"
10 | #include "mtcnn.h"
11 |
12 | // Uncomment to print timings in milliseconds
13 | // #define LOG_TIMES
14 |
15 | using namespace nvinfer1;
16 | using namespace nvuffparser;
17 |
18 |
19 | int main()
20 | {   // Demo: enrol the faces found in ../imgs, then detect and label faces on the live video stream.
21 |     Logger gLogger = Logger();
22 |     // Register default TRT plugins (e.g. LRelu_TRT)
23 |     if (!initLibNvInferPlugins(&gLogger, "")) { return 1; }
24 |
25 |     // USER DEFINED VALUES
26 |     const string uffFile="../facenetModels/facenet.uff";
27 |     const string engineFile="../facenetModels/facenet.engine";
28 |     DataType dtype = DataType::kHALF;
29 |     //DataType dtype = DataType::kFLOAT;
30 |     bool serializeEngine = true;
31 |     int batchSize = 1;
32 |     int nbFrames = 0;
33 |     int videoFrameWidth = 640;
34 |     int videoFrameHeight = 480;
35 |     int maxFacesPerScene = 5;
36 |     float knownPersonThreshold = 1.;
37 |     bool isCSICam = false;
38 |
39 |     // init facenet
40 |     FaceNetClassifier faceNet = FaceNetClassifier(gLogger, dtype, uffFile, engineFile, batchSize, serializeEngine,
41 |             knownPersonThreshold, maxFacesPerScene, videoFrameWidth, videoFrameHeight);
42 |
43 |     // init opencv stuff
44 |     VideoStreamer videoStreamer = VideoStreamer(0, videoFrameWidth, videoFrameHeight, 60, isCSICam);
45 |     cv::Mat frame;
46 |
47 |     // init mtCNN
48 |     mtcnn mtCNN(videoFrameHeight, videoFrameWidth);
49 |
50 |     //init Bbox and allocate memory for "maxFacesPerScene" faces per scene
51 |     std::vector<struct Bbox> outputBbox;
52 |     outputBbox.reserve(maxFacesPerScene);
53 |
54 |     // get embeddings of known faces
55 |     std::vector<struct Paths> paths;
56 |     cv::Mat image;
57 |     getFilePaths("../imgs", paths);
58 |     for(size_t i=0; i < paths.size(); i++) {
59 |         loadInputImage(paths[i].absPath, image, videoFrameWidth, videoFrameHeight);
60 |         outputBbox = mtCNN.findFace(image);
61 |         std::size_t index = paths[i].fileName.find_last_of(".");
62 |         std::string rawName = paths[i].fileName.substr(0,index);
63 |         faceNet.forwardAddFace(image, outputBbox, rawName);
64 |         faceNet.resetVariables();
65 |     }
66 |     outputBbox.clear();
67 |
68 |     // loop over frames with inference
69 |     auto globalTimeStart = chrono::steady_clock::now();
70 |     while (true) {
71 |         auto fps_start = chrono::steady_clock::now();
72 |         videoStreamer.getFrame(frame);
73 |         if (frame.empty()) {
74 |             std::cout << "Empty frame! Exiting...\n Try restarting nvargus-daemon by "
75 |                          "doing: sudo systemctl restart nvargus-daemon" << std::endl;
76 |             break;
77 |         }
78 |         auto startMTCNN = chrono::steady_clock::now();
79 |         outputBbox = mtCNN.findFace(frame);
80 |         auto endMTCNN = chrono::steady_clock::now();
81 |         auto startForward = chrono::steady_clock::now();
82 |         faceNet.forward(frame, outputBbox);
83 |         auto endForward = chrono::steady_clock::now();
84 |         auto startFeatM = chrono::steady_clock::now();
85 |         faceNet.featureMatching(frame);
86 |         auto endFeatM = chrono::steady_clock::now();
87 |         faceNet.resetVariables();
88 |
89 |         auto fps_end = chrono::steady_clock::now();
90 |         auto milliseconds = chrono::duration_cast<chrono::milliseconds>(fps_end-fps_start).count();
91 |         float fps = (milliseconds > 0) ? 1000.f / milliseconds : 0.f;  // float division; a frame under 1 ms must not divide by zero
92 |         std::string label = cv::format("FPS: %.2f ", fps);
93 |         cv::putText(frame, label, cv::Point(15, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 0), 2);
94 |
95 |         cv::imshow("VideoSource", frame);
96 |         nbFrames++;
97 |         outputBbox.clear();
98 |         frame.release();
99 |
100 |         char keyboard = cv::waitKey(1);
101 |         if (keyboard == 'q' || keyboard == 27)
102 |             break;
103 |         else if(keyboard == 'n') {
104 |             auto dTimeStart = chrono::steady_clock::now();
105 |             videoStreamer.getFrame(frame);
106 |             outputBbox = mtCNN.findFace(frame);
107 |             cv::imshow("VideoSource", frame);
108 |             faceNet.addNewFace(frame, outputBbox);
109 |             auto dTimeEnd = chrono::steady_clock::now();
110 |             globalTimeStart += (dTimeEnd - dTimeStart);  // keep the time spent enrolling out of the fps statistics
111 |         }
112 |
113 | #ifdef LOG_TIMES
114 |         std::cout << "mtCNN took " << std::chrono::duration_cast<std::chrono::milliseconds>(endMTCNN - startMTCNN).count() << "ms\n";
115 |         std::cout << "Forward took " << std::chrono::duration_cast<std::chrono::milliseconds>(endForward - startForward).count() << "ms\n";
116 |         std::cout << "Feature matching took " << std::chrono::duration_cast<std::chrono::milliseconds>(endFeatM - startFeatM).count() << "ms\n\n";
117 | #endif  // LOG_TIMES
118 |     }
119 |     auto globalTimeEnd = chrono::steady_clock::now();
120 |     cv::destroyAllWindows();
121 |     videoStreamer.release();
122 |     auto milliseconds = chrono::duration_cast<chrono::milliseconds>(globalTimeEnd-globalTimeStart).count();
123 |     double seconds = double(milliseconds)/1000.;
124 |     double fps = (seconds > 0.) ? nbFrames/seconds : 0.;
125 |
126 |     std::cout << "Counted " << nbFrames << " frames in " << double(milliseconds)/1000. << " seconds!" <<
127 |               " This equals " << fps << "fps.\n";
128 |
129 |     return 0;
130 | }
131 |
132 |
--------------------------------------------------------------------------------
/src/mtcnn.cpp:
--------------------------------------------------------------------------------
1 | #include "mtcnn.h"
2 | // #define LOG
3 | mtcnn::mtcnn(int row, int col){
4 | //set NMS thresholds
5 | nms_threshold[0] = 0.7;
6 | nms_threshold[1] = 0.7;
7 | nms_threshold[2] = 0.7;
8 | //set minimal face size (width in pixels)
9 | int minsize = 60;
10 | /*config the pyramids */
11 | float minl = rowMIN_DET_SIZE){
18 | if(factor_count>0)m = m*factor;
19 | scales_.push_back(m);
20 | minl *= factor;
21 | factor_count++;
22 | }
23 | float minside = row::iterator it = scales_.begin(); it != scales_.end(); it++){
26 | if (*it > 1){
27 | cout << "the minsize is too small" << endl;
28 | while (1);
29 | }
30 | if (*it < (MIN_DET_SIZE / minside)){
31 | scales_.resize(count);
32 | break;
33 | }
34 | count++;
35 | }
36 |
37 | cout<<"\nStart generating mtCNN TenosrRT runtime models"<init(24,24);
54 | refineNet = new Rnet(*rnet_engine);
55 | cout<<"End generate rnet runtime models"<init(48,48);
60 | outNet = new Onet(*onet_engine);
61 | cout<<"End generating TensorRT runtime models"< mtcnn::findFace(cv::Mat &image){
69 | firstBbox_.clear();
70 | firstOrderScore_.clear();
71 | secondBbox_.clear();
72 | secondBboxScore_.clear();
73 | thirdBbox_.clear();
74 | thirdBboxScore_.clear();
75 |
76 | struct orderScore order;
77 | int count = 0;
78 |
79 | clock_t first_time = clock();
80 | for (size_t i = 0; i < scales_.size(); i++) {
81 | int changedH = (int)ceil(image.rows*scales_.at(i));
82 | int changedW = (int)ceil(image.cols*scales_.at(i));
83 | clock_t run_first_time = clock();
84 | resize(image, reImage, cv::Size(changedW, changedH), 0, 0, cv::INTER_LINEAR);
85 | (*simpleFace_[i]).run(reImage, scales_.at(i),pnet_engine[i]);
86 |
87 | #ifdef LOG
88 | run_first_time = clock() - run_first_time;
89 | cout<<"first model inference time is "<<1000*(double)run_first_time/CLOCKS_PER_SEC<::iterator it=(*simpleFace_[i]).boundingBox_.begin(); it!= (*simpleFace_[i]).boundingBox_.end();it++){
94 | if((*it).exist){
95 | firstBbox_.push_back(*it);
96 | order.score = (*it).score;
97 | order.oriOrder = count;
98 | firstOrderScore_.push_back(order);
99 | count++;
100 | }
101 | }
102 | (*simpleFace_[i]).bboxScore_.clear();
103 | (*simpleFace_[i]).boundingBox_.clear();
104 | }
105 | //the first stage's nms
106 | vector emptyBbox;
107 | if(count<1)return emptyBbox;
108 | nms(firstBbox_, firstOrderScore_, nms_threshold[0]);
109 | refineAndSquareBbox(firstBbox_, image.rows, image.cols,true);
110 | #ifdef LOG
111 | first_time = clock() - first_time;
112 | cout<<"first time is "<<1000*(double)first_time/CLOCKS_PER_SEC<::iterator it=firstBbox_.begin(); it!=firstBbox_.end();it++){
118 | if((*it).exist){
119 | cv::Rect temp((*it).y1, (*it).x1, (*it).y2-(*it).y1, (*it).x2-(*it).x1);
120 | cv::Mat secImage;
121 | resize(image(temp), secImage, cv::Size(24, 24), 0, 0, cv::INTER_LINEAR);
122 | transpose(secImage,secImage);
123 | refineNet->run(secImage,*rnet_engine);
124 | if(*(refineNet->score_->pdata+1)>refineNet->Rthreshold){
125 | memcpy(it->regreCoord, refineNet->location_->pdata, 4*sizeof(mydataFmt));
126 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
127 | it->score = *(refineNet->score_->pdata+1);
128 | secondBbox_.push_back(*it);
129 | order.score = it->score;
130 | order.oriOrder = count++;
131 | secondBboxScore_.push_back(order);
132 | }
133 | else{
134 | (*it).exist=false;
135 | }
136 | }
137 | }
138 | if(count<1)return emptyBbox;
139 | nms(secondBbox_, secondBboxScore_, nms_threshold[1]);
140 | refineAndSquareBbox(secondBbox_, image.rows, image.cols,true);
141 | second_time = clock() - second_time;
142 | #ifdef LOG
143 | cout<<"second time is "<<1000*(double)second_time/CLOCKS_PER_SEC<::iterator it=secondBbox_.begin(); it!=secondBbox_.end();it++){
149 | if((*it).exist){
150 | cv::Rect temp((*it).y1, (*it).x1, (*it).y2-(*it).y1, (*it).x2-(*it).x1);
151 | cv::Mat thirdImage;
152 | resize(image(temp), thirdImage, cv::Size(48, 48), 0, 0, cv::INTER_LINEAR);
153 | transpose(thirdImage,thirdImage);
154 | outNet->run(thirdImage,*onet_engine);
155 | mydataFmt *pp=NULL;
156 | if(*(outNet->score_->pdata+1)>outNet->Othreshold){
157 | memcpy(it->regreCoord, outNet->location_->pdata, 4*sizeof(mydataFmt));
158 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
159 | it->score = *(outNet->score_->pdata+1);
160 | pp = outNet->points_->pdata;
161 | for(int num=0;num<5;num++){
162 | (it->ppoint)[num] = it->y1 + (it->y2 - it->y1)*(*(pp+num));
163 | }
164 | for(int num=0;num<5;num++){
165 | (it->ppoint)[num+5] = it->x1 + (it->x2 - it->x1)*(*(pp+num+5));
166 | }
167 | thirdBbox_.push_back(*it);
168 | order.score = it->score;
169 | order.oriOrder = count++;
170 | thirdBboxScore_.push_back(order);
171 | }
172 | else{
173 | it->exist=false;
174 | }
175 | }
176 | }
177 |
178 | if(count<1)return emptyBbox;
179 | refineAndSquareBbox(thirdBbox_, image.rows, image.cols, true);
180 | nms(thirdBbox_, thirdBboxScore_, nms_threshold[2], "Min");
181 | #ifdef LOG
182 | third_time = clock() - third_time;
183 | cout<<"third time is "<<1000*(double)third_time/CLOCKS_PER_SEC<::iterator it=thirdBbox_.begin(); it!=thirdBbox_.end();it++){
188 | // if((*it).exist){
189 | // rectangle(image, cv::Point((*it).y1, (*it).x1), cv::Point((*it).y2, (*it).x2), cv::Scalar(0,0,255), 2,8,0);
190 | // for(int num=0;num<5;num++)
191 | // circle(image,cv::Point((int)*(it->ppoint+num), (int)*(it->ppoint+num+5)),3,cv::Scalar(0,255,255), -1);
192 | // }
193 | // }
194 |
195 |
196 | return thirdBbox_;
197 |
198 | }
199 |
--------------------------------------------------------------------------------
/src/mtcnn.h:
--------------------------------------------------------------------------------
1 | #ifndef MTCNN_H
2 | #define MTCNN_H
3 | #include "network.h"
4 | #include "pnet_rt.h"
5 | #include "rnet_rt.h"
6 | #include "onet_rt.h"
7 | class mtcnn  // three-stage face detector: Pnet proposals, Rnet refinement, Onet output
8 | {
9 | public:
10 |     mtcnn(int row, int col);
11 |     ~mtcnn();
12 |     vector<struct Bbox> findFace(cv::Mat &image);   // returns the boxes that survive all three stages
13 | private:
14 |     cv::Mat reImage;                                // `image` resized to the scale currently processed
15 |     float nms_threshold[3];                         // one NMS threshold per stage
16 |     vector<float> scales_;                          // image pyramid scale factors
17 |     Pnet_engine *pnet_engine;
18 |     Pnet **simpleFace_;                             // indexed by pyramid scale
19 |     vector<struct Bbox> firstBbox_;
20 |     vector<struct orderScore> firstOrderScore_;
21 |     Rnet *refineNet;
22 |     Rnet_engine *rnet_engine;
23 |     vector<struct Bbox> secondBbox_;
24 |     vector<struct orderScore> secondBboxScore_;
25 |     Onet *outNet;
26 |     Onet_engine *onet_engine;
27 |     vector<struct Bbox> thirdBbox_;
28 |     vector<struct orderScore> thirdBboxScore_;
29 | };
30 |
31 | #endif
--------------------------------------------------------------------------------
/src/network.cpp:
--------------------------------------------------------------------------------
1 | #include "network.h"
2 | void image2Matrix(const cv::Mat &image, const struct pBox *pbox){
3 |     // Writes a CV_8UC3 image into pbox->pdata as three consecutive float planes (channel 2, then
4 |     // 1, then 0, i.e. R, G, B for OpenCV's usual BGR order), each value as (v - 127.5) / 128.
5 |     if ((image.data == NULL) || (image.type() != CV_8UC3)){
6 |         cout << "image's type is wrong!!Please set CV_8UC3" << endl;
7 |         return;
8 |     }
9 |     if (pbox->pdata == NULL) return;
10 |     const int planeSize = image.rows*image.cols;  // distance between two channel planes
11 |     mydataFmt *p = pbox->pdata;
12 |     for (int rowI = 0; rowI < image.rows; rowI++){
13 |         for (int colK = 0; colK < image.cols; colK++, p++){
14 |             *p = (image.at<cv::Vec3b>(rowI, colK)[2] - 127.5)*0.0078125;  // 1/128, same as the other channels
15 |             *(p + planeSize) = (image.at<cv::Vec3b>(rowI, colK)[1] - 127.5)*0.0078125;
16 |             *(p + 2*planeSize) = (image.at<cv::Vec3b>(rowI, colK)[0] - 127.5)*0.0078125;
17 |         }
18 |     }
19 | }
20 | bool cmpScore(struct orderScore lsh, struct orderScore rsh){
21 | if(lsh.score &boundingBox_, vector &bboxScore_, const float overlap_threshold, string modelname){
27 | if(boundingBox_.empty()){
28 | return;
29 | }
30 | std::vector heros;
31 | //sort the score
32 | sort(bboxScore_.begin(), bboxScore_.end(), cmpScore);
33 |
34 | int order = 0;
35 | float IOU = 0;
36 | float maxX = 0;
37 | float maxY = 0;
38 | float minX = 0;
39 | float minY = 0;
40 | while(bboxScore_.size()>0){
41 | order = bboxScore_.back().oriOrder;
42 | bboxScore_.pop_back();
43 | if(order<0)continue;
44 | heros.push_back(order);
45 | boundingBox_.at(order).exist = false;//delete it
46 |
47 | for(int num=0;numboundingBox_.at(order).x1)?boundingBox_.at(num).x1:boundingBox_.at(order).x1;
51 | maxY = (boundingBox_.at(num).y1>boundingBox_.at(order).y1)?boundingBox_.at(num).y1:boundingBox_.at(order).y1;
52 | minX = (boundingBox_.at(num).x20)?(minX-maxX+1):0;
56 | maxY = ((minY-maxY+1)>0)?(minY-maxY+1):0;
57 | //IOU reuse for the area of two bbox
58 | IOU = maxX * maxY;
59 | if(!modelname.compare("Union"))
60 | IOU = IOU/(boundingBox_.at(num).area + boundingBox_.at(order).area - IOU);
61 | else if(!modelname.compare("Min")){
62 | IOU = IOU/((boundingBox_.at(num).areaoverlap_threshold){
65 | boundingBox_.at(num).exist=false;
66 | for(vector::iterator it=bboxScore_.begin(); it!=bboxScore_.end();it++){
67 | if((*it).oriOrder == num) {
68 | (*it).oriOrder = -1;
69 | break;
70 | }
71 | }
72 | }
73 | }
74 | }
75 | }
76 | for(int i=0;i &vecBbox, const int &height, const int &width, bool square = true){
80 | if(vecBbox.empty()){
81 | cout<<"Bbox is empty!!"<::iterator it=vecBbox.begin(); it!=vecBbox.end();it++){
88 | if((*it).exist){
89 | bbh = (*it).x2 - (*it).x1 + 1;
90 | bbw = (*it).y2 - (*it).y1 + 1;
91 | x1 = (*it).x1 + (*it).regreCoord[1]*bbh;
92 | y1 = (*it).y1 + (*it).regreCoord[0]*bbw;
93 | x2 = (*it).x2 + (*it).regreCoord[3]*bbh;
94 | y2 = (*it).y2 + (*it).regreCoord[2]*bbw;
95 |
96 |
97 |
98 | h = x2 - x1 + 1;
99 | w = y2 - y1 + 1;
100 |
101 | if(square)
102 | {
103 | maxSide = (h>w)?h:w;
104 | x1 = x1 + h*0.5 - maxSide*0.5;
105 | y1 = y1 + w*0.5 - maxSide*0.5;
106 | (*it).x2 = round(x1 + maxSide - 1);
107 | (*it).y2 = round(y1 + maxSide - 1);
108 | (*it).x1 = round(x1);
109 | (*it).y1 = round(y1);
110 | } else
111 | {
112 | (*it).x1 = x1;
113 | (*it).y1 = y1;
114 | (*it).x2 = x2;
115 | (*it).y2 = y2;
116 | }
117 |
118 |
119 |
120 | //boundary check
121 | if((*it).x1<0)(*it).x1=0;
122 | if((*it).y1<0)(*it).y1=0;
123 | if((*it).x2>height)(*it).x2 = height - 1;
124 | if((*it).y2>width)(*it).y2 = width - 1;
125 |
126 | it->area = (it->x2 - it->x1)*(it->y2 - it->y1);
127 | }
128 | }
129 | }
--------------------------------------------------------------------------------
/src/network.h:
--------------------------------------------------------------------------------
1 | //c++ network author : liqi
2 | //Nangjing University of Posts and Telecommunications
3 | //date 2017.5.21,20:27
4 | #ifndef NETWORK_H
5 | #define NETWORK_H
6 | #include "opencv2/imgproc/imgproc.hpp"
7 | #include "opencv2/highgui/highgui.hpp"
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include "pBox.h"
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include "NvInfer.h"
25 | #include "NvCaffeParser.h"
26 |
27 | void image2Matrix(const cv::Mat &image, const struct pBox *pbox);  // CV_8UC3 image -> planar float data in pbox->pdata
28 | bool cmpScore(struct orderScore lsh, struct orderScore rsh);  // comparator used to sort the scores inside nms()
29 | void nms(vector<struct Bbox> &boundingBox_, vector<struct orderScore> &bboxScore_, const float overlap_threshold, string modelname = "Union");  // modelname: "Union" or "Min"
30 | void refineAndSquareBbox(vector<struct Bbox> &vecBbox, const int &height, const int &width,bool square);  // applies regreCoord and clamps boxes to the image
31 |
32 | #endif
--------------------------------------------------------------------------------
/src/onet_rt.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-10-2.
3 | //
4 |
5 | #include "onet_rt.h"
6 |
7 | Onet_engine::Onet_engine() : baseEngine("../mtCNNModels/det3_relu.prototxt", // network description, relative to the working directory
8 | "../mtCNNModels/det3_relu.caffemodel", // trained weights
9 | "data", // presumably the input blob name - confirm against baseEngine
10 | "conv6-2", // NOTE(review): blob names; which of them is location / probability / points
11 | "prob1", // is decided by baseEngine's parameter order, which is not visible here
12 | "conv6-3"
13 | ) {
14 | };
15 |
16 | Onet_engine::~Onet_engine() { // the only teardown done here is the protobuf shutdown call
17 | shutdownProtobufLibrary(); // NOTE(review): looks process-wide; Pnet_engine's destructor calls it too - confirm repeated calls are safe
18 | }
19 |
20 | void Onet_engine::init(int row, int col) {
21 |     // Generates the TensorRT model for the O-Net from the Caffe prototxt/model with three
22 |     // outputs (probability, location, points). `row` and `col` are not used here.
23 |     IHostMemory *gieModelStream{nullptr};
24 |     const int max_batch_size = 1;
25 |     caffeToGIEModel(prototxt, model, std::vector<std::string>{OUTPUT_PROB_NAME, OUTPUT_LOCATION_NAME,OUTPUT_POINT_NAME}, max_batch_size,
26 |                     gieModelStream);
27 | }
28 |
29 |
30 | Onet::Onet(const Onet_engine &onet_engine) : BatchSize(1),
31 | INPUT_C(3),
32 | Engine(onet_engine.context->getEngine()) {
33 | // Sets up the host blobs, binding indices, GPU buffers and CUDA stream used by run().
34 | Othreshold = 0.8;
35 | this->score_ = new pBox;
36 | this->location_ = new pBox;
37 | this->rgb = new pBox;
38 | this->points_ = new pBox;
39 | INPUT_W = 48;
40 | INPUT_H = 48;
41 | // output shapes: 2 score values, 4 box regression values, 10 landmark values
42 | this->score_->width = 1;
43 | this->score_->height = 1;
44 | this->score_->channel = 2;
45 |
46 | this->location_->width = 1;
47 | this->location_->height = 1;
48 | this->location_->channel = 4;
49 |
50 | this->points_->width = 1;
51 | this->points_->height = 1;
52 | this->points_->channel = 10;
53 |
54 | // number of floats per output, used to size the GPU buffers and the copies in run()
55 | OUT_PROB_SIZE = this->score_->width * this->score_->height * this->score_->channel;
56 | OUT_LOCATION_SIZE = this->location_->width * this->location_->height * this->location_->channel;
57 | OUT_POINTS_SIZE = this->points_->width * this->points_->height * this->points_->channel;
58 | // allocate host memory for the input image (rgb) and the three outputs; results are not checked
59 | this->rgb->pdata = (float *) malloc(INPUT_C * INPUT_H * INPUT_W * sizeof(float));
60 | this->score_->pdata = (float *) malloc(2 * sizeof(float));
61 | this->location_->pdata = (float *) malloc(4 * sizeof(float));
62 | this->points_->pdata = (float *) malloc(10 * sizeof(float));
63 | // one input plus three outputs; buffers[] is indexed by these binding indices
64 | assert(Engine.getNbBindings() == 4);
65 | inputIndex = Engine.getBindingIndex(onet_engine.INPUT_BLOB_NAME);
66 | outputProb = Engine.getBindingIndex(onet_engine.OUTPUT_PROB_NAME);
67 | outputLocation = Engine.getBindingIndex(onet_engine.OUTPUT_LOCATION_NAME);
68 | outputPoints = Engine.getBindingIndex(onet_engine.OUTPUT_POINT_NAME);
69 |
70 | // create GPU buffers and stream
71 | CHECK(cudaMalloc(&buffers[inputIndex], BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float)));
72 | CHECK(cudaMalloc(&buffers[outputProb], BatchSize * OUT_PROB_SIZE * sizeof(float)));
73 | CHECK(cudaMalloc(&buffers[outputLocation], BatchSize * OUT_LOCATION_SIZE * sizeof(float)));
74 | CHECK(cudaMalloc(&buffers[outputPoints], BatchSize * OUT_POINTS_SIZE * sizeof(float)));
75 | CHECK(cudaStreamCreate(&stream));
76 | }
77 |
78 | Onet::~Onet() {
79 |     // Release all four host-side blobs created in the constructor: each pdata buffer comes
80 |     // from malloc() and needs free() before its pBox is deleted.
81 |     for (pBox *blob : {score_, location_, points_, rgb}) { free(blob->pdata); delete blob; }
82 |     cudaStreamDestroy(stream);
83 |     CHECK(cudaFree(buffers[inputIndex]));
84 |     CHECK(cudaFree(buffers[outputProb]));
85 |     CHECK(cudaFree(buffers[outputLocation]));
86 |     CHECK(cudaFree(buffers[outputPoints]));
87 | }
88 |
89 | void Onet::run(cv::Mat &image, const Onet_engine &onet_engine) {
90 | // Runs the O-Net on one image and leaves the results in location_, score_ and points_.
91 | // image2Matrix() only accepts CV_8UC3; the copy below assumes INPUT_H x INPUT_W (48x48) pixels.
92 | //DMA the input to the GPU ,execute the batch asynchronously and DMA it back;
93 | image2Matrix(image, this->rgb);
94 | CHECK(cudaMemcpyAsync(buffers[inputIndex], this->rgb->pdata,
95 | BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float),
96 | cudaMemcpyHostToDevice, stream));
97 | onet_engine.context->enqueue(BatchSize, buffers, stream, nullptr);
98 | CHECK(cudaMemcpyAsync(this->location_->pdata, buffers[outputLocation], BatchSize * OUT_LOCATION_SIZE* sizeof(float),
99 | cudaMemcpyDeviceToHost, stream));
100 | CHECK(cudaMemcpyAsync(this->score_->pdata, buffers[outputProb], BatchSize * OUT_PROB_SIZE* sizeof(float),
101 | cudaMemcpyDeviceToHost, stream));
102 | CHECK(cudaMemcpyAsync(this->points_->pdata, buffers[outputPoints], BatchSize * OUT_POINTS_SIZE* sizeof(float),
103 | cudaMemcpyDeviceToHost, stream));
104 | cudaStreamSynchronize(stream); // wait for the stream so the host blobs hold the results; the return code is not checked
105 |
106 | }
107 |
--------------------------------------------------------------------------------
/src/onet_rt.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-10-2.
3 | //
4 |
5 | #ifndef MAIN_ONET_RT_H
6 | #define MAIN_ONET_RT_H
7 | #include "baseEngine.h"
8 | #include "network.h"
9 |
10 |
11 | class Onet_engine : public baseEngine { // baseEngine configured with the O-Net (det3) model files and blob names
12 |
13 | public:
14 | Onet_engine();
15 | ~Onet_engine();
16 | void init(int row, int col); // generates the TensorRT model; row and col are unused by this engine
17 | friend class Onet; // Onet reads the engine's context and blob names
18 |
19 | };
20 |
21 | class Onet { // runs the O-Net engine on single 48x48 crops and keeps the latest results
22 | public:
23 | Onet(const Onet_engine &onet_engine);
24 | ~Onet();
25 | void run(cv::Mat &image, const Onet_engine &engine); // fills location_, score_ and points_ for `image`
26 | mydataFmt Othreshold; // score threshold; set to 0.8 in the constructor
27 | cudaStream_t stream; // stream used for the copies and the inference in run()
28 | struct pBox *location_; // 4 box regression values
29 | struct pBox *score_; // 2 score values
30 | struct pBox *points_; // 10 landmark values
31 | struct pBox *rgb; // planar float copy of the input image
32 | private:
33 | const int BatchSize;
34 | const int INPUT_C;
35 | const ICudaEngine &Engine;
36 | //must be computed at runtime
37 | int INPUT_H;
38 | int INPUT_W;
39 | int OUT_PROB_SIZE; // element counts (floats) of the three outputs
40 | int OUT_LOCATION_SIZE;
41 | int OUT_POINTS_SIZE;
42 | int inputIndex,outputProb,outputLocation,outputPoints; // engine binding indices, used to index buffers[]
43 | void *buffers[4]; // GPU buffers: one input, three outputs
44 |
45 | };
46 | #endif //MAIN_ONET_RT_H
47 |
--------------------------------------------------------------------------------
/src/pBox.h:
--------------------------------------------------------------------------------
1 | #ifndef PBOX_H
2 | #define PBOX_H
3 | #include
4 | #include
5 |
6 | using namespace std;
7 | #define mydataFmt float
8 |
9 |
10 | struct pBox // a block of float data plus its dimensions
11 | {
12 | mydataFmt *pdata; // data buffer; allocation and release are left to the user of the struct
13 | int width;
14 | int height;
15 | int channel;
16 | };
17 | struct Bbox // one face candidate produced by the detector
18 | {
19 | float score;
20 | int x1; // NOTE: x is the row (vertical) and y the column (horizontal) coordinate:
21 | int y1; // callers build cv::Point(y, x) and x2 is clamped against the image height
22 | int x2;
23 | int y2;
24 | float area;
25 | bool exist; // false once the candidate has been rejected
26 | mydataFmt ppoint[10]; // landmarks: entries 0-4 derive from y, entries 5-9 from x
27 | mydataFmt regreCoord[4]; // box regression offsets applied by refineAndSquareBbox()
28 | };
29 |
30 | struct orderScore // pairs a score with the index of the box it belongs to
31 | {
32 | mydataFmt score;
33 | int oriOrder; // index into the matching Bbox vector; nms() sets it to -1 for suppressed boxes
34 | };
35 | #endif
--------------------------------------------------------------------------------
/src/pnet_rt.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-4-30.
3 | //
4 | #include "pnet_rt.h"
5 | #include
6 |
7 | // stuff we know about the network and the caffe input/output blobs
8 | Pnet_engine::Pnet_engine() : baseEngine("../mtCNNModels/det1_relu.prototxt",
9 | "../mtCNNModels/det1_relu.caffemodel",
10 | "data",
11 | "conv4-2",
12 | "prob1") {
13 | };
14 |
15 | Pnet_engine::~Pnet_engine() {
16 | shutdownProtobufLibrary();
17 | }
18 |
19 | void Pnet_engine::init(int row, int col) {
20 |
21 | //modifiy the input shape of prototxt, write to temp.prototxt
22 | int first_spce = 16, second_space = 4;
23 | fstream protofile;
24 | protofile.open(prototxt, ios::in);
25 | std::stringstream buffer;
26 | buffer << protofile.rdbuf();
27 | std::string contents(buffer.str());
28 | // std::cout << "contents = " << contents << std::endl;
29 | string::size_type position_h, position_w;
30 | position_h = contents.find("dim");
31 | while (isdigit(contents[position_h + first_spce])) {
32 | contents.erase(position_h + first_spce, 1);
33 | }
34 | contents.insert(position_h + first_spce, to_string(row));
35 | position_w = contents.find("dim", position_h + first_spce);
36 | while (isdigit(contents[position_w + second_space])) {
37 | contents.erase(position_w + second_space, 1);
38 | }
39 | contents.insert(position_w + second_space, to_string(col));
40 | protofile.close();
41 | protofile.open("temp.prototxt", ios::out);
42 | protofile.write(contents.c_str(), contents.size());
43 | protofile.close();
44 | IHostMemory *gieModelStream{nullptr};
45 | //generate Tensorrt model
46 | caffeToGIEModel("temp.prototxt", model, std::vector{OUTPUT_PROB_NAME, OUTPUT_LOCATION_NAME}, 1,
47 | gieModelStream);
48 |
49 | }
50 |
51 |
52 | Pnet::Pnet(int row, int col, const Pnet_engine &pnet_engine) : BatchSize(1),
53 | INPUT_C(3), Engine(pnet_engine.context->getEngine()) {
54 | Pthreshold = 0.6;
55 | nms_threshold = 0.5;
56 | this->score_ = new pBox;
57 | this->location_ = new pBox;
58 | this->rgb = new pBox;
59 | INPUT_W = col;
60 | INPUT_H = row;
61 | //calculate output shape
62 | this->score_->width = int(ceil((INPUT_W - 2) / 2.) - 4);
63 | this->score_->height = int(ceil((INPUT_H - 2) / 2.) - 4);
64 | this->score_->channel = 2;
65 |
66 | this->location_->width = int(ceil((INPUT_W - 2) / 2.) - 4);
67 | this->location_->height = int(ceil((INPUT_H - 2) / 2.) - 4);
68 | this->location_->channel = 4;
69 |
70 | OUT_PROB_SIZE = this->score_->width * this->score_->height * this->score_->channel;
71 | OUT_LOCATION_SIZE = this->location_->width * this->location_->height * this->location_->channel;
72 | //allocate memory for outputs
73 | this->rgb->pdata = (float *) malloc(INPUT_C * INPUT_H * INPUT_W * sizeof(float));
74 | this->score_->pdata = (float *) malloc(OUT_PROB_SIZE * sizeof(float));
75 | this->location_->pdata = (float *) malloc(OUT_LOCATION_SIZE * sizeof(float));
76 |
77 | assert(Engine.getNbBindings() == 3);
78 | inputIndex = Engine.getBindingIndex(pnet_engine.INPUT_BLOB_NAME),
79 | outputProb = Engine.getBindingIndex(pnet_engine.OUTPUT_PROB_NAME),
80 | outputLocation = Engine.getBindingIndex(pnet_engine.OUTPUT_LOCATION_NAME);
81 |
82 | //creat GPU buffers and stream
83 | CHECK(cudaMalloc(&buffers[inputIndex], BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float)));
84 | CHECK(cudaMalloc(&buffers[outputProb], BatchSize * OUT_PROB_SIZE * sizeof(float)));
85 | CHECK(cudaMalloc(&buffers[outputLocation], BatchSize * OUT_LOCATION_SIZE * sizeof(float)));
86 | CHECK(cudaStreamCreate(&stream));
87 | }
88 |
89 | Pnet::~Pnet() {
90 |
91 | delete (score_);
92 | delete (location_);
93 |
94 | cudaStreamDestroy(stream);
95 | CHECK(cudaFree(buffers[inputIndex]));
96 | CHECK(cudaFree(buffers[outputProb]));
97 | CHECK(cudaFree(buffers[outputLocation]));
98 | }
99 |
100 | void Pnet::run(cv::Mat &image, float scale, const Pnet_engine &pnet_engine) {
101 |
102 |
103 | //DMA the input to the GPU ,execute the batch asynchronously and DMA it back;
104 | image2Matrix(image, this->rgb);
105 | CHECK(cudaMemcpyAsync(buffers[inputIndex], this->rgb->pdata,
106 | BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float),
107 | cudaMemcpyHostToDevice, stream));
108 | pnet_engine.context->enqueue(BatchSize, buffers, stream, nullptr);
109 | CHECK(cudaMemcpyAsync(this->score_->pdata, buffers[outputProb], BatchSize * OUT_PROB_SIZE * sizeof(float),
110 | cudaMemcpyDeviceToHost, stream));
111 | CHECK(cudaMemcpyAsync(this->location_->pdata, buffers[outputLocation],
112 | BatchSize * OUT_LOCATION_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
113 | cudaStreamSynchronize(stream);
114 | generateBbox(this->score_, this->location_, scale);
115 |
116 | }
117 |
118 | void Pnet::generateBbox(const struct pBox *score, const struct pBox *location, mydataFmt scale) {
119 | //for pooling
120 | int stride = 2;
121 | int cellsize = 12;
122 | int count = 0;
123 | //score p
124 | mydataFmt *p = score->pdata + score->width * score->height;
125 | mydataFmt *plocal = location->pdata;
126 | struct Bbox bbox;
127 | struct orderScore order;
128 | for (int row = 0; row < score->height; row++) {
129 | for (int col = 0; col < score->width; col++) {
130 | if (*p > Pthreshold) {
131 | bbox.score = *p;
132 | order.score = *p;
133 | order.oriOrder = count;
134 | bbox.x1 = round((stride * row + 1) / scale);
135 | bbox.y1 = round((stride * col + 1) / scale);
136 | bbox.x2 = round((stride * row + 1 + cellsize) / scale);
137 | bbox.y2 = round((stride * col + 1 + cellsize) / scale);
138 | bbox.exist = true;
139 | bbox.area = (bbox.x2 - bbox.x1) * (bbox.y2 - bbox.y1);
140 | for (int channel = 0; channel < 4; channel++)
141 | bbox.regreCoord[channel] = *(plocal + channel * location->width * location->height);
142 | boundingBox_.push_back(bbox);
143 | bboxScore_.push_back(order);
144 | count++;
145 | }
146 | p++;
147 | plocal++;
148 | }
149 | }
150 |
151 | }
--------------------------------------------------------------------------------
/src/pnet_rt.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-4-30.
3 | //
4 |
5 | #ifndef MAIN_PNET_RT_H
6 | #define MAIN_PNET_RT_H
7 |
8 | #include "network.h"
9 | #include "common.h"
10 | #include "baseEngine.h"
11 | #endif //MAIN_PNET_RT_H
12 | using namespace nvinfer1;
13 | using namespace nvcaffeparser1;
14 |
15 | class Pnet_engine:public baseEngine
16 | {
17 |
18 | public:
19 | Pnet_engine();
20 | ~Pnet_engine();
21 | void init(int row,int col);
22 | friend class Pnet;
23 |
24 | };
25 |
26 |
27 |
28 | class Pnet
29 | {
30 | public:
31 | Pnet(int row,int col,const Pnet_engine& pnet_engine);
32 | ~Pnet();
33 | void run(cv::Mat &image, float scale,const Pnet_engine& engine);
34 | float nms_threshold;
35 | mydataFmt Pthreshold;
36 | cudaStream_t stream;
37 |
38 | vector boundingBox_;
39 | vector bboxScore_;
40 | private:
41 |
42 | const int BatchSize ;
43 | const int INPUT_C ;
44 | const ICudaEngine &Engine;
45 | //must be computed at runtime
46 | int INPUT_H ;
47 | int INPUT_W ;
48 | int OUT_PROB_SIZE;
49 | int OUT_LOCATION_SIZE;
50 | int inputIndex,
51 | outputProb,
52 | outputLocation;
53 | void *buffers[3];
54 | struct pBox *location_;
55 | struct pBox *score_;
56 | struct pBox *rgb;
57 |
58 | void generateBbox(const struct pBox *score, const struct pBox *location, mydataFmt scale);
59 | };
60 |
61 |
--------------------------------------------------------------------------------
/src/rnet_rt.cpp:
--------------------------------------------------------------------------------
1 |
2 | //Created by zhou on 18-5-4.
3 |
4 | #include "rnet_rt.h"
5 |
6 |
7 | Rnet_engine::Rnet_engine() : baseEngine("../mtCNNModels/det2_relu.prototxt",
8 | "../mtCNNModels/det2_relu.caffemodel",
9 | "data",
10 | "conv5-2",
11 | "prob1"
12 |
13 | ) {
14 | };
15 |
16 | Rnet_engine::~Rnet_engine() {
17 | shutdownProtobufLibrary();
18 | }
19 |
20 | void Rnet_engine::init(int row, int col) {
21 |
22 | IHostMemory *gieModelStream{nullptr};
23 | const int max_batch_size = 1;
24 | //generate Tensorrt model
25 | caffeToGIEModel(prototxt, model, std::vector{OUTPUT_PROB_NAME, OUTPUT_LOCATION_NAME}, max_batch_size,
26 | gieModelStream);
27 |
28 | }
29 |
30 |
31 | Rnet::Rnet(const Rnet_engine &rnet_engine) : BatchSize(1),
32 | INPUT_C(3),
33 | Engine(rnet_engine.context->getEngine()) {
34 |
35 | Rthreshold = 0.7;
36 | this->score_ = new pBox;
37 | this->location_ = new pBox;
38 | this->rgb = new pBox;
39 | INPUT_W = 24;
40 | INPUT_H = 24;
41 | //calculate output shape
42 | this->score_->width = 1;
43 | this->score_->height = 1;
44 | this->score_->channel = 2;
45 |
46 | this->location_->width = 1;
47 | this->location_->height = 1;
48 | this->location_->channel= 4;
49 |
50 | OUT_PROB_SIZE = this->score_->width * this->score_->height * this->score_->channel;
51 | OUT_LOCATION_SIZE = this->location_->width * this->location_->height * this->location_->channel;
52 | //allocate memory for outputs
53 | this->rgb->pdata = (float *) malloc(INPUT_C * INPUT_H * INPUT_W * sizeof(float));
54 | this->score_->pdata = (float *) malloc(2 * sizeof(float));
55 | this->location_->pdata = (float *) malloc(4 * sizeof(float));
56 |
57 | assert(Engine.getNbBindings() == 3);
58 | inputIndex = Engine.getBindingIndex(rnet_engine.INPUT_BLOB_NAME);
59 | outputProb = Engine.getBindingIndex(rnet_engine.OUTPUT_PROB_NAME);
60 | outputLocation = Engine.getBindingIndex(rnet_engine.OUTPUT_LOCATION_NAME);
61 | //creat GPU buffers and stream
62 | CHECK(cudaMalloc(&buffers[inputIndex], BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float)));
63 | CHECK(cudaMalloc(&buffers[outputProb], BatchSize * OUT_PROB_SIZE * sizeof(float)));
64 | CHECK(cudaMalloc(&buffers[outputLocation], BatchSize * OUT_LOCATION_SIZE * sizeof(float)));
65 | CHECK(cudaStreamCreate(&stream));
66 | }
67 |
68 | Rnet::~Rnet() {
69 | delete (score_);
70 | delete (location_);
71 | cudaStreamDestroy(stream);
72 | CHECK(cudaFree(buffers[inputIndex]));
73 | CHECK(cudaFree(buffers[outputProb]));
74 | CHECK(cudaFree(buffers[outputLocation]));
75 | }
76 |
77 | void Rnet::run(cv::Mat &image, const Rnet_engine &rnet_engine) {
78 | //DMA the input to the GPU ,execute the batch asynchronously and DMA it back;
79 | image2Matrix(image, this->rgb);
80 | CHECK(cudaMemcpyAsync(buffers[inputIndex], this->rgb->pdata,
81 | BatchSize * INPUT_C * INPUT_H * INPUT_W * sizeof(float),
82 | cudaMemcpyHostToDevice, stream));
83 | rnet_engine.context->enqueue(BatchSize, buffers, stream, nullptr);
84 | CHECK(cudaMemcpyAsync(this->location_->pdata, buffers[outputLocation], BatchSize * OUT_LOCATION_SIZE* sizeof(float),
85 | cudaMemcpyDeviceToHost, stream));
86 | CHECK(cudaMemcpyAsync(this->score_->pdata, buffers[outputProb], BatchSize * OUT_PROB_SIZE* sizeof(float),
87 | cudaMemcpyDeviceToHost, stream));
88 | cudaStreamSynchronize(stream);
89 |
90 | }
91 |
--------------------------------------------------------------------------------
/src/rnet_rt.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by zhou on 18-5-4.
3 | //
4 |
5 | #ifndef MAIN_RNET_RT_H
6 | #define MAIN_RNET_RT_H
7 |
8 | #include "baseEngine.h"
9 | #include "network.h"
10 |
11 |
12 | class Rnet_engine : public baseEngine {
13 |
14 | public:
15 | Rnet_engine();
16 | ~Rnet_engine();
17 | void init(int row, int col);
18 | friend class Rnet;
19 |
20 | };
21 |
22 | class Rnet {
23 | public:
24 | Rnet(const Rnet_engine &rnet_engine);
25 | ~Rnet();
26 | void run(cv::Mat &image, const Rnet_engine &engine);
27 | mydataFmt Rthreshold;
28 | cudaStream_t stream;
29 | struct pBox *location_;
30 | struct pBox *score_;
31 | struct pBox *rgb;
32 | private:
33 | const int BatchSize;
34 | const int INPUT_C;
35 | const ICudaEngine &Engine;
36 | //must be computed at runtime
37 | int INPUT_H;
38 | int INPUT_W;
39 | int OUT_PROB_SIZE;
40 | int OUT_LOCATION_SIZE;
41 | int inputIndex,outputProb,outputLocation;
42 | void *buffers[3];
43 |
44 | };
45 |
46 |
47 | #endif //MAIN_RNET_RT_H
48 |
--------------------------------------------------------------------------------
/src/videoStreamer.cpp:
--------------------------------------------------------------------------------
1 | #include "videoStreamer.h"
2 |
3 | VideoStreamer::VideoStreamer(int nmbrDevice, int videoWidth, int videoHeight, int frameRate, bool isCSICam) {
4 | if(isCSICam) {
5 | m_videoWidth = videoWidth;
6 | m_videoHeight = videoHeight;
7 | m_frameRate = frameRate;
8 |
9 | std::string pipeline = gstreamer_pipeline(videoWidth, videoHeight, videoWidth,
10 | videoHeight, frameRate);
11 | std::cout << "Using pipeline: \n\t" << pipeline << "\n";
12 |
13 | m_capture = new cv::VideoCapture(pipeline, cv::CAP_GSTREAMER);
14 | if(!m_capture->isOpened()) {
15 | std::cerr << "Failed to open CSI camera."<< std::endl;
16 | }
17 | }
18 | else {
19 | m_capture = new cv::VideoCapture(nmbrDevice);
20 | if (!m_capture->isOpened()){
21 | //error in opening the video input
22 | std::cerr << "Failed to open USB camera." << std::endl;
23 | }
24 | m_videoWidth = videoWidth;
25 | m_videoHeight = videoHeight;
26 | m_capture->set(cv::CAP_PROP_FRAME_WIDTH, m_videoWidth);
27 | m_capture->set(cv::CAP_PROP_FRAME_HEIGHT, m_videoHeight);
28 | }
29 | }
30 |
31 | VideoStreamer::VideoStreamer(std::string filename, int videoWith, int videoHeight) {
32 | m_capture = new cv::VideoCapture(filename);
33 | if (!m_capture->isOpened()){
34 | //error in opening the video input
35 | std::cerr << "Unable to open file!" << std::endl;
36 | }
37 | // ToDo set filename width+height doesn't work with m_capture.set(...)
38 | }
39 |
40 | void VideoStreamer::setResolutionDevice(int width, int height) {
41 | m_videoWidth = width;
42 | m_videoHeight = height;
43 | m_capture->set(cv::CAP_PROP_FRAME_WIDTH, m_videoWidth);
44 | m_capture->set(cv::CAP_PROP_FRAME_HEIGHT, m_videoHeight);
45 | }
46 |
47 | void VideoStreamer::setResoltionFile(int width, int height) {
48 | // ToDo set resolution for input files
49 | }
50 |
51 | void VideoStreamer::getFrame(cv::Mat &frame) {
52 | *m_capture >> frame;
53 | }
54 |
55 | void VideoStreamer::assertResolution() {
56 | // currently wrong, since m_capture->get returns max/default width, height
57 | // but a function like this would be good to ensure good performance
58 | assert(m_videoWidth == m_capture->get(cv::CAP_PROP_FRAME_WIDTH));
59 | assert(m_videoHeight == m_capture->get(cv::CAP_PROP_FRAME_HEIGHT));
60 | }
61 |
62 | std::string VideoStreamer::gstreamer_pipeline (int capture_width, int capture_height, int display_width, int display_height, int frameRate, int flip_method) {
63 | return "nvarguscamerasrc ! video/x-raw(memory:NVMM), width=(int)" + std::to_string(capture_width) + ", height=(int)" +
64 | std::to_string(capture_height) + ", format=(string)NV12, framerate=(fraction)" + std::to_string(frameRate) +
65 | "/1 ! nvvidconv flip-method=" + std::to_string(flip_method) + " ! video/x-raw, width=(int)" + std::to_string(display_width) + ", height=(int)" +
66 | std::to_string(display_height) + ", format=(string)BGRx ! videoconvert ! video/x-raw, format=(string)BGR ! appsink";
67 | }
68 |
69 | void VideoStreamer::release() {
70 | m_capture->release();
71 | }
72 |
73 | VideoStreamer::~VideoStreamer() {
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/videoStreamer.h:
--------------------------------------------------------------------------------
1 | #ifndef VIDEO_INPUT_WRAPPER_VIDEOSTREAMER_H
2 | #define VIDEO_INPUT_WRAPPER_VIDEOSTREAMER_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 |
10 |
11 | class VideoStreamer {
12 | private:
13 | int m_videoWidth;
14 | int m_videoHeight;
15 | int m_frameRate;
16 | cv::VideoCapture *m_capture;
17 |
18 | public:
19 | VideoStreamer(int nmbrDevice, int videoWidth, int videoHeight, int frameRate, bool isCSICam);
20 | VideoStreamer(std::string filename, int videoWidth, int videoHeight);
21 | ~VideoStreamer();
22 | void setResolutionDevice(int width, int height);
23 | void setResoltionFile(int width, int height);
24 | void assertResolution();
25 | void getFrame(cv::Mat &frame);
26 | std::string gstreamer_pipeline (int capture_width, int capture_height, int display_width, int display_height, int frameRate, int flip_method=0);
27 | void release();
28 | };
29 |
30 | #endif //VIDEO_INPUT_WRAPPER_VIDEOSTREAMER_H
31 |
--------------------------------------------------------------------------------
/step01_pb_to_uff.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import graphsurgeon as gs
3 | import tensorflow as tf
4 | import uff
5 |
if __name__ == "__main__":
    # --- user-configurable values -------------------------------------
    output_nodes = ["Bottleneck/BatchNorm/batchnorm/add_1"]
    input_node = "input"  # not used by the conversion below
    pb_file = "./facenet.pb"
    uff_file = "./facenetModels/facenet.uff"
    # --- end of user-configurable values ------------------------------

    # Load the frozen TensorFlow graph from pb_file.
    dynamic_graph = gs.DynamicGraph(pb_file)
    graph_def = dynamic_graph.as_graph_def()

    # Convert the graph to UFF and write it to uff_file.
    uff_model = uff.from_tensorflow(
        graph_def,
        output_nodes=output_nodes,
        output_filename=uff_file,
        text=False,
    )
--------------------------------------------------------------------------------