├── CMakeLists.txt
├── README.md
├── client
│   ├── Module
│   │   ├── imager.py
│   │   └── windows_client.py
│   ├── README.md
│   ├── myclient.py
│   ├── note.txt
│   └── utils.py
├── environment.yaml
├── launch
│   └── run.launch
├── msg
│   ├── ActElem.msg
│   └── Person.msg
├── package.xml
├── requirements.txt
├── scripts
│   ├── listener.py
│   ├── talker.py
│   └── testing.py
└── src
    ├── Modules
    │   ├── act_talker.py
    │   ├── camera.py
    │   ├── detection.py
    │   ├── imager.py
    │   ├── midprocessor.py
    │   ├── myserver.py
    │   ├── poser.py
    │   └── recognizer.py
    ├── core
    │   ├── backbone.py
    │   ├── camera.py
    │   ├── common.py
    │   ├── config.py
    │   ├── dataset.py
    │   ├── operation.py
    │   ├── utils.py
    │   └── yolov3.py
    ├── data
    │   ├── anchors
    │   │   └── basline_anchors.txt
    │   ├── classes
    │   │   ├── coco.names
    │   │   └── yymnist.names
    │   └── log
    │       └── events.out.tfevents.1576027966.dongjai.16966.6502.v2
    ├── dataprocessing.py
    └── mediandensity.json
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | project(act_recognizer)
3 |
4 | ## Compile as C++11, supported in ROS Kinetic and newer
5 | # add_compile_options(-std=c++11)
6 |
7 | ## Find catkin macros and libraries
8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
9 | ## is used, also find other catkin packages
10 | find_package(catkin REQUIRED COMPONENTS
11 | roscpp
12 | rospy
13 | std_msgs
14 | message_generation
15 | )
16 |
17 | ## System dependencies are found with CMake's conventions
18 | # find_package(Boost REQUIRED COMPONENTS system)
19 |
20 |
21 | ## Uncomment this if the package has a setup.py. This macro ensures
22 | ## modules and global scripts declared therein get installed
23 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
24 | # catkin_python_setup()
25 |
26 | ################################################
27 | ## Declare ROS messages, services and actions ##
28 | ################################################
29 |
30 | ## To declare and build messages, services or actions from within this
31 | ## package, follow these steps:
32 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in
33 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
34 | ## * In the file package.xml:
35 | ## * add a build_depend tag for "message_generation"
36 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
37 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
38 | ## but can be declared for certainty nonetheless:
39 | ## * add a exec_depend tag for "message_runtime"
40 | ## * In this file (CMakeLists.txt):
41 | ## * add "message_generation" and every package in MSG_DEP_SET to
42 | ## find_package(catkin REQUIRED COMPONENTS ...)
43 | ## * add "message_runtime" and every package in MSG_DEP_SET to
44 | ## catkin_package(CATKIN_DEPENDS ...)
45 | ## * uncomment the add_*_files sections below as needed
46 | ## and list every .msg/.srv/.action file to be processed
47 | ## * uncomment the generate_messages entry below
48 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
49 |
50 | ## Generate messages in the 'msg' folder
51 | add_message_files(
52 | FILES
53 | ActElem.msg
54 | Person.msg
55 | # Persons.msg
56 | # Message2.msg
57 | )
58 |
59 | ## Generate services in the 'srv' folder
60 | # add_service_files(
61 | # FILES
62 | # Service1.srv
63 | # Service2.srv
64 | # )
65 |
66 | ## Generate actions in the 'action' folder
67 | # add_action_files(
68 | # FILES
69 | # Action1.action
70 | # Action2.action
71 | # )
72 |
73 | ## Generate added messages and services with any dependencies listed here
74 | generate_messages(
75 | DEPENDENCIES
76 | std_msgs
77 | )
78 |
79 | ################################################
80 | ## Declare ROS dynamic reconfigure parameters ##
81 | ################################################
82 |
83 | ## To declare and build dynamic reconfigure parameters within this
84 | ## package, follow these steps:
85 | ## * In the file package.xml:
86 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
87 | ## * In this file (CMakeLists.txt):
88 | ## * add "dynamic_reconfigure" to
89 | ## find_package(catkin REQUIRED COMPONENTS ...)
90 | ## * uncomment the "generate_dynamic_reconfigure_options" section below
91 | ## and list every .cfg file to be processed
92 |
93 | ## Generate dynamic reconfigure parameters in the 'cfg' folder
94 | # generate_dynamic_reconfigure_options(
95 | # cfg/DynReconf1.cfg
96 | # cfg/DynReconf2.cfg
97 | # )
98 |
99 | ###################################
100 | ## catkin specific configuration ##
101 | ###################################
102 | ## The catkin_package macro generates cmake config files for your package
103 | ## Declare things to be passed to dependent projects
104 | ## INCLUDE_DIRS: uncomment this if your package contains header files
105 | ## LIBRARIES: libraries you create in this project that dependent projects also need
106 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need
107 | ## DEPENDS: system dependencies of this project that dependent projects also need
108 | catkin_package(
109 | INCLUDE_DIRS include
110 | LIBRARIES act_recognizer
111 | CATKIN_DEPENDS roscpp rospy std_msgs message_runtime
112 | DEPENDS system_lib
113 | )
114 |
115 | ###########
116 | ## Build ##
117 | ###########
118 |
119 | ## Specify additional locations of header files
120 | ## Your package locations should be listed before other locations
121 | include_directories(
122 | # include
123 | ${catkin_INCLUDE_DIRS}
124 | )
125 |
126 | ## Declare a C++ library
127 | # add_library(${PROJECT_NAME}
128 | # src/${PROJECT_NAME}/act_recognizer.cpp
129 | # )
130 |
131 | ## Add cmake target dependencies of the library
132 | ## as an example, code may need to be generated before libraries
133 | ## either from message generation or dynamic reconfigure
134 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
135 |
136 | ## Declare a C++ executable
137 | ## With catkin_make all packages are built within a single CMake context
138 | ## The recommended prefix ensures that target names across packages don't collide
139 | # add_executable(${PROJECT_NAME}_node src/act_recognizer_node.cpp)
140 |
141 | ## Rename C++ executable without prefix
142 | ## The above recommended prefix causes long target names, the following renames the
143 | ## target back to the shorter version for ease of user use
144 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
145 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
146 |
147 | ## Add cmake target dependencies of the executable
148 | ## same as for the library above
149 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
150 |
151 | ## Specify libraries to link a library or executable target against
152 | # target_link_libraries(${PROJECT_NAME}_node
153 | # ${catkin_LIBRARIES}
154 | # )
155 |
156 | #############
157 | ## Install ##
158 | #############
159 |
160 | # all install targets should use catkin DESTINATION variables
161 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
162 |
163 | ## Mark executable scripts (Python etc.) for installation
164 | ## in contrast to setup.py, you can choose the destination
165 | # install(PROGRAMS
166 | # scripts/my_python_script
167 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
168 | # )
169 |
170 | ## Mark executables for installation
171 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html
172 | # install(TARGETS ${PROJECT_NAME}_node
173 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
174 | # )
175 |
176 | ## Mark libraries for installation
177 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html
178 | # install(TARGETS ${PROJECT_NAME}
179 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
180 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
181 | # RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION}
182 | # )
183 |
184 | ## Mark cpp header files for installation
185 | # install(DIRECTORY include/${PROJECT_NAME}/
186 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
187 | # FILES_MATCHING PATTERN "*.h"
188 | # PATTERN ".svn" EXCLUDE
189 | # )
190 |
191 | ## Mark other files for installation (e.g. launch and bag files, etc.)
192 | # install(FILES
193 | # # myfile1
194 | # # myfile2
195 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
196 | # )
197 |
198 | #############
199 | ## Testing ##
200 | #############
201 |
202 | ## Add gtest based cpp test target and link libraries
203 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_act_recognizer.cpp)
204 | # if(TARGET ${PROJECT_NAME}-test)
205 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
206 | # endif()
207 |
208 | ## Add folders to be run by python nosetests
209 | # catkin_add_nosetests(test)
210 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Skeleton-based-action-recognition
2 | Yolov3, Openpose, Tensorflow2, ROS, multi-thread
3 |
4 | It also supports a remote GPU server: you can grab frames from a D435 locally, send them to the remote server for processing, and receive the results back.
5 |
6 | This is my final year project "3D Action Recognition based on Openpose and YOLO".
7 |
8 | ## Installation
9 |
10 | ### 0. install openpose python api
11 |
12 | Follow the openpose homepage instructions to install [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md) and compile the [python api](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md#python-api).
13 |
14 |
15 | ### 1. create a conda env.
16 |
17 | ```
18 | conda create -n tensorflow2 python=3.6
19 | pip install -r requirements.txt
20 | ```
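
Activate the environment before running `pip install`, so the packages land inside it (`conda activate tensorflow2`). The repository also ships an `environment.yaml` that appears to capture the same `tensorflow2` environment, so recreating it from that file should work as an alternative:

```
conda env create -f environment.yaml
```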
21 |
22 | ### 2. create a ROS package
23 |
24 | First, follow the [ros wiki](http://wiki.ros.org/cn/ROS/Tutorials/InstallingandConfiguringROSEnvironment) instructions to install ROS and create a ROS workspace.
25 |
26 | Then, create a ROS package named `act_recognizer`.
27 | ```
28 | cd catkin_ws/src
29 | mkdir -p act_recognizer
30 | ```
31 |
32 | Your ROS workspace folder structure will then look like the following:
33 | ```
34 | ->~/catkin_ws
35 | ----->build
36 | ----->devel
37 | ----->src
38 | --------->act_recognizer
39 | ```
40 |
41 | ### 3. clone this repo
42 |
43 | Clone the repo and copy all of its files into the ROS package `act_recognizer`.
44 |
45 | Modify `act_talker.py` like this:
46 | ```
47 | sys.path.append('{$your root path}/catkin_ws/src/act_recognizer')
48 | ```
49 |
50 | Modify `config.py`: change the YOLO data paths, such as `YOLO.CLASSES` and `YOLO.ANCHORS`.
51 |
52 | Change the openpose python api path in `Module/poser.py` so that the code can import pyopenpose correctly.
53 |
54 | Additionally, you have to change the openpose model path:
55 | ```
56 | Module/poser.py
57 | ----->class PoseLoader()
58 | --------->self._params["model_folder"] = your openpose model folder
59 | ```
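
For orientation, the edit ends up looking roughly like the sketch below. This is only a sketch, not the actual `Module/poser.py`: the import style depends on how the openpose python api was built, and both paths are placeholders you must replace.

```
import sys
sys.path.append('/path/to/openpose/build/python')   # so that pyopenpose can be imported
from openpose import pyopenpose as op

class PoseLoader():
    def __init__(self, opt):
        self._params = dict()
        self._params["model_folder"] = "/path/to/openpose/models"   # <-- your openpose model folder
        self._op_wrapper = op.WrapperPython()
        self._op_wrapper.configure(self._params)
        self._op_wrapper.start()
```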
60 |
61 | ### 4. download yolo and mlp checkpoints
62 |
63 | Download the checkpoints from [BaiduYun](https://pan.baidu.com/s/1XAbQe_AZBWuXT1MvYESh4w) (extraction code *cxj6*). Then move `yolov3.weights` into the `checkpoints/YOLO` folder and `mlp.h5` into `checkpoints`. You will probably need to create the `checkpoints` folder first:
64 |
65 | ```
66 | cd act_recognizer/src
67 | mkdir -p checkpoints/YOLO
68 | ```
69 |
70 | ### 5. run the code
71 |
72 | A ROS package written in Python does not need to be compiled.
73 | We do, however, have to point the first line (the shebang) of every `.py` file at the conda env `tensorflow2`'s python interpreter.
74 | Like this:
75 |
76 | ```
77 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
78 | ```
79 |
80 | Then run the code as follows:
81 | ```
82 | roscore
83 | cd catkin_ws
84 | conda activate tensorflow2
85 | source ./devel/setup.bash
86 | roslaunch act_recognizer run.launch
87 | ```
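
Once everything is up, the recognizer publishes `Person` messages (see `msg/Person.msg`) on the `chatter` topic, so a quick sanity check from another terminal with the workspace sourced is:

```
rostopic echo /chatter
```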
88 |
89 | ## Citation and Reference
90 | [Openpose from CMU](https://github.com/kevinchan04/openpose)
91 |
92 | [Yolov3 tensorflow2 from YunYang1994](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/master/4-Object_Detection/YOLOV3)
93 |
--------------------------------------------------------------------------------
/client/Module/imager.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | from threading import Thread
5 | from queue import Queue
6 |
7 | import cv2
8 | import scipy.misc
9 | import numpy as np
10 |
11 | class ImageReader():
12 | def __init__(self, opt, queueSize=128):
13 | self.opt = opt
14 | self.rgb_path = opt.rgb_path
15 | self.dp_path = opt.dp_path
16 |
17 | for root, dirs, files in os.walk(self.rgb_path):
18 | self.rgb_len = len(files)
19 | for root, dirs, files in os.walk(self.dp_path):
20 | self.dp_len = len(files)
21 | assert self.rgb_len == self.dp_len, "the numbers of RGB and depth images do not match"
22 |
23 | if opt.sp:# single-thread (single-process) mode
24 | self.rgb_queue = Queue(maxsize=queueSize)
25 | self.dp_queue = Queue(maxsize=queueSize)
26 |
27 | def start_worker(self, target, path):
28 | if self.opt.sp:
29 | p = Thread(target=target, args=(), kwargs={"num": 0, "path": path})# target is the function this worker thread runs
30 | # else:
31 | p.start()
32 | return p
33 |
34 | def start(self):
35 | self.rgb_image_worker = self.start_worker(self.rgb_reader_process, path=self.rgb_path)
36 | self.dp_image_worker = self.start_worker(self.dp_reader_process, path=self.dp_path)
37 | return self
38 |
39 | def stop(self):
40 | self.rgb_image_worker.join()
41 | self.dp_image_worker.join()
42 | self.clear_queues()
43 |
44 | def clear_queues(self):
45 | self.clear(self.rgb_queue)
46 | self.clear(self.dp_queue)
47 |
48 | def clear(self, queue):
49 | while not queue.empty():
50 | queue.get()
51 |
52 | def wait_and_put(self, queue, item):
53 | queue.put(item)
54 |
55 | def wait_and_get(self, queue):
56 | return queue.get()
57 |
58 | def rgb_reader_process(self, **kargcs):
59 | while kargcs["num"] < self.rgb_len:
60 | rgb_name = "color_" + str(kargcs["num"]) + ".png"
61 | rgb_file = kargcs["path"] + "/" + rgb_name
62 | # print(rgb_file)
63 | frame = cv2.imread(rgb_file, -1)
64 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
65 | self.wait_and_put(self.rgb_queue, (rgb_name, kargcs["num"], frame))
66 | kargcs["num"] += 1
67 |
68 | def dp_reader_process(self, **kargcs):
69 | while kargcs["num"] < self.dp_len:
70 | dp_name = "depth_8bit_" + str(kargcs["num"]) + ".png"
71 | dp_file = kargcs["path"] + "/" + dp_name
72 | frame = cv2.imread(dp_file, -1)
73 | self.wait_and_put(self.dp_queue, (dp_name, kargcs["num"], frame))
74 | kargcs["num"] += 1
75 |
76 | def read_rgb(self):
77 | return self.wait_and_get(self.rgb_queue)
78 |
79 | def read_dp(self):
80 | return self.wait_and_get(self.dp_queue)
81 |
82 |
--------------------------------------------------------------------------------
/client/Module/windows_client.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | from threading import Thread
5 | from queue import Queue
6 | import pickle
7 |
8 | import cv2
9 |
10 | class WindowsClient():
11 | def __init__(self, opt, queueSize=128):
12 | self.opt = opt
13 | self.host = opt.host
14 | self.port = opt.port
15 | from multiprocessing.connection import Client
16 | self.client = Client((self.host, self.port))
17 |
18 | if opt.sp:
19 | self.frame_queue = Queue(maxsize=queueSize)
20 | self.output_queue = Queue(maxsize=queueSize)
21 |
22 | def start_worker(self, target):
23 | if self.opt.sp:
24 | p = Thread(target=target)# target is the function this worker thread runs
25 | # else:
26 | p.start()
27 | return p
28 |
29 | def start(self):
30 | self.client_worker = self.start_worker(self.client_worker_process)
31 | self.client_recv_worker = self.start_worker(self.client_recv_process)
32 | return self
33 |
34 | def stop(self):
35 | self.client_worker.join()
36 | self.client_recv_worker.join()
37 | self.clear_queues()
38 |
39 | def clear_queues(self):
40 | self.clear(self.frame_queue)
41 | self.clear(self.output_queue)
42 |
43 | def clear(self, queue):
44 | while not queue.empty():
45 | queue.get()
46 |
47 | def wait_and_put(self, queue, item):
48 | queue.put(item)
49 |
50 | def wait_and_get(self, queue):
51 | return queue.get()
52 |
53 | def client_worker_process(self, **kargcs):
54 | while True:
55 | (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = self.wait_and_get(self.frame_queue)
56 | data_string = pickle.dumps((rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame))
57 | # byte_size = sys.getsizeof(data_string)
58 | # print(byte_size)
59 | self.client.send(data_string)
60 | # print('Send', type(data_string))
61 | # time.sleep(0.02)
62 |
63 | # pred_data_byte = self.client.recv()
64 | # # (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = pickle.loads(pred_data_byte)
65 | # (bboxes, pred_data) = pickle.loads(pred_data_byte)
66 | # # print("Recv", pred_data)
67 | # self.wait_and_put(self.output_queue, (bboxes, pred_data))
68 |
69 | def client_recv_process(self):
70 | while True:
71 | pred_data_byte = self.client.recv()
72 | # (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = pickle.loads(pred_data_byte)
73 | (bboxes, pred_data) = pickle.loads(pred_data_byte)
74 | # print("Recv", pred_data)
75 | self.wait_and_put(self.output_queue, (bboxes, pred_data))
76 |
77 | def read_data(self):
78 | return self.wait_and_get(self.output_queue)
79 |
80 | def put_data(self, rgb_name, rgb_No, rgb_frame, dp_name, dp_No, dp_frame):
81 | self.wait_and_put(self.frame_queue, (rgb_name, rgb_No, rgb_frame, dp_name, dp_No, dp_frame))
82 |
83 |
--------------------------------------------------------------------------------
/client/README.md:
--------------------------------------------------------------------------------
1 | This is the client side.
2 |
3 | It contains the D435 camera class and the client class.
4 |
5 | It uses python pickle and a socket connection to send frame data to the remote GPU server. After the data has been processed, the client receives the result back from the server.
6 |
7 | Before starting, you have to change the IP address and port in both the client and the server.
8 |
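
For reference, here is a minimal sketch of the transport: pickled tuples sent over `multiprocessing.connection`. The real server lives in `src/Modules/myserver.py` and may differ; the address and payloads below are placeholders.

```
import pickle
from threading import Thread
from multiprocessing.connection import Listener, Client

ADDRESS = ('localhost', 10086)   # the real client uses the GPU server's IP and port

listener = Listener(ADDRESS)     # server side: bind and wait for the client

def serve_once():
    conn = listener.accept()
    payload = pickle.loads(conn.recv())   # (rgb_name, rgb_No, rgb_frame, dp_name, dp_No, dp_frame)
    # ... YOLO / openpose / MLP would run here on the GPU server ...
    conn.send(pickle.dumps(([], [])))     # reply with (bboxes, pred_data)
    conn.close()

Thread(target=serve_once).start()

client = Client(ADDRESS)         # client side: send one frame pair, wait for the result
client.send(pickle.dumps(("color_0.png", 0, None, "depth_8bit_0.png", 0, None)))
bboxes, pred_data = pickle.loads(client.recv())
client.close()
listener.close()
```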
--------------------------------------------------------------------------------
/client/myclient.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import time
4 | from threading import Thread
5 | from queue import Queue
6 | import pickle
7 |
8 | from tqdm import tqdm
9 | import cv2
10 |
11 | import utils
12 |
13 | from Module.imager import ImageReader
14 | from Module.windows_client import WindowsClient
15 |
16 | parser = argparse.ArgumentParser(description='Debug')
17 | parser.add_argument('--rgb_path', type=str, default=r"I:\0walk/RGB",
18 | help='path to store rgb images')
19 | parser.add_argument('--dp_path', type=str, default=r"I:\0walk/depth",
20 | help='path to store depth images')
21 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json",
22 | help='the camera setting json file path')
23 | parser.add_argument('--width', type=int, default=640,
24 | help='image width')
25 | parser.add_argument('--height', type=int, default=480,
26 | help='image height')
27 | parser.add_argument('--fps', type=int, default=30,
28 | help='camera fps')
29 | parser.add_argument('--sp', default=True, action='store_true', # not enough GPU memory for multi-threaded mode, so default to single process
30 | help='Use single process')
31 | parser.add_argument('--d435', type=bool, default=False,
32 | help='Using the D435 cam')
33 | parser.add_argument('--host', type=str, default='10.60.2.65',#10.60.2.47 10.60.2.65
34 | help='server_host')
35 | parser.add_argument('--port', type=int, default=10086,#60001 10086
36 | help='server_port')
37 |
38 | args = parser.parse_args()
39 | # args, unknown = parser.parse_known_args()#NOTE : using in ROS environment
40 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"}
41 | label_list = ["walk", "stop", "come", "phone", "open", "stand"]
42 |
43 | #Start the Image Reader Thread
44 | image_reader = ImageReader(args)
45 | image_reader.start()
46 |
47 | my_windows_client = WindowsClient(args)
48 | my_windows_client.start()
49 |
50 | if args.d435:
51 | print('Starting D435, press Ctrl + C to terminate...')
52 | sys.stdout.flush()
53 | im_names_desc = tqdm(loop())
54 | else:
55 | data_len = image_reader.rgb_len
56 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True)# progress bar output in the terminal
57 |
58 | try:
59 | for i in im_names_desc:
60 | # (rgb_nums, rgb_frame) = camera_reader.read_rgb()
61 | (rgb_name, rgb_nums, rgb_frame) = image_reader.read_rgb()
62 |
63 | # (dp_nums, dp_frame) = camera_reader.read_dp()
64 | (dp_name, dp_nums, dp_frame) = image_reader.read_dp()
65 |
66 | my_windows_client.put_data(rgb_name, rgb_nums, rgb_frame, dp_name, dp_nums, dp_frame)
67 |
68 | (bboxes, pred_data) = my_windows_client.read_data()
69 | # print("pred data: ", pred_data)
70 |
71 | image = utils.draw_bbox(rgb_frame, bboxes, pred=pred_data, classes=label_list)
72 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
73 | rgb_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
74 | cv2.imshow("result", rgb_frame)
75 | if cv2.waitKey(1) & 0xFF == ord('q'):
76 | break
77 |
78 | image_reader.stop()
79 | my_windows_client.stop()
80 | except KeyboardInterrupt:
81 | image_reader.stop()
82 | my_windows_client.stop()
83 |
--------------------------------------------------------------------------------
/client/note.txt:
--------------------------------------------------------------------------------
1 | The VPN is rate-limited,
2 | to roughly 500 KB/s - 1 MB/s,
3 | so it takes about 0.5 s to transfer one combined data packet.
4 | Running the program directly on the server is realtime; openpose takes less time than yolo.
--------------------------------------------------------------------------------
/client/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import random
3 | import colorsys
4 | import numpy as np
5 |
6 | def draw_bbox(image, bboxes, pred=None, num_classes=6, classes=None, show_label=True):
7 | """
8 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
9 | """
10 | image_h, image_w, _ = image.shape
11 | # num_classes += 1 # one extra class, because sometimes only the person/skeleton is detected
12 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
13 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
14 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
15 |
16 | random.seed(0)
17 | random.shuffle(colors)
18 | random.seed(None)
19 |
20 | if bboxes and pred:
21 | for i, bbox in enumerate(bboxes):
22 | coor = np.array(bbox[:4], dtype=np.int32)
23 | fontScale = 0.5
24 | class_ind = pred[i][1]
25 | bbox_color = colors[class_ind]
26 | bbox_thick = int(0.6 * (image_h + image_w) / 600)
27 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
28 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
29 |
30 | if show_label:
31 | if pred != None:
32 | bbox_mess = '%s %s' % (classes[class_ind], pred[i][1])
33 | else:
34 | bbox_mess = '%s' % (classes[class_ind])
35 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
36 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled
37 |
38 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
39 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
40 |
41 | return image
--------------------------------------------------------------------------------
/environment.yaml:
--------------------------------------------------------------------------------
1 | name: tensorflow2
2 | channels:
3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
4 | - defaults
5 | dependencies:
6 | - _libgcc_mutex=0.1=main
7 | - _nb_ext_conf=0.4.0=py36_1
8 | - anaconda-client=1.6.3=py36_0
9 | - bleach=1.5.0=py36_0
10 | - ca-certificates=2019.11.27=0
11 | - certifi=2019.11.28=py36_0
12 | - clyent=1.2.2=py36_0
13 | - cudatoolkit=10.0.130=0
14 | - cudnn=7.6.4=cuda10.0_0
15 | - decorator=4.1.2=py36_0
16 | - entrypoints=0.2.3=py36_0
17 | - html5lib=0.9999999=py36_0
18 | - ipykernel=4.6.1=py36_0
19 | - ipython=6.1.0=py36_0
20 | - ipython_genutils=0.2.0=py36_0
21 | - ipywidgets=6.0.0=py36_0
22 | - jedi=0.10.2=py36_2
23 | - jinja2=2.9.6=py36_0
24 | - jsonschema=2.6.0=py36_0
25 | - jupyter_client=5.1.0=py36_0
26 | - jupyter_core=4.3.0=py36_0
27 | - libedit=3.1.20181209=hc058e9b_0
28 | - libffi=3.2.1=hd88cf55_4
29 | - libgcc-ng=9.1.0=hdf63c60_0
30 | - libsodium=1.0.10=0
31 | - libstdcxx-ng=9.1.0=hdf63c60_0
32 | - markupsafe=1.0=py36_0
33 | - mistune=0.7.4=py36_0
34 | - nb_anacondacloud=1.4.0=py36_0
35 | - nb_conda=2.2.0=py36_0
36 | - nb_conda_kernels=2.1.0=py36_0
37 | - nbconvert=5.2.1=py36_0
38 | - nbformat=4.4.0=py36_0
39 | - nbpresent=3.0.2=py36_0
40 | - ncurses=6.1=he6710b0_1
41 | - notebook=5.0.0=py36_0
42 | - openssl=1.1.1d=h7b6447c_3
43 | - pandocfilters=1.4.2=py36_0
44 | - path.py=10.3.1=py36_0
45 | - pexpect=4.2.1=py36_0
46 | - pickleshare=0.7.4=py36_0
47 | - pip=19.3.1=py36_0
48 | - prompt_toolkit=1.0.15=py36_0
49 | - ptyprocess=0.5.2=py36_0
50 | - pygments=2.2.0=py36_0
51 | - python=3.6.9=h265db76_0
52 | - pyyaml=3.12=py36_0
53 | - pyzmq=16.0.2=py36_0
54 | - readline=7.0=h7b6447c_5
55 | - setuptools=42.0.2=py36_0
56 | - simplegeneric=0.8.1=py36_1
57 | - singledispatch=3.4.0.3=py36_0
58 | - sqlite=3.30.1=h7b6447c_0
59 | - terminado=0.6=py36_0
60 | - testpath=0.3.1=py36_0
61 | - tk=8.6.8=hbc83047_0
62 | - tornado=4.5.2=py36_0
63 | - traitlets=4.3.2=py36_0
64 | - wcwidth=0.1.7=py36_0
65 | - wheel=0.33.6=py36_0
66 | - widgetsnbextension=3.0.2=py36_0
67 | - xz=5.2.4=h14c3975_4
68 | - yaml=0.1.6=0
69 | - zeromq=4.1.5=0
70 | - zlib=1.2.11=h7b6447c_3
71 | - pip:
72 | - absl-py==0.8.1
73 | - altgraph==0.16.1
74 | - astor==0.8.0
75 | - astroid==2.3.3
76 | - cachetools==3.1.1
77 | - chardet==3.0.4
78 | - cycler==0.10.0
79 | - distro==1.4.0
80 | - easydict==1.9
81 | - gast==0.2.2
82 | - google-auth==1.7.2
83 | - google-auth-oauthlib==0.4.1
84 | - google-pasta==0.1.8
85 | - grpcio==1.25.0
86 | - h5py==2.10.0
87 | - idna==2.8
88 | - isort==4.3.21
89 | - keras==2.3.1
90 | - keras-applications==1.0.8
91 | - keras-preprocessing==1.1.0
92 | - kiwisolver==1.1.0
93 | - lazy-object-proxy==1.4.3
94 | - markdown==3.1.1
95 | - matplotlib==3.1.2
96 | - mccabe==0.6.1
97 | - numpy==1.17.0
98 | - oauthlib==3.1.0
99 | - opencv-python==4.1.2.30
100 | - opt-einsum==3.1.0
101 | - pandas==0.25.3
102 | - pillow==6.2.0
103 | - protobuf==3.11.1
104 | - pyasn1==0.4.8
105 | - pyasn1-modules==0.2.7
106 | - pyinstaller==3.5
107 | - pylint==2.4.4
108 | - pyparsing==2.4.5
109 | - pyrealsense2==2.31.0.1235
110 | - python-dateutil==2.8.1
111 | - pytz==2019.3
112 | - requests==2.22.0
113 | - requests-oauthlib==1.3.0
114 | - rospkg==1.2.3
115 | - rsa==4.0
116 | - scipy==1.1.0
117 | - seaborn==0.9.0
118 | - six==1.13.0
119 | - tb-nightly==1.14.0a20190301
120 | - tensorboard==1.14.0
121 | - tensorflow==2.0.0a0
122 | - tensorflow-estimator==2.0.1
123 | - tensorflow-gpu==2.0.0
124 | - termcolor==1.1.0
125 | - tf-estimator-nightly==1.14.0.dev2019030115
126 | - tqdm==4.40.2
127 | - typed-ast==1.4.0
128 | - urllib3==1.25.7
129 | - werkzeug==0.16.0
130 | - wget==3.2
131 | - wrapt==1.11.2
132 | prefix: /home/dongjai/anaconda3/envs/tensorflow2
133 |
134 |
--------------------------------------------------------------------------------
/launch/run.launch:
--------------------------------------------------------------------------------
1 | <!-- run.launch: roslaunch configuration for the act_recognizer package (XML contents not preserved) -->
--------------------------------------------------------------------------------
/msg/ActElem.msg:
--------------------------------------------------------------------------------
1 | float32 x
2 | float32 y
3 | float32 z
4 | uint16 act_cls
5 |
--------------------------------------------------------------------------------
/msg/Person.msg:
--------------------------------------------------------------------------------
1 | ActElem[] xyz_cls
2 |
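
Together these define the payload published by the recognizer: each `ActElem` carries one person's 3D position plus a predicted action class index, and a `Person` message holds a list of them. A minimal sketch of filling one on the Python side, mirroring what `src/Modules/act_talker.py` does (the coordinate values are placeholders):

```
from act_recognizer.msg import ActElem, Person

person = Person()
elem = ActElem()
elem.x, elem.y, elem.z = 0.5, 0.1, 2.3   # 3D position of the detected person (placeholder values)
elem.act_cls = 0                         # 0 = "walk" in the label list used by scripts/talker.py
person.xyz_cls.append(elem)
```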
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package format="2">
3 | <name>act_recognizer</name>
4 | <version>0.0.0</version>
5 | <description>The act_recognizer package</description>
6 |
7 | <maintainer email="dongjai@todo.todo">dongjai</maintainer>
8 |
9 | <license>TODO</license>
10 |
11 | <buildtool_depend>catkin</buildtool_depend>
12 | <build_depend>roscpp</build_depend>
13 | <build_depend>rospy</build_depend>
14 | <build_depend>std_msgs</build_depend>
15 | <build_depend>message_generation</build_depend>
16 | <build_export_depend>roscpp</build_export_depend>
17 | <build_export_depend>rospy</build_export_depend>
18 | <build_export_depend>std_msgs</build_export_depend>
19 | <build_export_depend>message_generation</build_export_depend>
20 | <exec_depend>roscpp</exec_depend>
21 | <exec_depend>rospy</exec_depend>
22 | <exec_depend>std_msgs</exec_depend>
23 | <exec_depend>message_runtime</exec_depend>
24 |
25 | <export>
26 | </export>
27 | </package>
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.9.0
2 | actionlib==1.12.0
3 | altgraph==0.16.1
4 | anaconda-client==1.6.3
5 | angles==1.9.12
6 | astor==0.8.1
7 | astroid==2.3.3
8 | bleach==1.5.0
9 | bondpy==1.8.3
10 | cachetools==4.0.0
11 | camera-calibration==1.14.0
12 | camera-calibration-parsers==1.11.13
13 | catkin==0.7.20
14 | catkin-pkg==0.4.16
15 | certifi==2019.11.28
16 | chardet==3.0.4
17 | cloudpickle==1.2.2
18 | clyent==1.2.2
19 | controller-manager==0.17.0
20 | controller-manager-msgs==0.17.0
21 | cv-bridge==1.13.0
22 | cycler==0.10.0
23 | Cython==0.29.14
24 | decorator==4.4.1
25 | diagnostic-analysis==1.9.3
26 | diagnostic-common-diagnostics==1.9.3
27 | diagnostic-updater==1.9.3
28 | distro==1.4.0
29 | docutils==0.16
30 | dynamic-reconfigure==1.6.0
31 | easydict==1.9
32 | empy==3.3.4
33 | entrypoints==0.2.3
34 | gast==0.2.2
35 | gazebo-plugins==2.8.6
36 | gazebo-ros==2.8.6
37 | gencpp==0.6.2
38 | geneus==2.2.6
39 | genlisp==0.4.16
40 | genmsg==0.5.12
41 | gennodejs==2.0.1
42 | genpy==0.6.9
43 | google-auth==1.11.3
44 | google-auth-oauthlib==0.4.1
45 | google-pasta==0.2.0
46 | grpcio==1.27.2
47 | h5py==2.10.0
48 | html5lib==0.9999999
49 | idna==2.9
50 | image-geometry==1.13.0
51 | imageio==2.4.1
52 | interactive-markers==1.11.4
53 | ipykernel==4.6.1
54 | ipython==6.1.0
55 | ipython-genutils==0.2.0
56 | ipywidgets==6.0.0
57 | isort==4.3.21
58 | jedi==0.10.2
59 | Jinja2==2.9.6
60 | joint-state-publisher==1.12.14
61 | jsonschema==2.6.0
62 | jupyter-client==5.1.0
63 | jupyter-core==4.3.0
64 | kdl-parser-py==1.13.1
65 | Keras==2.3.1
66 | Keras-Applications==1.0.8
67 | Keras-Preprocessing==1.1.0
68 | kiwisolver==1.1.0
69 | laser-geometry==1.6.4
70 | lazy-object-proxy==1.4.3
71 | lxml==4.2.6
72 | Markdown==3.2.1
73 | MarkupSafe==1.0
74 | matplotlib==3.2.1
75 | mccabe==0.6.1
76 | message-filters==1.14.3
77 | mistune==0.7.4
78 | mock==3.0.5
79 | nb-anacondacloud==1.4.0
80 | nb-conda==2.2.0
81 | nb-conda-kernels==2.1.0
82 | nbconvert==5.2.1
83 | nbformat==4.4.0
84 | nbpresent==3.0.2
85 | networkx==2.4
86 | nltk==3.3
87 | notebook==5.0.0
88 | numpy==1.18.2
89 | oauthlib==3.1.0
90 | opencv-python==3.4.8.29
91 | opt-einsum==3.1.0
92 | pandas==0.25.3
93 | pandocfilters==1.4.2
94 | pexpect==4.2.1
95 | pickleshare==0.7.4
96 | Pillow==7.0.0
97 | progressbar2==3.38.0
98 | prompt-toolkit==1.0.15
99 | protobuf==3.11.3
100 | ptyprocess==0.5.2
101 | pyasn1==0.4.8
102 | pyasn1-modules==0.2.8
103 | pycocotools==2.0.0
104 | Pygments==2.2.0
105 | PyInstaller==3.5
106 | pylint==2.4.4
107 | pyparsing==2.4.6
108 | pyrealsense2==2.31.0.1235
109 | python-dateutil==2.8.1
110 | python-qt-binding==0.4.0
111 | python-utils==2.3.0
112 | pytz==2019.3
113 | PyWavelets==1.1.1
114 | PyYAML==3.12
115 | pyzmq==16.0.2
116 | qt-dotgraph==0.4.0
117 | qt-gui==0.4.0
118 | qt-gui-cpp==0.4.0
119 | qt-gui-py-common==0.4.0
120 | requests==2.23.0
121 | requests-oauthlib==1.3.0
122 | resource-retriever==1.12.6
123 | rosbag==1.14.3
124 | rosboost-cfg==1.14.7
125 | rosclean==1.14.7
126 | roscreate==1.14.7
127 | rosgraph==1.14.3
128 | roslaunch==1.14.3
129 | roslib==1.14.7
130 | roslint==0.11.2
131 | roslz4==1.14.3
132 | rosmake==1.14.7
133 | rosmaster==1.14.3
134 | rosmsg==1.14.3
135 | rosnode==1.14.3
136 | rosparam==1.14.3
137 | rospkg==1.2.3
138 | rospy==1.14.3
139 | rosservice==1.14.3
140 | rostest==1.14.3
141 | rostopic==1.14.3
142 | rosunit==1.14.7
143 | roswtf==1.14.3
144 | rqt-action==0.4.9
145 | rqt-bag==0.4.12
146 | rqt-bag-plugins==0.4.12
147 | rqt-console==0.4.9
148 | rqt-dep==0.4.9
149 | rqt-graph==0.4.11
150 | rqt-gui==0.5.0
151 | rqt-gui-py==0.5.0
152 | rqt-image-view==0.4.14
153 | rqt-launch==0.4.8
154 | rqt-logger-level==0.4.8
155 | rqt-moveit==0.5.7
156 | rqt-msg==0.4.8
157 | rqt-nav-view==0.5.7
158 | rqt-plot==0.4.9
159 | rqt-pose-view==0.5.8
160 | rqt-publisher==0.4.8
161 | rqt-py-common==0.5.0
162 | rqt-py-console==0.4.8
163 | rqt-reconfigure==0.5.1
164 | rqt-robot-dashboard==0.5.7
165 | rqt-robot-monitor==0.5.9
166 | rqt-robot-steering==0.5.10
167 | rqt-runtime-monitor==0.5.7
168 | rqt-rviz==0.6.0
169 | rqt-service-caller==0.4.8
170 | rqt-shell==0.4.9
171 | rqt-srv==0.4.8
172 | rqt-tf-tree==0.6.0
173 | rqt-top==0.4.8
174 | rqt-topic==0.4.11
175 | rqt-web==0.4.8
176 | rsa==4.0
177 | rviz==1.13.7
178 | scikit-image==0.14.5
179 | scikit-learn==0.20.4
180 | scipy==1.1.0
181 | seaborn==0.9.0
182 | sensor-msgs==1.12.7
183 | simplegeneric==0.8.1
184 | singledispatch==3.4.0.3
185 | six==1.14.0
186 | smach==2.0.1
187 | smach-ros==2.0.1
188 | smclib==1.8.3
189 | tb-nightly==1.14.0a20190301
190 | tensorboard==2.0.2
191 | tensorflow==2.0.0a0
192 | tensorflow-estimator==1.14.0
193 | tensorflow-gpu==2.0.0
194 | termcolor==1.1.0
195 | terminado==0.6
196 | terminaltables==3.1.0
197 | testpath==0.3
198 | tf==1.12.0
199 | tf-conversions==1.12.0
200 | tf-estimator-nightly==1.14.0.dev2019030115
201 | tf2-geometry-msgs==0.6.5
202 | tf2-kdl==0.6.5
203 | tf2-py==0.6.5
204 | tf2-ros==0.6.5
205 | topic-tools==1.14.3
206 | torch==1.4.0
207 | torchvision==0.5.0
208 | tornado==4.5.2
209 | tqdm==4.43.0
210 | traitlets==4.3.2
211 | typed-ast==1.4.0
212 | urdfdom-py==0.4.2
213 | urllib3==1.25.8
214 | wcwidth==0.1.7
215 | Werkzeug==1.0.0
216 | wget==3.2
217 | widgetsnbextension==3.0.2
218 | wrapt==1.12.1
219 | xacro==1.13.5
220 |
--------------------------------------------------------------------------------
/scripts/listener.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | import rospy
5 | from std_msgs.msg import String
6 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer')
7 | # from act_recognizer.msg import Persons  # Persons.msg is not built (commented out in CMakeLists.txt)
8 | from act_recognizer.msg import Person
9 | from act_recognizer.msg import ActElem
10 |
11 | def callback(data):
12 | rospy.logwarn(data.xyz_cls)
13 | #print(data.xyz_cls)
14 |
15 | def listener():
16 | rospy.init_node('suber_xyzcls', anonymous=True)
17 | rospy.Subscriber('chatter', Person, callback)
18 | rospy.spin()
19 |
20 | if __name__ == '__main__':
21 | listener()
22 |
23 |
24 |
25 | # In ROS, nodes are uniquely named. If two nodes with the same
26 | # name are launched, the previous one is kicked off. The
27 | # anonymous=True flag means that rospy will choose a unique
28 | # name for our 'listener' node so that multiple listeners can
29 | # run simultaneously.
30 |
--------------------------------------------------------------------------------
/scripts/talker.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 |
3 | import argparse
4 | import sys
5 | import time
6 | from threading import Thread
7 | from queue import Queue
8 |
9 | from tqdm import tqdm
10 | import cv2
11 | import rospy # if using ros, turn it on
12 |
13 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer/src/')
14 | import core.utils as utils
15 | from Modules.camera import CamReader
16 | from Modules.imager import ImageReader
17 | from Modules.detection import DetectionLoader
18 | from Modules.poser import PoseLoader
19 | from Modules.midprocessor import MidProcessor
20 | from Modules.recognizer import ActionRecognizer
21 | from Modules.act_talker import Talker # using a ros publisher
22 | # from Modules.myserver import myServer # using a remote server
23 |
24 | def getTime(time1=0):
25 | if not time1:
26 | return time.time()
27 | else:
28 | interval = time.time() - time1
29 | return time.time(), interval
30 |
31 | def loop():
32 | n = 0
33 | while True:
34 | yield n
35 | n += 1
36 |
37 | parser = argparse.ArgumentParser(description='Debug')
38 | ### for training set record
39 | parser.add_argument('--is_train', type=bool, default=False,
40 | help='is_train is True means write records from Openpose and Normalization into a txt file.')
41 | parser.add_argument('--training_set', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/0walk.txt",
42 | help='txt file to store training set.')
43 | ### for inference
44 | parser.add_argument('--rgb_path', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/4open/RGB",
45 | help='path to store rgb images')
46 | parser.add_argument('--dp_path', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/4open/depth",
47 | help='path to store depth images')
48 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json",
49 | help='the camera setting json file path')
50 | parser.add_argument('--width', type=int, default=640,
51 | help='image width')
52 | parser.add_argument('--height', type=int, default=480,
53 | help='image height')
54 | parser.add_argument('--fps', type=int, default=30,
55 | help='camera fps')
56 | parser.add_argument('--sp', default=True, action='store_true', # not enough GPU memory for multi-threaded mode, so default to single process
57 | help='Use single process')
58 | parser.add_argument('--d435', type=bool, default=False,
59 | help='Using the D435 cam')
60 | parser.add_argument('--yolo_gpu_usages', type=float, default=0.4,
61 | help='restrict the gpu memory')
62 | parser.add_argument('--nn_pix', type=int, default=3,
63 | help='extend pixel for depth image')
64 | parser.add_argument('--kps_usage_num', type=int, default=5,
65 | help='the number of kps be used in recognizor')
66 | parser.add_argument('--kp_num', type=list, default=[0, 4, 7, 9, 12],
67 | help='the kps be used in recognizor')
68 | parser.add_argument('--yolo_weights', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/checkpoints/YOLO/yolov3.weights",
69 | help='yolov3 weights')
70 | parser.add_argument('--mlp_weights', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/checkpoints/new_mlp20_35_15.h5",
71 | help='mlp weights')
72 | parser.add_argument('--host', type=str, default='10.60.2.65',
73 | help='server_host')
74 | parser.add_argument('--port', type=int, default=10086,
75 | help='server_port')
76 |
77 | # args = parser.parse_args()
78 | args, unknown = parser.parse_known_args()#NOTE : using in ROS environment
79 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"}
80 | label_list = ["walk", "stop", "come", "phone", "open", "stand"]
81 | # for data statistics
82 | walk_t, stand_t, come_t, stop_t, open_t, phone_t = 0,0,0,0,0,0
83 | last_action_name, action_stable_time, stable_action_name = None, None, None
84 |
85 | mid_proc_time, mlp_pred_time = 0, 0
86 | det_loader_put_time, det_time = 0, 0
87 | pose_loader_put_time, det_openpose_time = 0, 0
88 |
89 | #Start the Server to receive data from client
90 | # proc_server = myServer(args)
91 | # proc_server.start()
92 |
93 | #Start the Camera Thread
94 | # camera_reader = CamReader(args)
95 | # camera_reader.start()
96 |
97 | #Start the Image Reader Thread
98 | image_reader = ImageReader(args)
99 | image_reader.start()
100 |
101 | #Start the YOLO Detection Thread
102 | det_loader = DetectionLoader(args)
103 | det_loader.start()
104 |
105 | #Start the Openpose Estimation Thread
106 | pose_loader = PoseLoader(args)
107 | pose_loader.start()
108 |
109 | #Start the Middle Data Processing Thread
110 | mid_processor = MidProcessor(args)
111 | mid_processor.start()
112 |
113 | #Start the MLP Action Recognition Thread
114 | act_recognizer = ActionRecognizer(args)
115 | act_recognizer.start()
116 |
117 | #Start the ROS Talker Thread to publish recognition data
118 | ros_talker = Talker(args)
119 | ros_talker.start()
120 |
121 | if args.d435:
122 | print('Starting D435, press Ctrl + C to terminate...')
123 | sys.stdout.flush()
124 | im_names_desc = tqdm(loop())
125 | else:
126 | # data_len = 2000
127 | data_len = image_reader.rgb_len
128 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True)# progress bar output in the terminal
129 |
130 | try:
131 | for i in im_names_desc:
132 | pred_data = None
133 | start_time = getTime()
134 |
135 | # (rgb_name, rgb_nums, rgb_frame, dp_name, dp_nums, dp_frame) = proc_server.read_data()
136 |
137 | # (rgb_nums, rgb_frame) = camera_reader.read_rgb()
138 | (rgb_name, rgb_nums, rgb_frame) = image_reader.read_rgb()
139 | # ckpt_time, rgb_read_time = getTime(start_time)
140 |
141 | # (dp_nums, dp_frame) = camera_reader.read_dp()
142 | (dp_name, dp_nums, dp_frame) = image_reader.read_dp()
143 | ckpt_time, img_read_time = getTime(start_time)
144 |
145 | det_loader.put_image(rgb_nums, rgb_frame)
146 | ckpt_time, det_loader_put_time = getTime(ckpt_time)
147 |
148 | (det_nums, bboxes) = det_loader.read()
149 | ckpt_time, det_time = getTime(ckpt_time)
150 |
151 | pose_loader.put_image(rgb_nums, rgb_frame)
152 | ckpt_time, pose_loader_put_time = getTime(ckpt_time)
153 |
154 | (kp_nums, kps_list) = pose_loader.read()
155 | ckpt_time, det_openpose_time = getTime(ckpt_time)
156 |
157 | if kps_list and bboxes:# if detect some human in camera
158 | mid_processor.put_data(kp_nums, kps_list, dp_nums, dp_frame, det_nums, bboxes)
159 | ckpt_time, mid_proc_put_time = getTime(ckpt_time)
160 |
161 | (kp_nums, output) = mid_processor.read()
162 | ckpt_time, mid_proc_time = getTime(ckpt_time)
163 | # output = False #It is used to pass the following loop
164 | if output and not args.is_train:
165 | # print(output)
166 | act_recognizer.put_data(kp_nums, output)
167 | (out_nums, pred_data) = act_recognizer.read()
168 | ckpt_time, mlp_pred_time = getTime(ckpt_time)
169 | # cv2.imwrite("/home/dongjai/catkin_ws/src/act_recognizer/store/random/{}.png".format(rgb_nums), image)
170 | print(pred_data)
171 | #Using ROS talker to publish the information.
172 | ros_talker.put_data(pred_data)
173 | for i in pred_data:
174 | action_t = i[-1]
175 | if action_t == 0:
176 | walk_t +=1
177 | if action_t == 5:
178 | stand_t += 1
179 | if action_t == 1:
180 | stop_t +=1
181 | if action_t == 3:
182 | phone_t +=1
183 | if action_t == 2:
184 | come_t +=1
185 | if action_t == 4:
186 | open_t +=1
187 | else:
188 | pred_data = None
189 | # proc_server.get_data(bboxes, pred_data)
190 | image = utils.draw_bbox(rgb_frame, bboxes, pred=pred_data, classes=label_list)
191 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
192 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
193 | cv2.imshow("result", image)
194 | # cv2.imwrite("/home/dongjai/catkin_ws/src/act_recognizer/store/2come/{}.png".format(rgb_nums), image)
195 | if cv2.waitKey(1) & 0xFF == ord('q'):
196 | break
197 | # im_names_desc.set_description(
198 | # 'img time: {img:.4f} | det put: {put:.4f} | det: {det:.4f} | opt: {opt:.4f} | mid: {mid:.4f} | mlp: {mlp:.4f}'.format(
199 | # img=img_read_time, put=det_loader_put_time, det=det_time, opt=det_openpose_time, mid=mid_proc_time, mlp=mlp_pred_time)
200 | # )
201 | print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t))
202 | # proc_server.stop()
203 | image_reader.stop()
204 | # camera_reader.stop()
205 | det_loader.stop()
206 | pose_loader.stop()
207 | mid_processor.stop()
208 | act_recognizer.stop()
209 | ros_talker.stop()
210 | except KeyboardInterrupt:
211 | print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t))
212 | # proc_server.stop()
213 | image_reader.stop()
214 | # camera_reader.stop()
215 | det_loader.stop()
216 | pose_loader.stop()
217 | mid_processor.stop()
218 | act_recognizer.stop()
219 | ros_talker.stop()
220 |
221 |
222 |
223 |
224 |
225 |
--------------------------------------------------------------------------------
/scripts/testing.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import time
4 | from threading import Thread
5 | from queue import Queue
6 |
7 | from tqdm import tqdm
8 | import cv2
9 | # import rospy
10 |
11 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer/src/')
12 | import core.utils as utils
13 | from Modules.camera import CamReader
14 | from Modules.imager import ImageReader
15 | from Modules.detection import DetectionLoader
16 | from Modules.poser import PoseLoader
17 | from Modules.midprocessor import MidProcessor
18 | from Modules.recognizer import ActionRecognizer
19 |
20 | def getTime(time1=0):
21 | if not time1:
22 | return time.time()
23 | else:
24 | interval = time.time() - time1
25 | return time.time(), interval
26 |
27 | def loop():
28 | n = 0
29 | while True:
30 | yield n
31 | n += 1
32 |
33 | parser = argparse.ArgumentParser(description='Debug')
34 | parser.add_argument('--rgb_path', type=str, default=r"/home/dongjai/action data set/dataset/multiperson/2come/RGB",
35 | help='path to store rgb images')
36 | parser.add_argument('--dp_path', type=str, default=r"/home/dongjai/action data set/dataset/multiperson/2come/depth",
37 | help='path to store depth images')
38 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json",
39 | help='the camera setting json file path')
40 | parser.add_argument('--width', type=int, default=640,
41 | help='image width')
42 | parser.add_argument('--height', type=int, default=480,
43 | help='image height')
44 | parser.add_argument('--fps', type=int, default=30,
45 | help='camera fps')
46 | parser.add_argument('--sp', default=True, action='store_true', # not enough GPU memory for multi-threaded mode, so default to single process
47 | help='Use single process')
48 | parser.add_argument('--d435', type=bool, default=True,
49 | help='Using the D435 cam')
50 | parser.add_argument('--yolo_gpu_usages', type=float, default=0.4,
51 | help='restrict the gpu memory')
52 | parser.add_argument('--nn_pix', type=int, default=3,
53 | help='extend pixel for depth image')
54 | parser.add_argument('--kps_usage_num', type=int, default=5,
55 | help='the number of kps be used in recognizor')
56 |
57 | args = parser.parse_args()
58 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"}
59 | label_list = ["walk", "stop", "come", "phone", "open", "stand"]
60 | walk_t, stand_t, come_t, stop_t, open_t, phone_t = 0,0,0,0,0,0
61 | last_action_name, action_stable_time, stable_action_name = None, None, None
62 |
63 | mid_proc_time, mlp_pred_time = 0, 0
64 | det_loader_put_time, det_time = 0, 0
65 | pose_loader_put_time, det_openpose_time = 0, 0
66 |
67 | det_loader = DetectionLoader(args)
68 | det_loader.start()
69 |
70 | camera_reader = CamReader(args)
71 | camera_reader.start()
72 |
73 | if args.d435:
74 | print('Starting D435, press Ctrl + C to terminate...')
75 | sys.stdout.flush()
76 | im_names_desc = tqdm(loop())
77 | else:
78 | data_len = image_reader.rgb_len
79 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True)# progress bar output in the terminal
80 |
81 | try:
82 | for i in im_names_desc:
83 | (rgb_nums, rgb_frame) = camera_reader.read_rgb()# CamReader queues (frame_num, frame) pairs
84 | (dp_nums, dp_frame) = camera_reader.read_dp()
85 |
86 | det_loader.put_image(rgb_nums, rgb_frame)# DetectionLoader takes (nums, frame)
87 | (det_nums, bboxes) = det_loader.read()
88 |
89 | image = rgb_frame
90 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
91 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
92 | cv2.imshow("result", image)
93 | if cv2.waitKey(1) & 0xFF == ord('q'):
94 | break
95 | camera_reader.stop()
96 | det_loader.stop()
97 | except KeyboardInterrupt:
98 | # print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t))
99 | # image_reader.stop()
100 | camera_reader.stop()
101 | det_loader.stop()
102 | # pose_loader.stop()
103 | # mid_processor.stop()
104 | # act_recognizer.stop()
--------------------------------------------------------------------------------
/src/Modules/act_talker.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | from threading import Thread
5 | from queue import Queue
6 | import multiprocessing as mp  # needed for the non-sp (multi-process) branch below
7 | import rospy
8 | from std_msgs.msg import String
9 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer')
10 | from act_recognizer.msg import Person
11 | from act_recognizer.msg import ActElem
12 | # from act_recognizer.msg import Persons
13 |
14 | import cv2
15 | import scipy.misc
16 | import numpy as np
17 |
18 | class Talker():
19 | def __init__(self, opt, queueSize=128):
20 | self.opt = opt
21 | self._talker, self._talk_rate = self._init_talker()
22 |
23 | if opt.sp:
24 | self.data_queue = Queue(maxsize=queueSize)
25 | self.talk_queue = Queue(maxsize=queueSize)
26 | else:
27 | self.data_queue = mp.Queue(maxsize=queueSize)
28 | self.talk_queue = mp.Queue(maxsize=queueSize)
29 |
30 | def _init_talker(self):
31 | print("Initializing ROS Talker......")
32 | pub = rospy.Publisher('chatter', Person, queue_size=10)
33 | rospy.init_node('puber_xyzcls', anonymous=True)
34 | rate = rospy.Rate(10) # 10hz
35 | print("Successfully Initialized ROS Talker")
36 | return pub, rate
37 |
38 | def start_worker(self, target):
39 | if self.opt.sp:
40 | p = Thread(target=target, args=(), kwargs={})# target is the function this worker thread runs
41 | # else:
42 | p.start()
43 | return p
44 |
45 | def start(self):
46 | self.talker_worker = self.start_worker(self.talker_process)
47 | return self
48 |
49 | def stop(self):
50 | self.talker_worker.join()
51 | self.clear_queues()
52 |
53 | def clear_queues(self):
54 | self.clear(self.talk_queue)
55 |
56 | def clear(self, queue):
57 | while not queue.empty():
58 | queue.get()
59 |
60 | def wait_and_put(self, queue, item):
61 | queue.put(item)
62 |
63 | def wait_and_get(self, queue):
64 | return queue.get()
65 |
66 | def talker_process(self, **kargcs):
67 | while not rospy.is_shutdown():
68 | pred_data_list = self.wait_and_get(self.data_queue)
69 | person_msg = self.persondata_to_msg(pred_data_list)
70 |
71 | # hello_str = "hello world %s" % rospy.get_time()
72 | # rospy.loginfo(person_msg)
73 | self._talker.publish(person_msg)
74 | self._talk_rate.sleep()
75 |
76 | def persondata_to_msg(self, pred_data_list):
77 | # persons = Persons()
78 | person = Person()
79 | for i in pred_data_list:
80 | act_elem = ActElem()
81 | act_elem.x = i[0][0]
82 | act_elem.y = i[0][1]
83 | act_elem.z = i[0][2]
84 | act_elem.act_cls = i[-1]
85 | person.xyz_cls.append(act_elem)
86 | # persons.persons.append(person)
87 | # person.xyz_cls.clear()
88 | return person
89 |
90 | def put_data(self, pred_data_list):
91 | self.wait_and_put(self.data_queue, (pred_data_list))
92 |
93 |
--------------------------------------------------------------------------------
/src/Modules/camera.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | import time
5 | from threading import Thread
6 | from queue import Queue
7 |
8 | import cv2
9 | import scipy.misc
10 | import numpy as np
11 |
12 | from core import camera as cam
13 | import pyrealsense2 as rs
14 | import json
15 |
16 | class CamReader():
17 | def __init__(self, opt, queueSize=10):
18 | self.opt = opt
19 | self.json_file_path = opt.cam_json
20 | self.width = opt.width
21 | self.height = opt.height
22 | self.fps = opt.fps
23 |
24 | if opt.sp:# single-thread (single-process) mode
25 | self.rgb_queue = Queue(maxsize=queueSize)
26 | self.dp_queue = Queue(maxsize=queueSize)
27 |
28 | def start_worker(self, target):
29 | if self.opt.sp:
30 | p = Thread(target=target, args=(), kwargs={})# target is the function this worker thread runs
31 | # else:
32 | p.start()
33 | return p
34 |
35 | def start(self):
36 | self.cam_worker = self.start_worker(self.cam_reader_process)
37 | return self
38 |
39 | def stop(self):
40 | self.cam_worker.join()
41 | self.clear_queues()
42 |
43 | def clear_queues(self):
44 | self.clear(self.rgb_queue)
45 | self.clear(self.dp_queue)
46 |
47 | def clear(self, queue):
48 | while not queue.empty():
49 | queue.get()
50 |
51 | def wait_and_put(self, queue, item):
52 | queue.put(item)
53 |
54 | def wait_and_get(self, queue):
55 | return queue.get()
56 |
57 | def cam_reader_process(self, **kargcs):
58 | print("Loading Intel Realsense D435......")
59 | try:
60 | print(self.json_file_path)
61 | cam.try_to_advanced_mode(self.json_file_path)
62 | except Exception as e:
63 | print(e)
64 | pass
65 | # Configure depth and color streams
66 | pipeline = rs.pipeline()
67 | config = rs.config()
68 | config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps)
69 | config.enable_stream(rs.stream.color, self.width, self.height, rs.format.bgr8, self.fps)
70 | # Start streaming
71 | pipeline.start(config)
72 | cam.wait4cam_init(pipeline, WaitTimes=10) # wait for the camera to initialize and auto-exposure to settle
73 | #align
74 | align_to = rs.stream.color
75 | align = rs.align(align_to)
76 | print("Camera Starting")
77 | frame_num = 0
78 | while True:
79 | frames = pipeline.wait_for_frames()
80 | aligned_frames = align.process(frames)# Align the depth frame to color frame
81 | aligned_depth_frame = aligned_frames.get_depth_frame()# Get aligned depth and color frames
82 | color_frame = aligned_frames.get_color_frame()
83 | if not aligned_depth_frame or not color_frame:# discard the frame if rgb and depth do not correspond (only one of them arrived)
84 | continue
85 | # dp_frame = np.asanyarray(aligned_depth_frame.get_data()) #16bit
86 | frame = np.asanyarray(color_frame.get_data())
87 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
88 | self.wait_and_put(self.rgb_queue, (frame_num, frame))
89 | self.wait_and_put(self.dp_queue, (frame_num, aligned_depth_frame))
90 | frame_num += 1
91 |
92 | def read_rgb(self):
93 | return self.wait_and_get(self.rgb_queue)
94 |
95 | def read_dp(self):
96 | return self.wait_and_get(self.dp_queue)
--------------------------------------------------------------------------------
/src/Modules/detection.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | from threading import Thread
4 | from queue import Queue
5 | import multiprocessing as mp  # needed for the non-sp (multi-process) branch below
6 | import cv2
7 | import scipy.misc
8 | import numpy as np
9 | import tensorflow as tf
10 |
11 | import sys
12 | sys.path.append('..')
13 | from core.yolov3 import YOLOv3, decode
14 | import core.utils as utils
15 | import core.operation as operation
16 |
17 | class DetectionLoader():
18 | def __init__(self, opt, queueSize=128):
19 | self.opt = opt
20 | self._num_classes = 80
21 | self._input_size = 416
22 | self._gpu_usages = opt.yolo_gpu_usages#0.4
23 | self.detector = self._load_detector()
24 |
25 | # initialize the queue used to store data
26 | """
27 | image_queue: the buffer storing pre-processed images for object detection
28 | det_queue: the buffer storing human detection results
29 | pose_queue: the buffer storing post-processed cropped human image for pose estimation
30 | """
31 | if opt.sp:
32 | self._stopped = False
33 | self.image_queue = Queue(maxsize=queueSize)#input queue
34 | self.det_queue = Queue(maxsize=queueSize)
35 | else:
36 | self._stopped = mp.Value('b', False)
37 | self.image_queue = mp.Queue(maxsize=queueSize)
38 | self.det_queue = mp.Queue(maxsize=queueSize)
39 |
40 | def _load_detector(self):
41 | print("Loading YOLO Model......")
42 | config_tf = tf.compat.v1.ConfigProto()
43 | config_tf.gpu_options.per_process_gpu_memory_fraction = self._gpu_usages
44 | tf.compat.v1.Session(config=config_tf)
45 | input_layer = tf.keras.layers.Input([self._input_size, self._input_size, 3])
46 | feature_maps = YOLOv3(input_layer)
47 | bbox_tensors = []
48 | for i, fm in enumerate(feature_maps):
49 | bbox_tensor = decode(fm, i)
50 | bbox_tensors.append(bbox_tensor)
51 | detector = tf.keras.Model(input_layer, bbox_tensors)
52 | utils.load_weights(detector, self.opt.yolo_weights)
53 | # detector.summary()
54 | print("Successfully Loading YOLO Model")
55 | return detector
56 |
57 | def start_worker(self, target):
58 | if self.opt.sp:
59 |             p = Thread(target=target)# target is the function this thread runs
60 | # else:
61 | p.start()
62 | return p
63 |
64 | def start(self):
65 | self.detection_worker = self.start_worker(self.detection_process)
66 | return self
67 |
68 | def stop(self):
69 | self.detection_worker.join()
70 | self.clear_queues()
71 |
72 | def clear_queues(self):
73 | self.clear(self.image_queue)
74 | self.clear(self.det_queue)
75 |
76 | def clear(self, queue):
77 | while not queue.empty():
78 | queue.get()
79 |
80 | def wait_and_put(self, queue, item):
81 | queue.put(item)
82 |
83 | def wait_and_get(self, queue):
84 | return queue.get()
85 |
86 |     def detection_process(self, **kargcs):
87 | while True:
88 | (nums, frame) = self.wait_and_get(self.image_queue)
89 | frame_size = frame.shape[:2]
90 |             image_data = utils.image_preporcess(np.copy(frame), [self._input_size, self._input_size])# resize and pad the image to 416x416
91 |             image_data = image_data[np.newaxis, ...].astype(np.float32)# convert to 32-bit float
92 |
93 | pred_bbox = self.detector.predict(image_data)
94 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
95 | pred_bbox = tf.concat(pred_bbox, axis=0)
96 |
97 |             bboxes = utils.postprocess_boxes(pred_bbox, frame_size, self._input_size, 0.3)# post-process the predicted bounding boxes
98 |             bboxes = utils.nms(bboxes, 0.45, method='nms')# non-maximum suppression keeps the most representative box
99 |
100 |             only_one_bboxes = operation.detect_only_one(bboxes, IDi=0)# output only one class (person)
101 | self.wait_and_put(self.det_queue, (nums, only_one_bboxes))
102 |
103 | def put_image(self, nums, frame):
104 | self.wait_and_put(self.image_queue, (nums, frame))
105 |
106 | def read(self):
107 | return self.wait_and_get(self.det_queue)
108 |
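109 | # --- Added usage note (hedged sketch, not part of the original file; names are illustrative) ---
110 | # Single-threaded flow, assuming opt.sp is True and opt.yolo_weights / opt.yolo_gpu_usages
111 | # are set by the package's option parser:
112 | #
113 | #   det = DetectionLoader(opt).start()
114 | #   det.put_image(0, rgb_frame)              # rgb_frame: HxWx3 uint8 RGB array
115 | #   num, bboxes = det.read()                 # each bbox: [x_min, y_min, x_max, y_max, prob, cls]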
--------------------------------------------------------------------------------
/src/Modules/imager.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import time
4 | from threading import Thread
5 | from queue import Queue
6 |
7 | import cv2
8 | import scipy.misc
9 | import numpy as np
10 |
11 | class ImageReader():
12 | def __init__(self, opt, queueSize=128):
13 | self.opt = opt
14 | self.rgb_path = opt.rgb_path
15 | self.dp_path = opt.dp_path
16 |
17 | for root, dirs, files in os.walk(self.rgb_path):
18 | self.rgb_len = len(files)
19 | for root, dirs, files in os.walk(self.dp_path):
20 | self.dp_len = len(files)
21 |         assert self.rgb_len == self.dp_len, "number of RGB and depth images does not match"
22 |
23 |         if opt.sp:# single-thread mode
24 | self.rgb_queue = Queue(maxsize=queueSize)
25 | self.dp_queue = Queue(maxsize=queueSize)
26 |
27 | def start_worker(self, target, path):
28 | if self.opt.sp:
29 |             p = Thread(target=target, args=(), kwargs={"num": 0, "path": path})# target is the function this thread runs
30 | # else:
31 | p.start()
32 | return p
33 |
34 | def start(self):
35 | self.rgb_image_worker = self.start_worker(self.rgb_reader_process, path=self.rgb_path)
36 | self.dp_image_worker = self.start_worker(self.dp_reader_process, path=self.dp_path)
37 | return self
38 |
39 | def stop(self):
40 | self.rgb_image_worker.join()
41 | self.dp_image_worker.join()
42 | self.clear_queues()
43 |
44 | def clear_queues(self):
45 | self.clear(self.rgb_queue)
46 | self.clear(self.dp_queue)
47 |
48 | def clear(self, queue):
49 | while not queue.empty():
50 | queue.get()
51 |
52 | def wait_and_put(self, queue, item):
53 | queue.put(item)
54 |
55 | def wait_and_get(self, queue):
56 | return queue.get()
57 |
58 | def rgb_reader_process(self, **kargcs):
59 | while kargcs["num"] < self.rgb_len:
60 | rgb_name = "color_" + str(kargcs["num"]) + ".png"
61 | rgb_file = kargcs["path"] + "/" + rgb_name
62 | # print(rgb_file)
63 | frame = cv2.imread(rgb_file, -1)
64 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
65 | self.wait_and_put(self.rgb_queue, (rgb_name, kargcs["num"], frame))
66 | kargcs["num"] += 1
67 |
68 | def dp_reader_process(self, **kargcs):
69 | while kargcs["num"] < self.dp_len:
70 | dp_name = "depth_8bit_" + str(kargcs["num"]) + ".png"
71 | dp_file = kargcs["path"] + "/" + dp_name
72 | frame = cv2.imread(dp_file, -1)
73 | self.wait_and_put(self.dp_queue, (dp_name, kargcs["num"], frame))
74 | kargcs["num"] += 1
75 |
76 | def read_rgb(self):
77 | return self.wait_and_get(self.rgb_queue)
78 |
79 | def read_dp(self):
80 | return self.wait_and_get(self.dp_queue)
81 |
82 |
83 |
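84 | # --- Added usage note (hedged sketch, not part of the original file) ---
85 | # Assumes opt.rgb_path contains color_0.png, color_1.png, ... and opt.dp_path contains
86 | # depth_8bit_0.png, depth_8bit_1.png, ... with equal counts:
87 | #
88 | #   reader = ImageReader(opt).start()
89 | #   rgb_name, num, rgb = reader.read_rgb()   # color image converted from BGR to RGB
90 | #   dp_name, num, dp = reader.read_dp()      # depth image read unchanged (flag -1)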
--------------------------------------------------------------------------------
/src/Modules/midprocessor.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | import time
5 | from sys import platform
6 | from threading import Thread
7 | from queue import Queue
8 |
9 | import cv2
10 | import scipy.misc
11 | import numpy as np
12 |
13 | sys.path.append('..')
14 | import core.utils as utils
15 | import core.operation as operation
16 |
17 | class MidProcessor():
18 | def __init__(self, opt, queueSize=128):
19 | self.opt = opt
20 | self.nn_pix = opt.nn_pix
21 | self.is_train = opt.is_train
22 | self.training_set = opt.training_set
23 |
24 |         if opt.sp:# single-thread mode
25 | self.data_queue = Queue(maxsize=queueSize)
26 | self.fuse_queue = Queue(maxsize=queueSize)
27 |
28 | def start_worker(self, target):
29 | if self.opt.sp:
30 |             p = Thread(target=target, args=(), kwargs={})# target is the function this thread runs
31 | # else:
32 | p.start()
33 | return p
34 |
35 | def start(self):
36 | self.fuse_worker = self.start_worker(self.fuse_process)
37 | return self
38 |
39 | def stop(self):
40 | self.fuse_worker.join()
41 | self.clear_queues()
42 |
43 | def clear_queues(self):
44 | self.clear(self.data_queue)
45 | self.clear(self.fuse_queue)
46 |
47 | def clear(self, queue):
48 | while not queue.empty():
49 | queue.get()
50 |
51 | def wait_and_put(self, queue, item):
52 | queue.put(item)
53 |
54 | def wait_and_get(self, queue):
55 | return queue.get()
56 |
57 | def fuse_process(self, **kargcs):
58 | while True:
59 | (kp_nums, kps_list, dp_nums, dp_frame, det_nums, bboxes) = self.wait_and_get(self.data_queue)
60 | # print(kp_nums, dp_nums, det_nums)
61 |             assert (kp_nums == dp_nums and dp_nums == det_nums), "queue data out of order"
62 | # print("kp list: ",kps_list)
63 | # print("bboxes: ",bboxes)
64 | _flag = self._filter_error(kps_list, bboxes)
65 | if not _flag or len(kps_list) != len(bboxes):
66 |                 print("error: detection and pose results do not match, skipping frame")
67 | self.wait_and_put(self.fuse_queue, ((kp_nums, None)))
68 | else:
69 | output = []
70 | for kp_list in kps_list:
71 |                     # get the 3D coordinates [x, y, z] of the 5 keypoints mapped onto the original frame, plus a few neighboring pixels
72 | if self.opt.d435:
73 | dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, nn_pix=self.nn_pix, live=True, live_frame=dp_frame)
74 | else:
75 | dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, depth_frame=dp_frame, nn_pix=self.nn_pix)
76 |                     # replace the center-point depth with the mean over its neighborhood
77 | dp_list = operation.get_mean_dep_val_xyz(kp_list=kp_list, dp_nparray=dp_nparray)
78 |                     # normalize the coordinates
79 | dp_array_out = operation.norm_keypoints_xyz(dp_list, norm_p_index=0, rgb_size=[640, 480])
80 |                     # shift the coordinates so that all values are non-negative
81 | dp_array_no_neg = operation.no_negative(array=dp_array_out)
82 |                     # treat values smaller than 0.001 as zero
83 | dp_array_no_neg_zero = operation.filter_zero(dp_array_no_neg, threadhold=0.001)
84 | if self.is_train:
85 |                         file_txt = open(self.training_set, 'a')# change the txt filename prefix according to the action class
86 | file_txt.write(str(dp_array_no_neg))
87 | file_txt.write('\n')
88 | file_txt.close()
89 | dp_array_no_neg_zero = np.expand_dims(dp_array_no_neg_zero, axis=-1)
90 | # dp_array_no_neg_zero = np.expand_dims(dp_array_no_neg_zero, axis=0)
91 | # print(dp_array_no_neg_zero)
92 |
93 |                     output.append([dp_list[0], dp_array_no_neg_zero])# dp_list[0] is the nose joint (x, y, depth) of the body
94 | self.wait_and_put(self.fuse_queue, (kp_nums, output))
95 |
96 | def _filter_error(self, kps_list, bboxes):
97 | kps_len = len(kps_list)
98 | bboxes_len = len(bboxes)
99 | if not kps_len == bboxes_len:
100 |             print("person counts from object detection and pose estimation do not match")
101 | return False
102 | kps_cet, bboxes_cet = [], []
103 | kps_array = np.asarray(kps_list)
104 | for i in bboxes:
105 | x_min, y_min = i[0], i[1]
106 | x_max, y_max = i[2], i[3]
107 | _j = 0
108 | for j in kps_array:
109 | # print("J:",j)
110 | for p in j:
111 | # print("P: ",p)
112 | if p[0] >= x_min and p[0] <= x_max:
113 | if p[1] >= y_min and p[1] <= y_max:
114 | _j += 1
115 | # print("-j: ", _j)
116 | if _j >= 10:#too many kps in one bbox, error occur in openpose
117 | print("too many kps in one bbox, error occur in openpose")
118 | return False
119 | break
120 | return True
121 |
122 | def filter_IoU_error(self, kps_list, bboxes):
123 | '''
124 | bboxes:[x_min,y_min,x_max,y_max,prob,cls]
125 | '''
126 | kps_len = len(kps_list)
127 | bboxes_len = len(bboxes)
128 |         assert kps_len == bboxes_len, "person counts from object detection and pose estimation do not match"
129 | for i in kps_list:
130 | for j in i:
131 | if j[0] == 0 or j[1] == 0:
132 | i.remove(j)
133 | kps_box = []
134 | kps_array = np.asarray(kps_list)
135 | for i in kps_array:
136 | x_min, y_min = np.amin(i, 0)
137 | x_max, y_max = np.amax(i, 0)
138 | # x_cet = x_min + int((x_max - x_min) / 2)
139 | # y_cet = y_min + int((y_max - y_min) / 2)
140 | kps_box.append([x_min, y_min, x_max, y_max])
141 | print("kps_box: ",kps_box)
142 | _t = 0
143 | for i in kps_box:
144 | for j in bboxes:
145 | print(j[:4])
146 | _IoU = self.IOU(i, j[:4])
147 | print(_IoU)
148 | if _IoU > 0.4:
149 | _t += 1
150 | if _t != len(kps_box):
151 |             print("error occurred, skipping one frame")
152 | # else
153 | pass
154 |
155 | def IOU(self, box1, box2):
156 | '''
157 |         :param box1: [x1,y1,x2,y2] top-left and bottom-right corner coordinates
158 |         :param box2: [x1,y1,x2,y2]
159 |         :return: iou_ratio -- intersection over union
160 | '''
161 | width1 = abs(box1[2] - box1[0])
162 |         height1 = abs(box1[1] - box1[3]) # y1 - y2 because usually y1 > y2; the absolute value is taken for convenience
163 | width2 = abs(box2[2] - box2[0])
164 | height2 = abs(box2[1] - box2[3])
165 | x_max = max(box1[0],box1[2],box2[0],box2[2])
166 | y_max = max(box1[1],box1[3],box2[1],box2[3])
167 | x_min = min(box1[0],box1[2],box2[0],box2[2])
168 | y_min = min(box1[1],box1[3],box2[1],box2[3])
169 | iou_width = x_min + width1 + width2 - x_max
170 | iou_height = y_min + height1 + height2 - y_max
171 | if iou_width <= 0 or iou_height <= 0:
172 | iou_ratio = 0
173 | else:
174 |             iou_area = iou_width * iou_height # area of the intersection
175 | box1_area = width1 * height1
176 | box2_area = width2 * height2
177 |             iou_ratio = iou_area / (box1_area + box2_area - iou_area) # divide by the area of the union
178 | return iou_ratio
179 |
180 | def put_data(self, kp_nums, kp_list, dp_nums, dp_frame, det_nums, bboxes):
181 | self.wait_and_put(self.data_queue, (kp_nums, kp_list, dp_nums, dp_frame, det_nums, bboxes))
182 |
183 | def read(self):
184 | return self.wait_and_get(self.fuse_queue)
185 |
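186 | # --- Added worked example (hedged, not part of the original file) ---
187 | # IOU([0, 0, 10, 10], [5, 5, 15, 15]):
188 | #   iou_width = 0 + 10 + 10 - 15 = 5, iou_height = 5, intersection = 25
189 | #   union = 100 + 100 - 25 = 175, so the returned ratio is 25 / 175, about 0.143
190 | # Each item put on fuse_queue is (kp_nums, [[nose_xyz, (3, num_keypoints, 1) feature array], ...])
191 | # or (kp_nums, None) when the detection and pose results do not match.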
--------------------------------------------------------------------------------
/src/Modules/myserver.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | from threading import Thread
5 | from queue import Queue
6 | import pickle
7 |
8 |
9 | class myServer():
10 | def __init__(self, opt, queueSize=128):
11 | self.opt = opt
12 | self.host = opt.host
13 | self.port = opt.port
14 | from multiprocessing.connection import Listener
15 | server_sock = Listener((self.host, self.port))
16 |
17 | self.conn = server_sock.accept()
18 | print('Server Listening')
19 |
20 | if opt.sp:
21 | self.data_queue = Queue(maxsize=queueSize)
22 |             self.output_queue = Queue(maxsize=queueSize)
23 |
24 | def start_worker(self, target):
25 | if self.opt.sp:
26 |             p = Thread(target=target, args=(), kwargs={})# target is the function this thread runs
27 | # else:
28 | p.start()
29 | return p
30 |
31 | def start(self):
32 | self.server_worker = self.start_worker(self.frame_reader_process)
33 | self.data_send_worker = self.start_worker(self.data_send_process)
34 | return self
35 |
36 | def stop(self):
37 | self.server_worker.join()
38 | self.data_send_worker.join()
39 | self.clear_queues()
40 |
41 | def clear_queues(self):
42 | self.clear(self.data_queue)
43 |
44 | def clear(self, queue):
45 | while not queue.empty():
46 | queue.get()
47 |
48 | def wait_and_put(self, queue, item):
49 | queue.put(item)
50 |
51 | def wait_and_get(self, queue):
52 | return queue.get()
53 |
54 | def frame_reader_process(self, **kargcs):
55 | while True:
56 | data_bytes = self.conn.recv()
57 | data = pickle.loads(data_bytes)
58 | # print('Received:', type(data))
59 | self.wait_and_put(self.data_queue, data)
60 |
61 | def data_send_process(self):
62 | while True:
63 |             (bboxes, pred_data) = self.wait_and_get(self.output_queue)
64 | pred_data_bytes = pickle.dumps((bboxes, pred_data))
65 | self.conn.send(pred_data_bytes)
66 |
67 | def read_data(self):
68 | return self.wait_and_get(self.data_queue)
69 |
70 | def get_data(self, bboxes, pred_data):
71 |         self.wait_and_put(self.output_queue, (bboxes, pred_data))
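72 | # --- Added client-side sketch (hedged, not part of the original file) ---
73 | # The remote client mirrors this protocol: it pickles a payload, sends it over a
74 | # multiprocessing.connection.Client, and unpickles the reply:
75 | #
76 | #   from multiprocessing.connection import Client
77 | #   import pickle
78 | #   conn = Client((host, port))                       # same host/port as opt.host, opt.port
79 | #   conn.send(pickle.dumps(payload))                  # consumed by frame_reader_process()
80 | #   bboxes, pred = pickle.loads(conn.recv())          # produced by data_send_process()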
--------------------------------------------------------------------------------
/src/Modules/poser.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import os
3 | import sys
4 | from sys import platform
5 | from threading import Thread
6 | from queue import Queue
7 | import multiprocessing as mp  # used by the opt.sp == False branch below
8 | import cv2
9 | import scipy.misc
10 | import numpy as np
11 |
12 | sys.path.append('..')
13 | import core.utils as utils
14 | import core.operation as operation
15 |
16 | sys.path.append('/home/dongjai/openpose/build/python')
17 | from openpose import pyopenpose as op
18 |
19 | class PoseLoader():
20 | def __init__(self, opt, queueSize=128):
21 | self.opt = opt
22 | self.kp_num = opt.kp_num
23 | self._params = dict()
24 | self._params["model_folder"] = "/home/dongjai/openpose/models/"
25 | self._params["model_pose"] = "COCO"
26 | self._params["face"] = False
27 | self._params["hand"] = False
28 | self.pose_estimator = self._load_estimator()
29 |
30 | if opt.sp:
31 | self._stopped = False
32 | self.image_queue = Queue(maxsize=queueSize)#input queue
33 | self.pose_queue = Queue(maxsize=queueSize)
34 | else:
35 | self._stopped = mp.Value('b', False)
36 | self.image_queue = mp.Queue(maxsize=queueSize)
37 | self.pose_queue = mp.Queue(maxsize=queueSize)
38 |
39 | def _load_estimator(self):
40 | print("Loading Openpose......")
41 | self._opWrapper = op.WrapperPython()
42 | self._opWrapper.configure(self._params)
43 | self._opWrapper.start()
44 |
45 | datum = op.Datum()
46 |         print("Successfully Loaded Openpose")
47 | return datum
48 |
49 | def start_worker(self, target):
50 | if self.opt.sp:
51 |             p = Thread(target=target, args=(), kwargs={})# target is the function this thread runs
52 | # else:
53 | p.start()
54 | return p
55 |
56 | def start(self):
57 | self.estimator_worker = self.start_worker(self.estimator_process)
58 | return self
59 |
60 | def stop(self):
61 | self.estimator_worker.join()
62 | self.clear_queues()
63 |
64 | def clear_queues(self):
65 | self.clear(self.image_queue)
66 | self.clear(self.pose_queue)
67 |
68 | def clear(self, queue):
69 | while not queue.empty():
70 | queue.get()
71 |
72 | def wait_and_put(self, queue, item):
73 | queue.put(item)
74 |
75 | def wait_and_get(self, queue):
76 | return queue.get()
77 |
78 | def estimator_process(self, **kargcs):
79 | while True:
80 | (nums, frame) = self.wait_and_get(self.image_queue)
81 | self.pose_estimator.cvInputData = frame
82 | self._opWrapper.emplaceAndPop([self.pose_estimator])
83 | if not operation.is_get_keypoints(self.pose_estimator):
84 | print("No keypoints")
85 | self.wait_and_put(self.pose_queue, (nums, None))
86 | else:
87 | kps_list = operation.get_keypoints_xy(datum=self.pose_estimator, kp_num=self.kp_num)
88 | self.wait_and_put(self.pose_queue, (nums, kps_list))
89 |
90 | def put_image(self, nums, frame):
91 | self.wait_and_put(self.image_queue, (nums, frame))
92 |
93 | def read(self):
94 | return self.wait_and_get(self.pose_queue)
95 |
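96 | # --- Added usage note (hedged sketch, not part of the original file) ---
97 | # Assumes opt.kp_num lists the OpenPose COCO joint indices to keep (downstream code
98 | # treats the first kept joint as the nose); see get_keypoints_xy() in core/operation.py:
99 | #
100 | #   poser = PoseLoader(opt).start()
101 | #   poser.put_image(0, rgb_frame)
102 | #   num, kps_list = poser.read()             # one [x, y] list per detected person, or None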
--------------------------------------------------------------------------------
/src/Modules/recognizer.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | import sys
3 | from threading import Thread
4 | from queue import Queue
5 | import multiprocessing as mp  # used by the opt.sp == False branch below
6 | import cv2
7 | import scipy.misc
8 | import numpy as np
9 | import tensorflow as tf
10 |
11 | sys.path.append('..')
12 | import core.utils as utils
13 | import core.operation as operation
14 |
15 | class ActionRecognizer():
16 | def __init__(self, opt, queueSize=128):
17 | self.opt = opt
18 | self.kps_size = opt.kps_usage_num
19 | self.mlp_weights = opt.mlp_weights
20 |
21 | self.act_recognizer = self._load_recognizer()
22 |
23 | if opt.sp:
24 | self._stopped = False
25 | self.data_queue = Queue(maxsize=queueSize)#input queue
26 | self.act_queue = Queue(maxsize=queueSize)
27 | else:
28 | self._stopped = mp.Value('b', False)
29 | self.data_queue = mp.Queue(maxsize=queueSize)
30 | self.act_queue = mp.Queue(maxsize=queueSize)
31 |
32 | def _load_recognizer(self):
33 | print("Loading Recognizer......")
34 | mlp_inputs = tf.keras.Input(shape=(3, self.kps_size, 1))
35 | mlp_outputs = operation.my_mlp(mlp_inputs)
36 | mlp_model = tf.keras.Model(inputs=mlp_inputs, outputs=mlp_outputs)
37 | mlp_model.compile(
38 | optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
39 | loss = tf.keras.losses.sparse_categorical_crossentropy,
40 | metrics = [tf.keras.metrics.sparse_categorical_accuracy]
41 | )
42 | mlp_model.load_weights(self.mlp_weights)
43 |         print("Successfully Loaded Recognizer")
44 | return mlp_model
45 |
46 | def start_worker(self, target):
47 | if self.opt.sp:
48 |             p = Thread(target=target, args=(), kwargs={})# target is the function this thread runs
49 | # else:
50 | p.start()
51 | return p
52 |
53 | def start(self):
54 | self.recognizer_worker = self.start_worker(self.recognizer_process)
55 | return self
56 |
57 | def stop(self):
58 | self.recognizer_worker.join()
59 | self.clear_queues()
60 |
61 | def clear_queues(self):
62 | self.clear(self.data_queue)
63 | self.clear(self.act_queue)
64 |
65 | def clear(self, queue):
66 | while not queue.empty():
67 | queue.get()
68 |
69 | def wait_and_put(self, queue, item):
70 | queue.put(item)
71 |
72 | def wait_and_get(self, queue):
73 | return queue.get()
74 |
75 | def recognizer_process(self, **kargcs):
76 | while True:
77 | (nums, data) = self.wait_and_get(self.data_queue)
78 | len_data = len(data)
79 | # print(len_data)
80 | _xyz_data = []
81 | # action_name = []
82 | for _t in data:
83 | _xyz_data.append(_t[1].tolist())
84 | _q = np.asarray(_xyz_data)
85 | action_pred = self.act_recognizer.predict(_q, batch_size=len_data)
86 | # print("pred: ",action_pred)
87 |             action_name = operation.get_action_name(label=action_pred)# still needs testing with multiple people
88 | # action_name.append(operation.get_action_name(label=action_pred))
89 | # print("name: ",action_name)
90 | for i, _t in enumerate(data):
91 | _t[1] = action_name[i]
92 | self.wait_and_put(self.act_queue, (nums, data))
93 |
94 | def put_data(self, nums, data):
95 | self.wait_and_put(self.data_queue, (nums, data))
96 |
97 | def read(self):
98 | return self.wait_and_get(self.act_queue)
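99 | # --- Added usage note (hedged sketch, not part of the original file) ---
100 | # The MLP consumes the fused features from MidProcessor: each person is a
101 | # (3, opt.kps_usage_num, 1) array of normalized x / y / depth values, batched as
102 | # (num_people, 3, kps_usage_num, 1). After prediction, data[i][1] is replaced with
103 | # the argmax class index (see operation.get_action_name), e.g.:
104 | #
105 | #   recog = ActionRecognizer(opt).start()
106 | #   recog.put_data(num, fused)               # fused: [[nose_xyz, feature_array], ...]
107 | #   num, results = recog.read()              # results: [[nose_xyz, class_index], ...]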
--------------------------------------------------------------------------------
/src/core/backbone.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : backbone.py
8 | # Author : YunYang1994
9 | # Created date: 2019-07-11 23:37:51
10 | # Description :
11 | #
12 | #================================================================
13 |
14 | import tensorflow as tf
15 | import core.common as common
16 |
17 |
18 | def darknet53(input_data):
19 |
20 | input_data = common.convolutional(input_data, (3, 3, 3, 32))
21 | input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True)
22 |
23 | for i in range(1):
24 | input_data = common.residual_block(input_data, 64, 32, 64)
25 |
26 | input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True)
27 |
28 | for i in range(2):
29 | input_data = common.residual_block(input_data, 128, 64, 128)
30 |
31 | input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True)
32 |
33 | for i in range(8):
34 | input_data = common.residual_block(input_data, 256, 128, 256)
35 |
36 | route_1 = input_data
37 | input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True)
38 |
39 | for i in range(8):
40 | input_data = common.residual_block(input_data, 512, 256, 512)
41 |
42 | route_2 = input_data
43 | input_data = common.convolutional(input_data, (3, 3, 512, 1024), downsample=True)
44 |
45 | for i in range(4):
46 | input_data = common.residual_block(input_data, 1024, 512, 1024)
47 |
48 | return route_1, route_2, input_data
49 |
50 |
51 |
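52 | # --- Added shape note (hedged, not part of the original file) ---
53 | # For a 416x416x3 input, darknet53 returns feature maps at three strides:
54 | #   route_1: 52x52x256 (stride 8), route_2: 26x26x512 (stride 16), input_data: 13x13x1024 (stride 32)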
--------------------------------------------------------------------------------
/src/core/camera.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import random
3 | import colorsys
4 | import numpy as np
5 | import copy
6 | from core.config import cfg
7 | import pyrealsense2 as rs
8 | import json
9 | import time  # used by try_to_advanced_mode()
10 | def wait4cam_init(rspipeline, WaitTimes):
11 | for i in range(WaitTimes):
12 | tmp_frame = rspipeline.wait_for_frames()
13 |
14 | def find_device_that_supports_advanced_mode():
15 | DS5_product_ids = ["0AD1", "0AD2", "0AD3", "0AD4", "0AD5", "0AF6", "0AFE", "0AFF", "0B00", "0B01", "0B03", "0B07","0B3A"]
16 | ctx = rs.context()
17 | ds5_dev = rs.device()
18 | devices = ctx.query_devices()
19 | for dev in devices:
20 | if dev.supports(rs.camera_info.product_id) and str(dev.get_info(rs.camera_info.product_id)) in DS5_product_ids:
21 | if dev.supports(rs.camera_info.name):
22 | print("Found device that supports advanced mode:", dev.get_info(rs.camera_info.name))
23 | return dev
24 | raise Exception("No device that supports advanced mode was found")
25 |
26 | def try_to_advanced_mode(json_file_path):
27 | dev = find_device_that_supports_advanced_mode()
28 | advnc_mode = rs.rs400_advanced_mode(dev)
29 | print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
30 |
31 | # Loop until we successfully enable advanced mode
32 | while not advnc_mode.is_enabled():
33 | print("Trying to enable advanced mode...")
34 | advnc_mode.toggle_advanced_mode(True)
35 | # At this point the device will disconnect and re-connect.
36 | print("Sleeping for 5 seconds...")
37 | time.sleep(5)
38 | # The 'dev' object will become invalid and we need to initialize it again
39 | dev = find_device_that_supports_advanced_mode()
40 | advnc_mode = rs.rs400_advanced_mode(dev)
41 | print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled")
42 |
43 | # Load json setting file
44 | jsonload = json.load(open(json_file_path))
45 | json_string = str(jsonload).replace("'", '\"')
46 | advnc_mode.load_json(json_string)
47 | return dev, advnc_mode
48 |
49 | def get_depth_distance(event,x,y,flags,param):
50 | if event == cv2.EVENT_MOUSEMOVE:
51 | # print the distance of depth for pixel of mouse
52 | print("point: " + str(x) + ", " + str(y))
53 | print("pixel: ", aligned_depth_8bit[y, x])
54 | print("dis: " + str(aligned_depth_frame.get_distance(x, y)))
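55 | # --- Added usage note (hedged sketch, not part of the original file) ---
56 | # get_depth_distance() is an OpenCV mouse callback; it relies on aligned_depth_8bit and
57 | # aligned_depth_frame existing as globals in the calling script. Typical registration:
58 | #
59 | #   cv2.namedWindow("depth_view")
60 | #   cv2.setMouseCallback("depth_view", get_depth_distance)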
--------------------------------------------------------------------------------
/src/core/common.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : common.py
8 | # Author : YunYang1994
9 | # Created date: 2019-07-11 23:12:53
10 | # Description :
11 | #
12 | #================================================================
13 |
14 | import tensorflow as tf
15 |
16 | class BatchNormalization(tf.keras.layers.BatchNormalization):
17 | """
18 | "Frozen state" and "inference mode" are two separate concepts.
19 | `layer.trainable = False` is to freeze the layer, so the layer will use
20 |     stored moving `var` and `mean` in the "inference mode", and both `gamma`
21 | and `beta` will not be updated !
22 | """
23 | def call(self, x, training=False):
24 | #if not training:
25 | if training is not None:
26 | training = tf.constant(False)
27 | training = tf.logical_and(training, self.trainable)
28 | return super().call(x, training)
29 |
30 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
31 | if downsample:
32 | input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
33 | padding = 'valid'
34 | strides = 2
35 | else:
36 | strides = 1
37 | padding = 'same'
38 |
39 | conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding,
40 | use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005),
41 | kernel_initializer=tf.random_normal_initializer(stddev=0.01),
42 | bias_initializer=tf.constant_initializer(0.))(input_layer)
43 |
44 | if bn: conv = BatchNormalization()(conv)
45 | if activate == True: conv = tf.nn.leaky_relu(conv, alpha=0.1)
46 |
47 | return conv
48 |
49 | def residual_block(input_layer, input_channel, filter_num1, filter_num2):
50 | short_cut = input_layer
51 | conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
52 | conv = convolutional(conv , filters_shape=(3, 3, filter_num1, filter_num2))
53 |
54 | residual_output = short_cut + conv
55 | return residual_output
56 |
57 | def upsample(input_layer):
58 | return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
59 |
60 |
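61 | # --- Added shape note (hedged, not part of the original file) ---
62 | # With downsample=True, the ((1, 0), (1, 0)) zero padding plus a 3x3 stride-2 'valid'
63 | # convolution halves the spatial size exactly, e.g. 416x416 -> 417x417 (padded) -> 208x208.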
--------------------------------------------------------------------------------
/src/core/config.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : config.py
8 | # Author : YunYang1994
9 | # Created date: 2019-02-28 13:06:54
10 | # Description :
11 | #
12 | #================================================================
13 |
14 | from easydict import EasyDict as edict
15 |
16 | __C = edict()
17 | # Consumers can get config by: from config import cfg
18 |
19 | cfg = __C
20 |
21 | # YOLO options
22 | __C.YOLO = edict()
23 |
24 | # Set the class name
25 | __C.YOLO.CLASSES = '/home/dongjai/catkin_ws/src/act_recognizer/src/data/classes/coco.names'
26 | #__C.YOLO.CLASSES = "./data/classes/yymnist.names"
27 | __C.YOLO.ANCHORS = '/home/dongjai/catkin_ws/src/act_recognizer/src/data/anchors/basline_anchors.txt'
28 | __C.YOLO.STRIDES = [8, 16, 32]
29 | __C.YOLO.ANCHOR_PER_SCALE = 3
30 | __C.YOLO.IOU_LOSS_THRESH = 0.5
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/src/core/dataset.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : dataset.py
8 | # Author : YunYang1994
9 | # Created date: 2019-03-15 18:05:03
10 | # Description :
11 | #
12 | #================================================================
13 |
14 | import os
15 | import cv2
16 | import random
17 | import numpy as np
18 | import tensorflow as tf
19 | import core.utils as utils
20 | from core.config import cfg
21 |
22 |
23 |
24 | class Dataset(object):
25 | """implement Dataset here"""
26 | def __init__(self, dataset_type):
27 | self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
28 | self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
29 | self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
30 | self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG
31 |
32 | self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
33 | self.strides = np.array(cfg.YOLO.STRIDES)
34 | self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
35 | self.num_classes = len(self.classes)
36 | self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
37 | self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
38 | self.max_bbox_per_scale = 150
39 |
40 | self.annotations = self.load_annotations(dataset_type)
41 | self.num_samples = len(self.annotations)
42 | self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
43 | self.batch_count = 0
44 |
45 |
46 | def load_annotations(self, dataset_type):
47 | with open(self.annot_path, 'r') as f:
48 | txt = f.readlines()
49 | annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
50 | np.random.shuffle(annotations)
51 | return annotations
52 |
53 | def __iter__(self):
54 | return self
55 |
56 | def __next__(self):
57 |
58 | with tf.device('/cpu:0'):
59 | self.train_input_size = random.choice(self.train_input_sizes)
60 | self.train_output_sizes = self.train_input_size // self.strides
61 |
62 | batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)
63 |
64 | batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0],
65 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
66 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1],
67 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
68 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2],
69 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
70 |
71 | batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
72 | batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
73 | batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
74 |
75 | num = 0
76 | if self.batch_count < self.num_batchs:
77 | while num < self.batch_size:
78 | index = self.batch_count * self.batch_size + num
79 | if index >= self.num_samples: index -= self.num_samples
80 | annotation = self.annotations[index]
81 | image, bboxes = self.parse_annotation(annotation)
82 | label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes)
83 |
84 | batch_image[num, :, :, :] = image
85 | batch_label_sbbox[num, :, :, :, :] = label_sbbox
86 | batch_label_mbbox[num, :, :, :, :] = label_mbbox
87 | batch_label_lbbox[num, :, :, :, :] = label_lbbox
88 | batch_sbboxes[num, :, :] = sbboxes
89 | batch_mbboxes[num, :, :] = mbboxes
90 | batch_lbboxes[num, :, :] = lbboxes
91 | num += 1
92 | self.batch_count += 1
93 | batch_smaller_target = batch_label_sbbox, batch_sbboxes
94 | batch_medium_target = batch_label_mbbox, batch_mbboxes
95 | batch_larger_target = batch_label_lbbox, batch_lbboxes
96 |
97 | return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
98 | else:
99 | self.batch_count = 0
100 | np.random.shuffle(self.annotations)
101 | raise StopIteration
102 |
103 | def random_horizontal_flip(self, image, bboxes):
104 |
105 | if random.random() < 0.5:
106 | _, w, _ = image.shape
107 | image = image[:, ::-1, :]
108 | bboxes[:, [0,2]] = w - bboxes[:, [2,0]]
109 |
110 | return image, bboxes
111 |
112 | def random_crop(self, image, bboxes):
113 |
114 | if random.random() < 0.5:
115 | h, w, _ = image.shape
116 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
117 |
118 | max_l_trans = max_bbox[0]
119 | max_u_trans = max_bbox[1]
120 | max_r_trans = w - max_bbox[2]
121 | max_d_trans = h - max_bbox[3]
122 |
123 | crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
124 | crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
125 | crop_xmax = max(w, int(max_bbox[2] + random.uniform(0, max_r_trans)))
126 | crop_ymax = max(h, int(max_bbox[3] + random.uniform(0, max_d_trans)))
127 |
128 | image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax]
129 |
130 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
131 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
132 |
133 | return image, bboxes
134 |
135 | def random_translate(self, image, bboxes):
136 |
137 | if random.random() < 0.5:
138 | h, w, _ = image.shape
139 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
140 |
141 | max_l_trans = max_bbox[0]
142 | max_u_trans = max_bbox[1]
143 | max_r_trans = w - max_bbox[2]
144 | max_d_trans = h - max_bbox[3]
145 |
146 | tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
147 | ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))
148 |
149 | M = np.array([[1, 0, tx], [0, 1, ty]])
150 | image = cv2.warpAffine(image, M, (w, h))
151 |
152 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
153 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
154 |
155 | return image, bboxes
156 |
157 | def parse_annotation(self, annotation):
158 |
159 | line = annotation.split()
160 | image_path = line[0]
161 | if not os.path.exists(image_path):
162 | raise KeyError("%s does not exist ... " %image_path)
163 | image = cv2.imread(image_path)
164 | bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])
165 |
166 | if self.data_aug:
167 | image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes))
168 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
169 | image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))
170 |
171 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
172 | image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes))
173 | return image, bboxes
174 |
175 | def bbox_iou(self, boxes1, boxes2):
176 |
177 | boxes1 = np.array(boxes1)
178 | boxes2 = np.array(boxes2)
179 |
180 | boxes1_area = boxes1[..., 2] * boxes1[..., 3]
181 | boxes2_area = boxes2[..., 2] * boxes2[..., 3]
182 |
183 | boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
184 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
185 | boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
186 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
187 |
188 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
189 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
190 |
191 | inter_section = np.maximum(right_down - left_up, 0.0)
192 | inter_area = inter_section[..., 0] * inter_section[..., 1]
193 | union_area = boxes1_area + boxes2_area - inter_area
194 |
195 | return inter_area / union_area
196 |
197 | def preprocess_true_boxes(self, bboxes):
198 |
199 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale,
200 | 5 + self.num_classes)) for i in range(3)]
201 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)]
202 | bbox_count = np.zeros((3,))
203 |
204 | for bbox in bboxes:
205 | bbox_coor = bbox[:4]
206 | bbox_class_ind = bbox[4]
207 |
208 | onehot = np.zeros(self.num_classes, dtype=np.float)
209 | onehot[bbox_class_ind] = 1.0
210 | uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
211 | deta = 0.01
212 | smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution
213 |
214 | bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
215 | bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
216 |
217 | iou = []
218 | exist_positive = False
219 | for i in range(3):
220 | anchors_xywh = np.zeros((self.anchor_per_scale, 4))
221 | anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
222 | anchors_xywh[:, 2:4] = self.anchors[i]
223 |
224 | iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
225 | iou.append(iou_scale)
226 | iou_mask = iou_scale > 0.3
227 |
228 | if np.any(iou_mask):
229 | xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
230 |
231 | label[i][yind, xind, iou_mask, :] = 0
232 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
233 | label[i][yind, xind, iou_mask, 4:5] = 1.0
234 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot
235 |
236 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
237 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
238 | bbox_count[i] += 1
239 |
240 | exist_positive = True
241 |
242 | if not exist_positive:
243 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
244 | best_detect = int(best_anchor_ind / self.anchor_per_scale)
245 | best_anchor = int(best_anchor_ind % self.anchor_per_scale)
246 | xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
247 |
248 | label[best_detect][yind, xind, best_anchor, :] = 0
249 | label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
250 | label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
251 | label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
252 |
253 | bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
254 | bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
255 | bbox_count[best_detect] += 1
256 | label_sbbox, label_mbbox, label_lbbox = label
257 | sbboxes, mbboxes, lbboxes = bboxes_xywh
258 | return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
259 |
260 | def __len__(self):
261 | return self.num_batchs
262 |
263 |
264 |
265 |
266 |
--------------------------------------------------------------------------------
/src/core/operation.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import copy
4 | import tensorflow as tf
5 |
6 | def list_sortby(bboxes_one, index, order):
7 | '''
8 | sort list. list likes [ [1,2,3,4], [5,6,7,8], [9,1,2,3]].
9 | index refers to the inner [] number index.
10 | index=4 means it will sort this list according 4 8 3
11 | ranked by descending order=0, increasing order=1
12 | '''
13 | if order == 0:
14 | tmp_len = len(bboxes_one)
15 |         for i in range(tmp_len):# sort
16 | for j in range(tmp_len):
17 | tmp_t = bboxes_one[i][index]
18 | tmp_k = bboxes_one[j][index]
19 | if tmp_t > tmp_k:
20 | tmp = bboxes_one[i]
21 | bboxes_one[i] = bboxes_one[j]
22 | bboxes_one[j] = tmp
23 | if order == 1:
24 | tmp_len = len(bboxes_one)
25 |         for i in range(tmp_len):# sort
26 | for j in range(tmp_len):
27 | tmp_t = bboxes_one[i][index]
28 | tmp_k = bboxes_one[j][index]
29 | if tmp_t < tmp_k:
30 | tmp = bboxes_one[i]
31 | bboxes_one[i] = bboxes_one[j]
32 | bboxes_one[j] = tmp
33 | return bboxes_one
34 |
35 | def detect_only_one(bboxes, IDi):
36 | '''
37 |     Select one specific class to output according to the class ID.
38 | '''
39 | bboxes_one = []
40 | # bboxes_one_out = []
41 | bboxes_num = len(bboxes)
42 | if bboxes_num > 0:
43 | for j in range(bboxes_num):
44 |             if int(bboxes[j][5]) == IDi:# keep only boxes whose class ID equals IDi (person==0, car==2)
45 | bboxes_one.append(bboxes[j])
46 | return bboxes_one
47 |
48 | def pick_top_n(bboxes, top_n):
49 | '''
50 |     Select the top n elements of bboxes (a list) as output.\\
51 |     If bboxes was sorted in increasing order, this returns the lowest elements.
52 | '''
53 | tmp_len = len(bboxes)
54 | if tmp_len < top_n :
55 | print("only have : ", tmp_len)
56 | top_n = tmp_len
57 | return bboxes[:top_n]
58 |
59 |
60 | def select_and_pick(bboxes, index, order, top_n, gap=False):
61 | '''
62 |     Sort according to index,
63 |     ranked in descending order (order=0) or increasing order (order=1),
64 |     then select the top_n elements as output.
65 | '''
66 | tmp = pick_top_n(list_sortby(bboxes, index, order), top_n)
67 | if gap:
68 | tmp_out = []
69 | for i in range(len(tmp)):
70 | t = tmp[i][index]
71 | # print("prob", t)
72 | if t >= gap[0] and t < gap[1]:
73 | tmp_out.append(tmp[i])
74 | else:
75 | tmp_out = tmp.copy()
76 | return tmp_out
77 |
78 | def select_in_gap(bboxes, gap, alpha, live=False):
79 | '''
80 |     If the depth frame stores 16-bit values per pixel, alpha should be 0.
81 |     If it is an 8-bit frame, alpha = 0.3.
82 | '''
83 | i = len(bboxes)
84 | t = 0
85 | while t != i :
86 | if live:
87 | dist_16bit = bboxes[t][5] * 1000
88 | else:
89 | if alpha == 0:
90 | dist_16bit = bboxes[t][5]
91 | else:
92 | dist_16bit = bboxes[t][5] / alpha
93 | if dist_16bit >= float(gap[0]) and dist_16bit <= float(gap[1]):
94 | t = t + 1
95 | else:
96 | del bboxes[t]
97 | i = i - 1
98 | return bboxes
99 |
100 |
101 | def cut_bbox(image, bboxes, ext_pix_x, ext_pix_y):
102 | """
103 | bboxes: [x_min, y_min, x_max, y_max, probability, depth] format coordinates.
104 | OUTPUT :
105 |     left_top_list holds, for each crop, the top-left (x, y) in the original frame coordinates;
106 |     img_set contains the cropped images in BGR order
107 | """
108 | # img_blank = np.zeros(shape=image.shape, dtype=image.dtype)
109 | img_set = []
110 | left_top_list = []
111 |
112 | for i, bbox in enumerate(bboxes):
113 | coor = np.array(bbox[:4], dtype=np.int32)
114 | if (coor[0] - ext_pix_x) < 0 :
115 | x1 = 0
116 | else:
117 | x1 = coor[0] - ext_pix_x
118 | if (coor[1] - ext_pix_y) < 0 :
119 | y1 = 0
120 | else:
121 | y1 = coor[1] - ext_pix_y
122 | if (coor[2] + ext_pix_x) > image.shape[1]:
123 | x2 = image.shape[1]
124 | else:
125 | x2 = coor[2] + ext_pix_x
126 | if (coor[3] + ext_pix_y) > image.shape[0]:
127 | y2 = image.shape[0]
128 | else:
129 | y2 = coor[3] + ext_pix_y
130 | c1, c2 = (x1, y1), (x2, y2) # (x, y)
131 | img_one_blank = np.zeros(shape=(y2 - y1, x2 - x1, 3), dtype=image.dtype)
132 | img_one_blank[:, :, 0] = image[y1:y2, x1:x2, 2] #image->RGB img_one_blank->BGR
133 | img_one_blank[:, :, 1] = image[y1:y2, x1:x2, 1]
134 | img_one_blank[:, :, 2] = image[y1:y2, x1:x2, 0]
135 | img_set.append(img_one_blank)
136 | left_top_list.append([x1, y1])
137 | # cv2.namedWindow("cut_result", cv2.WINDOW_AUTOSIZE)
138 | # cut_result = cv2.cvtColor(img_one_blank, cv2.COLOR_RGB2BGR)
139 | # cv2.imshow("cut_result", cut_result)
140 |         # img_set.append(img_blank)# would return frame-sized images that only contain the bbox region, but they would not be independent of each other
141 | return img_set, left_top_list
142 |
143 | def get_centerpoints_xyz(bboxes, depth_frame, live=False, live_frame=None):
144 | '''
145 | bboxes [x1, y1, x2, y2, prob, cls]
146 |     Replace the cls field with the depth value at the box center
147 | '''
148 | bboxes_dp = copy.deepcopy(bboxes)
149 | for i in range(len(bboxes_dp)):
150 | x1 = bboxes_dp[i][0]
151 | y1 = bboxes_dp[i][1]
152 | x2 = bboxes_dp[i][2]
153 | y2 = bboxes_dp[i][3]
154 |         c1 = int(x1 + (x2 - x1) / 2)  # center x of the box
155 |         c2 = int(y1 + (y2 - y1) / 2)  # center y of the box
156 | if live:
157 | dp_val = live_frame.get_distance(c1, c2)
158 | else:
159 | dp_val = depth_frame[c2, c1]#depth_frame[c2, c1]
160 | bboxes_dp[i][-1] = dp_val
161 | return bboxes_dp
162 |
163 | def is_get_keypoints(datum):
164 | '''
165 |     Check whether the keypoint set is empty (returns False if no keypoints were found).
166 | '''
167 | try:
168 | a = len(datum.poseKeypoints)
169 | # print("len keypoinst", a)
170 | except:
171 | return False
172 | return True
173 |
174 | def get_keypoints_xy(datum, kp_num):
175 | '''
176 |     Collect the (x, y) of the selected keypoints for each person and return them as a list.
177 | '''
178 | kps_list = []
179 | for j in datum.poseKeypoints:
180 | kp_list = []
181 | for i in kp_num:
182 | x = j[i][0]
183 | y = j[i][1]
184 | kp_list.append([x, y]) #[x, y]
185 | kps_list.append(kp_list)
186 | return kps_list
187 |
188 | def get_keypoints_xyz(kp_list, depth_frame=None, nn_pix=3, live=False, live_frame=None): # depth_frame.shape = (y, x), single channel; nn_pix is the number of neighbor pixels around the center point
189 | '''
190 |     nn_pix is the size of the pixel neighborhood around the central pixel\\
191 |     live indicates whether frames come from a live camera\\
192 |     live_frame is the depth frame grabbed from the camera
193 | '''
194 | # testkp_x, testkp_y = [326, 280, 365, 302, 343], [127, 269, 264, 335, 335]
195 |     dp_nparray = np.ndarray(shape=(len(kp_list), nn_pix, nn_pix, 3)) # the 3 stores the (x, y, z) of one pixel
196 | for i in range(len(kp_list)):
197 | kp_x, kp_y = kp_list[i][0], kp_list[i][1]
198 | gap = int((nn_pix - 1)/2)
199 |         kp_lt_x, kp_lt_y = int(kp_x - gap), int(kp_y - gap) # top-left corner point
200 |         # correct the patch: if part of it falls outside the image, move the top-left corner back to the border
201 | if kp_lt_x < 0:
202 | kp_lt_x = 0
203 | if kp_lt_y < 0:
204 | kp_lt_y = 0
205 | if live:
206 | dp_frame_x = live_frame.get_width()
207 | dp_frame_y = live_frame.get_height()
208 | else:
209 | dp_frame_x = depth_frame.shape[1]
210 | dp_frame_y = depth_frame.shape[0]
211 |         # correct the patch: keep its right/bottom edge from running past the image border
212 | kp_most_x = kp_lt_x + (nn_pix - 1)
213 | if kp_most_x >= dp_frame_x:
214 | x_diff = kp_most_x - dp_frame_x
215 | kp_lt_x = kp_lt_x - x_diff - 1
216 | kp_most_y = kp_lt_y + (nn_pix - 1)
217 | if kp_most_y >= dp_frame_y:
218 | y_diff = kp_most_y - dp_frame_y
219 | kp_lt_y = kp_lt_y - y_diff - 1
220 |
221 | for t in range(nn_pix):
222 | for p in range(nn_pix):
223 |                 # if kp_lt_y + t < 0 or kp_lt_y + t >= depth_frame.shape[0] or kp_lt_x + p < 0 or kp_lt_x + p >= depth_frame.shape[1]: # check whether adding nn_pix goes out of range; out-of-range pixels would be set to 0
224 | # dp_nparray[i][t][p] = [kp_lt_x + p, kp_lt_y + t, 0]
225 | # else:
226 | if live:
227 | dp_frame_val = live_frame.get_distance(kp_lt_x + p, kp_lt_y + t)
228 | else:
229 | dp_frame_val = depth_frame[kp_lt_y + t, kp_lt_x + p]
230 | dp_nparray[i][t][p] = [kp_lt_x + p, kp_lt_y + t, dp_frame_val]
231 | # kp_lt_y = kp_lt_y + 1
232 | return dp_nparray
233 |
234 | def filter_kp_list(kp_list, norm_p_index=0):
235 | if kp_list[norm_p_index][0] == 0 or kp_list[norm_p_index][1] == 0:
236 | return True
237 | return False
238 |
239 | def get_mean_dep_val_xyz(kp_list, dp_nparray):
240 | dp_list = []
241 | for i in range(len(dp_nparray)):
242 | len_size = len(dp_nparray[i][0])
243 | dp_val = 0
244 | # print("len_size:",len_size)
245 | for j in range(len_size):
246 | for t in range(len_size):
247 | # print("nparray",dp_nparray[i][j][t])
248 | dp_val = dp_val + dp_nparray[i][j][t][-1]
249 | dp_mean = dp_val / (len_size**2)
250 | dp_list.append([kp_list[i][0], kp_list[i][1], dp_mean])
251 | # print("mean",[kp_list[i][0], kp_list[i][1], dp_mean])
252 | return dp_list
253 |
254 | def norm_keypoints_xyz(dp_list, norm_p_index, rgb_size):
255 | '''
256 |     (x, y) are normalized by the image size; depth is min-max normalized.
257 |     OUTPUT will be reshaped to (3, 5).
258 | '''
259 | x_max, y_max = rgb_size[0], rgb_size[1]
260 | dp_nparray = np.asarray(dp_list)
261 | dp_rs_array = dp_nparray.T
262 | z_max = dp_rs_array[2].max()
263 | z_min = dp_rs_array[2].min()
264 |     dp_rs_array[0] = dp_rs_array[0] / x_max# min-max normalization: x / image width (the minimum is 0, so it is omitted)
265 | dp_rs_array[1] = dp_rs_array[1] / y_max
266 | dp_rs_array[2] = (dp_rs_array[2] - z_min) / (z_max - z_min)
267 |     # The size of the cropped person changes with distance to the camera, but the depth differences between the five skeleton keypoints do not; they depend only on the action (assuming the RealSense depth values are accurate).
268 |     # The z depth must be normalized to [0, 1] so it becomes dimensionless and can be trained in the network together with the other two dimensions.
269 | zero_diff = [[dp_rs_array[0][norm_p_index]], [dp_rs_array[1][norm_p_index]], [dp_rs_array[2][norm_p_index]]]
270 | # print("zero diff : ",zero_diff)
271 | dp_array = np.subtract(dp_rs_array, zero_diff)
272 | # print("dp array :\n ", dp_array)
273 | return dp_array
274 |
275 | def no_negative(array):
276 | min_diff = [[array[0].min()], [array[1].min()], [array[2].min()]]
277 | array_out = array - min_diff
278 | return array_out
279 |
280 | def filter_zero(listin, threadhold):
281 | list_shape = listin.shape
282 | for i in range(list_shape[0]):
283 | for j in range(list_shape[1]):
284 | if listin[i][j] < threadhold:
285 | listin[i][j] = 0
286 | return listin
287 |
288 | def get_action_name(label):
289 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"}
290 | output = []
291 | for _y in label:
292 | y_list = _y.tolist()
293 | label_pred = y_list.index(max(y_list))
294 | output.append(label_pred)
295 | # print(label_pred)
296 | return output
297 |
298 | def my_mlp(inputs):
299 | x = tf.keras.layers.Flatten()(inputs)
300 | x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x)
301 | x = tf.keras.layers.Dense(units=35, activation=tf.nn.relu, use_bias=True)(x)
302 | x = tf.keras.layers.Dense(units=15, activation=tf.nn.relu, use_bias=True)(x)
303 | x = tf.keras.layers.Dense(units=6)(x)
304 | # x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x)
305 | # x = tf.keras.layers.Dense(units=10, activation=tf.nn.relu, use_bias=True)(x)
306 | # x = tf.keras.layers.Dense(units=6)(x)
307 | # x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x)#mlp3
308 | # x = tf.keras.layers.Dense(units=35, activation=tf.nn.relu, use_bias=True)(x)
309 | # x = tf.keras.layers.Dense(units=15, activation=tf.nn.relu, use_bias=True)(x)
310 | # x = tf.keras.layers.Dense(units=6)(x)
311 | outputs = tf.keras.layers.Softmax()(x)
312 | return outputs
313 |
314 |
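315 | # --- Added worked example (hedged, not part of the original file) ---
316 | # Feature pipeline used by MidProcessor for one person, assuming 5 keypoints and a
317 | # 640x480 color image:
318 | #   dp_list = get_mean_dep_val_xyz(...)            # 5 x [x_pixel, y_pixel, mean_depth]
319 | #   a = norm_keypoints_xyz(dp_list, 0, [640, 480]) # (3, 5): x/640, y/480, min-max depth,
320 | #                                                  #   shifted so keypoint 0 is the origin
321 | #   a = no_negative(a)                             # per-row shift so every value is >= 0
322 | #   a = filter_zero(a, 0.001)                      # zero out values below 0.001
323 | # my_mlp() then flattens the resulting (3, 5, 1) tensor into 15 features and outputs a
324 | # softmax over 6 action classes.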
--------------------------------------------------------------------------------
/src/core/utils.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : utils.py
8 | # Author : YunYang1994
9 | # Created date: 2019-07-12 01:33:38
10 | # Description :
11 | #
12 | #================================================================
13 |
14 | import cv2
15 | import random
16 | import colorsys
17 | import numpy as np
18 |
19 | def load_weights(model, weights_file):
20 | """
21 | I agree that this code is very ugly, but I don’t know any better way of doing it.
22 | """
23 | wf = open(weights_file, 'rb')
24 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
25 |
26 | j = 0
27 | for i in range(75):
28 | conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d'
29 | bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization'
30 |
31 | conv_layer = model.get_layer(conv_layer_name)
32 | filters = conv_layer.filters
33 | k_size = conv_layer.kernel_size[0]
34 | in_dim = conv_layer.input_shape[-1]
35 |
36 | if i not in [58, 66, 74]:
37 | # darknet weights: [beta, gamma, mean, variance]
38 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
39 | # tf weights: [gamma, beta, mean, variance]
40 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
41 | bn_layer = model.get_layer(bn_layer_name)
42 | j += 1
43 | else:
44 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
45 |
46 | # darknet shape (out_dim, in_dim, height, width)
47 | conv_shape = (filters, in_dim, k_size, k_size)
48 | conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape))
49 | # tf shape (height, width, in_dim, out_dim)
50 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
51 |
52 | if i not in [58, 66, 74]:
53 | conv_layer.set_weights([conv_weights])
54 | bn_layer.set_weights(bn_weights)
55 | else:
56 | conv_layer.set_weights([conv_weights, conv_bias])
57 |
58 | assert len(wf.read()) == 0, 'failed to read all data'
59 | wf.close()
60 |
61 |
62 | def read_class_names(class_file_name):
63 | '''loads class name from a file'''
64 | names = {}
65 | with open(class_file_name, 'r') as data:
66 | for ID, name in enumerate(data):
67 | names[ID] = name.strip('\n')
68 | return names
69 |
70 |
71 | def get_anchors(anchors_path):
72 | '''loads the anchors from a file'''
73 | with open(anchors_path) as f:
74 | anchors = f.readline()
75 | anchors = np.array(anchors.split(','), dtype=np.float32)
76 | return anchors.reshape(3, 3, 2)
77 |
78 |
79 | def image_preporcess(image, target_size, gt_boxes=None):
80 |
81 | ih, iw = target_size
82 | h, w, _ = image.shape
83 |
84 | scale = min(iw/w, ih/h)
85 | nw, nh = int(scale * w), int(scale * h)
86 | image_resized = cv2.resize(image, (nw, nh))
87 |
88 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
89 | dw, dh = (iw - nw) // 2, (ih-nh) // 2
90 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
91 | image_paded = image_paded / 255.
92 |
93 | if gt_boxes is None:
94 | return image_paded
95 |
96 | else:
97 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
98 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
99 | return image_paded, gt_boxes
100 |
101 |
102 | def draw_bbox(image, bboxes, pred=None, num_classes=6, classes=None, show_label=True):
103 | """
104 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
105 | """
106 | image_h, image_w, _ = image.shape
107 |     # num_classes += 1# one extra class, since possibly only the person or the skeleton is detected
108 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
109 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
110 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
111 |
112 | random.seed(0)
113 | random.shuffle(colors)
114 | random.seed(None)
115 |
116 | if bboxes and pred:
117 | for i, bbox in enumerate(bboxes):
118 | coor = np.array(bbox[:4], dtype=np.int32)
119 | fontScale = 0.5
120 | class_ind = pred[i][1]
121 | bbox_color = colors[class_ind]
122 | bbox_thick = int(0.6 * (image_h + image_w) / 600)
123 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
124 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
125 |
126 | if show_label:
127 | if pred != None:
128 | bbox_mess = '%s %s' % (classes[class_ind], pred[i][1])
129 | else:
130 | bbox_mess = '%s' % (classes[class_ind])
131 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
132 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled
133 |
134 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
135 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
136 |
137 | return image
138 |
139 |
140 |
141 | def bboxes_iou(boxes1, boxes2):
142 |
143 | boxes1 = np.array(boxes1)
144 | boxes2 = np.array(boxes2)
145 |
146 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
147 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
148 |
149 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
150 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
151 |
152 | inter_section = np.maximum(right_down - left_up, 0.0)
153 | inter_area = inter_section[..., 0] * inter_section[..., 1]
154 | union_area = boxes1_area + boxes2_area - inter_area
155 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
156 |
157 | return ious
158 |
159 |
160 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
161 | """
162 | :param bboxes: (xmin, ymin, xmax, ymax, score, class)
163 |
164 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
165 | https://github.com/bharatsingh430/soft-nms
166 | """
167 | classes_in_img = list(set(bboxes[:, 5]))
168 | best_bboxes = []
169 |
170 | for cls in classes_in_img:
171 | cls_mask = (bboxes[:, 5] == cls)
172 | cls_bboxes = bboxes[cls_mask]
173 |
174 | while len(cls_bboxes) > 0:
175 | max_ind = np.argmax(cls_bboxes[:, 4])
176 | best_bbox = cls_bboxes[max_ind]
177 | best_bboxes.append(best_bbox)
178 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
179 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
180 | weight = np.ones((len(iou),), dtype=np.float32)
181 |
182 | assert method in ['nms', 'soft-nms']
183 |
184 | if method == 'nms':
185 | iou_mask = iou > iou_threshold
186 | weight[iou_mask] = 0.0
187 |
188 | if method == 'soft-nms':
189 | weight = np.exp(-(1.0 * iou ** 2 / sigma))
190 |
191 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
192 | score_mask = cls_bboxes[:, 4] > 0.
193 | cls_bboxes = cls_bboxes[score_mask]
194 |
195 | return best_bboxes
196 |
197 |
198 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
199 |
200 | valid_scale=[0, np.inf]
201 | pred_bbox = np.array(pred_bbox)
202 |
203 | pred_xywh = pred_bbox[:, 0:4]
204 | pred_conf = pred_bbox[:, 4]
205 | pred_prob = pred_bbox[:, 5:]
206 |
207 | # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
208 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
209 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
210 | # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
211 | org_h, org_w = org_img_shape
212 | resize_ratio = min(input_size / org_w, input_size / org_h)
213 |
214 | dw = (input_size - resize_ratio * org_w) / 2
215 | dh = (input_size - resize_ratio * org_h) / 2
216 |
217 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
218 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
219 |
220 | # # (3) clip some boxes those are out of range
221 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
222 | np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
223 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
224 | pred_coor[invalid_mask] = 0
225 |
226 | # # (4) discard some invalid boxes
227 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
228 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
229 |
230 | # # (5) discard some boxes with low scores
231 | classes = np.argmax(pred_prob, axis=-1)
232 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
233 | score_mask = scores > score_threshold
234 | mask = np.logical_and(scale_mask, score_mask)
235 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
236 |
237 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
238 |
239 |
240 |
241 |
242 |
--------------------------------------------------------------------------------
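The boxes handed to nms() above follow the (xmin, ymin, xmax, ymax, score, class) layout noted in its docstring. A minimal sketch of how bboxes_iou and nms compose on plain NumPy data (the box values are made up for illustration, and core must be on the import path, e.g. when running from src/):

import numpy as np
import core.utils as utils

# two heavily overlapping "person" boxes (class 0) and one distant box; scores live in column 4
bboxes = np.array([
    [ 10.,  10., 110., 210., 0.90, 0.],
    [ 12.,  14., 118., 208., 0.75, 0.],
    [300., 120., 380., 260., 0.60, 0.],
])

# IoU of the highest-scoring box against the remaining two
print(utils.bboxes_iou(bboxes[0:1, :4], bboxes[1:, :4]))

# hard NMS drops the overlapping duplicate; method='soft-nms' would decay its score instead
kept = utils.nms(bboxes, iou_threshold=0.45, method='nms')
print(len(kept))  # -> 2

The same pair of calls is applied to the real model output in dataprocessing.py further below.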
/src/core/yolov3.py:
--------------------------------------------------------------------------------
1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
2 | # coding=utf-8
3 | #================================================================
4 | # Copyright (C) 2019 * Ltd. All rights reserved.
5 | #
6 | # Editor : VIM
7 | # File name : yolov3.py
8 | # Author : YunYang1994
9 | # Created date: 2019-07-12 13:47:10
10 | # Description :
11 | #
12 | #================================================================
13 | import sys
14 | import numpy as np
15 | import tensorflow as tf
16 | import core.utils as utils
17 | import core.common as common
18 | import core.backbone as backbone
19 | from core.config import cfg
20 |
21 |
22 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
23 | ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
24 | STRIDES = np.array(cfg.YOLO.STRIDES)
25 | IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
26 |
27 | def YOLOv3(input_layer):
28 | route_1, route_2, conv = backbone.darknet53(input_layer)
29 |
30 | conv = common.convolutional(conv, (1, 1, 1024, 512))
31 | conv = common.convolutional(conv, (3, 3, 512, 1024))
32 | conv = common.convolutional(conv, (1, 1, 1024, 512))
33 | conv = common.convolutional(conv, (3, 3, 512, 1024))
34 | conv = common.convolutional(conv, (1, 1, 1024, 512))
35 |
36 | conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024))
37 | conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
38 |
39 | conv = common.convolutional(conv, (1, 1, 512, 256))
40 | conv = common.upsample(conv)
41 |
42 | conv = tf.concat([conv, route_2], axis=-1)
43 |
44 | conv = common.convolutional(conv, (1, 1, 768, 256))
45 | conv = common.convolutional(conv, (3, 3, 256, 512))
46 | conv = common.convolutional(conv, (1, 1, 512, 256))
47 | conv = common.convolutional(conv, (3, 3, 256, 512))
48 | conv = common.convolutional(conv, (1, 1, 512, 256))
49 |
50 | conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
51 | conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
52 |
53 | conv = common.convolutional(conv, (1, 1, 256, 128))
54 | conv = common.upsample(conv)
55 |
56 | conv = tf.concat([conv, route_1], axis=-1)
57 |
58 | conv = common.convolutional(conv, (1, 1, 384, 128))
59 | conv = common.convolutional(conv, (3, 3, 128, 256))
60 | conv = common.convolutional(conv, (1, 1, 256, 128))
61 | conv = common.convolutional(conv, (3, 3, 128, 256))
62 | conv = common.convolutional(conv, (1, 1, 256, 128))
63 |
64 | conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
65 |     conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)
66 |
67 | return [conv_sbbox, conv_mbbox, conv_lbbox]
68 |
69 | def decode(conv_output, i=0):
70 | """
71 | return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes]
72 | contains (x, y, w, h, score, probability)
73 | """
74 |
75 | conv_shape = tf.shape(conv_output)
76 | batch_size = conv_shape[0]
77 | output_size = conv_shape[1]
78 |
79 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
80 |
81 | conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
82 | conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
83 | conv_raw_conf = conv_output[:, :, :, :, 4:5]
84 | conv_raw_prob = conv_output[:, :, :, :, 5: ]
85 |
86 | y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size])
87 | x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1])
88 |
89 | xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
90 | xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
91 | xy_grid = tf.cast(xy_grid, tf.float32)
92 |
93 | pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
94 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
95 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
96 |
97 | pred_conf = tf.sigmoid(conv_raw_conf)
98 | pred_prob = tf.sigmoid(conv_raw_prob)
99 |
100 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
101 |
102 | def bbox_iou(boxes1, boxes2):
103 |
104 | boxes1_area = boxes1[..., 2] * boxes1[..., 3]
105 | boxes2_area = boxes2[..., 2] * boxes2[..., 3]
106 |
107 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
108 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
109 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
110 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
111 |
112 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
113 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
114 |
115 | inter_section = tf.maximum(right_down - left_up, 0.0)
116 | inter_area = inter_section[..., 0] * inter_section[..., 1]
117 | union_area = boxes1_area + boxes2_area - inter_area
118 |
119 | return 1.0 * inter_area / union_area
120 |
121 | def bbox_giou(boxes1, boxes2):
122 |
123 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
124 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
125 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
126 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
127 |
128 | boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
129 | tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
130 | boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
131 | tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)
132 |
133 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
134 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
135 |
136 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
137 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
138 |
139 | inter_section = tf.maximum(right_down - left_up, 0.0)
140 | inter_area = inter_section[..., 0] * inter_section[..., 1]
141 | union_area = boxes1_area + boxes2_area - inter_area
142 | iou = inter_area / union_area
143 |
144 | enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
145 | enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
146 | enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
147 | enclose_area = enclose[..., 0] * enclose[..., 1]
148 | giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
149 |
150 | return giou
151 |
152 |
153 | def compute_loss(pred, conv, label, bboxes, i=0):
154 |
155 | conv_shape = tf.shape(conv)
156 | batch_size = conv_shape[0]
157 | output_size = conv_shape[1]
158 | input_size = STRIDES[i] * output_size
159 | conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
160 |
161 | conv_raw_conf = conv[:, :, :, :, 4:5]
162 | conv_raw_prob = conv[:, :, :, :, 5:]
163 |
164 | pred_xywh = pred[:, :, :, :, 0:4]
165 | pred_conf = pred[:, :, :, :, 4:5]
166 |
167 | label_xywh = label[:, :, :, :, 0:4]
168 | respond_bbox = label[:, :, :, :, 4:5]
169 | label_prob = label[:, :, :, :, 5:]
170 |
171 | giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
172 | input_size = tf.cast(input_size, tf.float32)
173 |
174 | bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
175 |     giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)
176 |
177 | iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
178 | max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
179 |
180 | respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 )
181 |
182 | conf_focal = tf.pow(respond_bbox - pred_conf, 2)
183 |
184 | conf_loss = conf_focal * (
185 | respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
186 | +
187 | respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
188 | )
189 |
190 | prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
191 |
192 | giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
193 | conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
194 | prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))
195 |
196 | return giou_loss, conf_loss, prob_loss
197 |
198 |
199 |
200 |
201 |
202 |
--------------------------------------------------------------------------------
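YOLOv3() returns only the three raw convolutional heads; decode() converts head i into (x, y, w, h, score, class probabilities) in input-image pixels using STRIDES[i] and ANCHORS[i]. A minimal sketch of wiring them into a Keras inference model, mirroring what dataprocessing.py does below (the weights path is just an example):

import tensorflow as tf
import core.utils as utils
from core.yolov3 import YOLOv3, decode

input_size = 416
input_layer = tf.keras.layers.Input([input_size, input_size, 3])

# three heads for small / medium / large objects, each decoded at its own stride
feature_maps = YOLOv3(input_layer)
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]

model = tf.keras.Model(input_layer, bbox_tensors)
utils.load_weights(model, "./yolov3.weights")  # darknet weight file; path is an example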
/src/data/anchors/basline_anchors.txt:
--------------------------------------------------------------------------------
1 | 1.25,1.625, 2.0,3.75, 4.125,2.875, 1.875,3.8125, 3.875,2.8125, 3.6875,7.4375, 3.625,2.8125, 4.875,6.1875, 11.65625,10.1875
2 |
--------------------------------------------------------------------------------
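The single line holds nine width,height anchor pairs, three per detection scale, expressed in feature-map (stride) units, which is why decode() multiplies ANCHORS[i] by STRIDES[i]. A sketch of how the file is presumably parsed, assuming utils.get_anchors reshapes it to (3 scales, 3 anchors, 2) as in the reference YOLOv3 implementation:

import numpy as np

# assumption: this path is what cfg.YOLO.ANCHORS points to, relative to src/
with open("./data/anchors/basline_anchors.txt") as f:
    anchors = np.array(f.readline().split(','), dtype=np.float32).reshape(3, 3, 2)

print(anchors[0])  # the three anchors used by the smallest-stride (small-object) head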
/src/data/classes/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/src/data/classes/yymnist.names:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 |
--------------------------------------------------------------------------------
/src/data/log/events.out.tfevents.1576027966.dongjai.16966.6502.v2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kevinchan04/Skeleton-based-action-recognition/2c4d7dd18cd90a2392e4c45a061e9e205c6dfaca/src/data/log/events.out.tfevents.1576027966.dongjai.16966.6502.v2
--------------------------------------------------------------------------------
/src/dataprocessing.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # coding=utf-8
3 |
4 | import cv2
5 | import time
6 | import numpy as np
7 | import core.utils as utils
8 | import tensorflow as tf
9 | from core.yolov3 import YOLOv3, decode
10 | import core.operation as operation
11 | import core.camera as cam
12 |
13 | #####################################################################
14 | ###### Import openpose && Openpose initialization ######
15 | import os
16 | import sys
17 | from sys import platform
18 | import argparse
19 |
20 | sys.path.append('/home/dongjai/openpose/build/python')
21 | from openpose import pyopenpose as op
22 |
23 | ## setting up openpose flags ##
24 | params = dict()
25 | params["model_folder"] = "/home/dongjai/openpose/models/"
26 | params["model_pose"] = "COCO"
27 | params["face"] = False
28 | params["hand"] = False
29 |
30 | # Starting OpenPose
31 | opWrapper = op.WrapperPython()
32 | opWrapper.configure(params)
33 | opWrapper.start()
34 |
35 | # Initialize Openpose
36 | datum = op.Datum()
37 | #####################################################################
38 |
39 | # video_path = "./docs/office2.mp4"
40 | # img_path = "./docs/color_270.png"
41 | # dp_path = "./docs/depth_8bit_270.png"
42 | # make sure the RGB image and the depth image are the same size
43 | alpha = 0.03 # depth frame is 8-bit; if the depth frame is 16-bit, use alpha = 0.0
44 | # video_path = 0
45 | num_classes = 80
46 | input_size = 416
47 |
48 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
49 | feature_maps = YOLOv3(input_layer)
50 |
51 | bbox_tensors = []
52 | for i, fm in enumerate(feature_maps):
53 | bbox_tensor = decode(fm, i)
54 | bbox_tensors.append(bbox_tensor)
55 |
56 | model = tf.keras.Model(input_layer, bbox_tensors)
57 | utils.load_weights(model, "./yolov3.weights")
58 | # model.summary()
59 |
60 | # vid = cv2.VideoCapture(video_path)
61 | # cam.wait4cam_init(vid, WaitTimes=10) # wait for the camera to initialize and settle its auto exposure
62 | # vid = cv2.imread(img_path, -1)
63 | # dp_vid = cv2.imread(dp_path, -1)
64 | files_path = r"/home/dongjai/action data set/dataset"
65 | dirs = os.listdir(files_path)
66 | for files in dirs:
67 | img_file_path = r"/home/dongjai/action data set/dataset/" + files
68 |     file_txt = open(img_file_path + "/" + files + ".txt", 'a')# the txt file is named after its action class
69 | img_num = 0
70 | print("Action: ", files)
71 | while True:
72 | prev_time = time.time()
73 |         if img_num == 2000: # exit once all images have been read
74 | file_txt.close()
75 | break
76 | ########################################
77 | ##### Read image ######
78 | ########################################
79 | print("Picture: ", img_num)
80 | img_path = img_file_path+"/RGB/color_"+str(img_num)+".png"
81 | dp_path = img_file_path+"/depth/depth_8bit_"+str(img_num)+".png"
82 | vid = cv2.imread(img_path, -1)
83 | dp_vid = cv2.imread(dp_path, -1)
84 | img_num = img_num + 1
85 | ########################################
86 | ##### YOLOv3 human detection ######
87 | ########################################
88 | # return_value, frame = vid.read()
89 | frame = vid
90 | return_value = 1
91 | dp_frame = dp_vid
92 | # print("rgbshape:", frame.shape)
93 | # print("dpshape:", dp_frame.shape)
94 | if return_value:
95 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
96 | else:
97 | raise ValueError("No image!")
98 | frame_size = frame.shape[:2]
99 |         image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])# resize and pad the image to 416x416
100 |         image_data = image_data[np.newaxis, ...].astype(np.float32)# convert to 32-bit float
101 |
102 | pred_bbox = model.predict(image_data)
103 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
104 | pred_bbox = tf.concat(pred_bbox, axis=0)
105 |
106 |         bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)# post-process the predicted bounding boxes
107 |         bboxes = utils.nms(bboxes, 0.45, method='nms')# non-maximum suppression keeps the most representative box
108 |         only_one_bboxes = operation.detect_only_one(bboxes, IDi=0)# keep only one class (person)
109 |         prob_top_n_bboxes = operation.select_and_pick(only_one_bboxes, index=4, order=0, top_n=3)# index=4 means selection is based on the class score
110 | top_n_bboxes_dp = operation.get_centerpoints_xyz(prob_top_n_bboxes, depth_frame=dp_frame)
111 | # print("top n bboxes dp : ", top_n_bboxes_dp)
112 |         #############NOTE: at run time the gap= values can be obtained via the rs.get_distance function, which avoids the problem that the 8-bit images grabbed for training limit the depth range to about 8500
113 |         top_n_bboxes_in_gap = operation.select_in_gap(top_n_bboxes_dp, gap=[500, 8000], alpha=alpha)# filter by distance range; gap is in mm
114 |         the_one_bbox = operation.select_and_pick(top_n_bboxes_in_gap, index=-1, order=1, top_n=1)# closer people have smaller depth values, so sort in ascending order
115 | if len(the_one_bbox) == 0:
116 | print("error no detection")
117 | continue
118 |         # identify the operator: take the depth at the center of each YOLO box and treat the closest person within the distance range (x~y) as the operator
119 | ########################################
120 | ##### YOLOv3 End ######
121 | ########################################
122 |
123 | ########################################
124 |         #####          Crop person region          ######
125 | ########################################
126 |         img_cut_set, left_top_list = operation.cut_bbox(frame, the_one_bbox, ext_pix_x=0, ext_pix_y=0)# crop the region inside every bbox of this frame into a list of sub-images
127 |         # img_cut_set = img_set[0:len(img_set)]# standalone individual images
128 | if len(left_top_list) != len(img_cut_set):
129 | print("error")
130 | ########################################
131 |         #####        Crop person region End        ######
132 | ########################################
133 |
134 | ########################################
135 | ##### Openpose ######
136 | ########################################
137 |         for i in range(len(img_cut_set)):# feed each cropped single-person image into OpenPose, one at a time
138 |             # OpenPose stage
139 |             datum.cvInputData = img_cut_set[i]
140 |             opWrapper.emplaceAndPop([datum])# OpenPose input image -> BGR ############### ATTENTION: could these calls be combined to improve runtime efficiency?
141 |
142 | # cv2.namedWindow("cut_one", cv2.WINDOW_AUTOSIZE)
143 | # cv2.imshow("cut_one", cut_out)
144 | cv2.namedWindow("cut_backbone_result", cv2.WINDOW_AUTOSIZE)
145 | cv2.imshow("cut_backbone_result", datum.cvOutputData)
146 |
147 |             # map the keypoint coordinates onto the depth frame
148 | kp_num = [0, 8, 9, 10, 11, 14, 15, 16, 17]
149 |             kp_list = operation.get_keypoints_xy(datum=datum, kp_num=kp_num) # [x, y] 2-D coordinates of the selected keypoints
150 | # print("kp list: ",kp_list)
151 |             if operation.filter_kp_list(kp_list, norm_p_index=0):# if the detected skeleton is missing the nose, assume the person has their back to the camera
152 | continue
153 | # dp_nparray.shape = (5, 7, 7, 3)
154 | nn_pix = 3
155 |             dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, depth_frame=dp_frame, left_top=left_top_list[i], nn_pix=nn_pix)# [x, y, z] map the keypoints back onto the original frame to get 3-D coordinates, plus their neighbouring pixels
156 |             # use the mean of the neighbourhood instead of the single center-point depth value
157 |             dp_list = operation.get_mean_dep_val_xyz(kp_list=kp_list, dp_nparray=dp_nparray)
158 |             # normalize the coordinates
159 |             dp_array_out = operation.norm_keypoints_xyz(dp_list, norm_p_index=0, cut_img=img_cut_set[i])
160 |             # shift the coordinates so that all values are non-negative
161 |             dp_array_no_neg = operation.no_negative(array=dp_array_out)
162 |             # write the joint data to the txt file # skeleton depth processing stage: the keypoints' depth values form one output vector
163 |             # treat values smaller than 0.001 as 0
164 | dp_array_no_neg_zero = operation.filter_zero(dp_array_no_neg, threadhold=0.001)
165 | # print("dp nparray: ", dp_array_out)
166 | # cv2.imshow("nparray", dp_array_no_neg)
167 |
168 | file_txt.write(str(dp_array_no_neg_zero))
169 | file_txt.write('\n')
170 | ########################################
171 | ##### Openpose End ######
172 | ########################################
173 |
174 | ########################################
175 | ##### MLP ######
176 | ########################################
177 |
178 |         # neural-network action recognition stage
179 |
180 |         # visualization stage
181 |         ##########################
182 |         ###   Original frame    ###
183 | ##########################
184 | curr_time = time.time()
185 | exec_time = curr_time - prev_time
186 |
187 |         image = utils.draw_bbox(frame, only_one_bboxes)# draw the bounding boxes
188 |         result = np.asarray(image)# appears unused
189 | info = "time: %.2f ms" %(1000*exec_time)
190 | cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
191 | fontScale=1, color=(255, 0, 0), thickness=2)
192 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
193 | result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
194 | cv2.imshow("result", result)
195 |
196 | if cv2.waitKey(1) & 0xFF == ord('q'):
197 | file_txt.close()
198 | break
199 |
200 |
201 |
202 |
--------------------------------------------------------------------------------
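dataprocessing.py expects the dataset root (here /home/dongjai/action data set/dataset) to hold one folder per action class, each containing aligned RGB and 8-bit depth frames numbered from 0, and it appends one line per detected skeleton to a text file named after the class. Roughly the following layout (the class name waving/ is only a placeholder):

dataset/
├── waving/
│   ├── RGB/color_0.png … color_1999.png
│   ├── depth/depth_8bit_0.png … depth_8bit_1999.png
│   └── waving.txt        # written by the script, one normalized keypoint vector per line
└── <other action classes>/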
/src/mediandensity.json:
--------------------------------------------------------------------------------
1 | {
2 | "aux-param-autoexposure-setpoint": "1536",
3 | "aux-param-colorcorrection1": "0.298828",
4 | "aux-param-colorcorrection10": "-0",
5 | "aux-param-colorcorrection11": "-0",
6 | "aux-param-colorcorrection12": "-0",
7 | "aux-param-colorcorrection2": "0.293945",
8 | "aux-param-colorcorrection3": "0.293945",
9 | "aux-param-colorcorrection4": "0.114258",
10 | "aux-param-colorcorrection5": "-0",
11 | "aux-param-colorcorrection6": "-0",
12 | "aux-param-colorcorrection7": "-0",
13 | "aux-param-colorcorrection8": "-0",
14 | "aux-param-colorcorrection9": "-0",
15 | "aux-param-depthclampmax": "65535",
16 | "aux-param-depthclampmin": "0",
17 | "aux-param-disparityshift": "0",
18 | "controls-autoexposure-auto": "False",
19 | "controls-autoexposure-manual": "10000",
20 | "controls-color-autoexposure-auto": "True",
21 | "controls-color-autoexposure-manual": "156",
22 | "controls-color-backlight-compensation": "0",
23 | "controls-color-brightness": "0",
24 | "controls-color-contrast": "50",
25 | "controls-color-gain": "64",
26 | "controls-color-gamma": "300",
27 | "controls-color-hue": "0",
28 | "controls-color-power-line-frequency": "3",
29 | "controls-color-saturation": "64",
30 | "controls-color-sharpness": "50",
31 | "controls-color-white-balance-auto": "True",
32 | "controls-color-white-balance-manual": "4600",
33 | "controls-depth-gain": "16",
34 | "controls-laserpower": "150",
35 | "controls-laserstate": "on",
36 | "ignoreSAD": "0",
37 | "param-amplitude-factor": "0",
38 | "param-autoexposure-setpoint": "1536",
39 | "param-censusenablereg-udiameter": "8",
40 | "param-censusenablereg-vdiameter": "5",
41 | "param-censususize": "8",
42 | "param-censusvsize": "5",
43 | "param-depthclampmax": "65535",
44 | "param-depthclampmin": "0",
45 | "param-depthunits": "1000",
46 | "param-disableraucolor": "0",
47 | "param-disablesadcolor": "0",
48 | "param-disablesadnormalize": "0",
49 | "param-disablesloleftcolor": "1",
50 | "param-disableslorightcolor": "0",
51 | "param-disparitymode": "0",
52 | "param-disparityshift": "0",
53 | "param-lambdaad": "978",
54 | "param-lambdacensus": "21",
55 | "param-leftrightthreshold": "32",
56 | "param-maxscorethreshb": "972",
57 | "param-medianthreshold": "175",
58 | "param-minscorethresha": "3",
59 | "param-neighborthresh": "1",
60 | "param-raumine": "3",
61 | "param-rauminn": "1",
62 | "param-rauminnssum": "6",
63 | "param-raumins": "2",
64 | "param-rauminw": "1",
65 | "param-rauminwesum": "5",
66 | "param-regioncolorthresholdb": "0.677104",
67 | "param-regioncolorthresholdg": "0.0117417",
68 | "param-regioncolorthresholdr": "0.0782779",
69 | "param-regionshrinku": "4",
70 | "param-regionshrinkv": "1",
71 | "param-robbinsmonrodecrement": "14",
72 | "param-robbinsmonroincrement": "22",
73 | "param-rsmdiffthreshold": "5.875",
74 | "param-rsmrauslodiffthreshold": "1",
75 | "param-rsmremovethreshold": "0.416667",
76 | "param-scanlineedgetaub": "18",
77 | "param-scanlineedgetaug": "493",
78 | "param-scanlineedgetaur": "390",
79 | "param-scanlinep1": "134",
80 | "param-scanlinep1onediscon": "10",
81 | "param-scanlinep1twodiscon": "70",
82 | "param-scanlinep2": "397",
83 | "param-scanlinep2onediscon": "21",
84 | "param-scanlinep2twodiscon": "226",
85 | "param-secondpeakdelta": "3",
86 | "param-texturecountthresh": "6",
87 | "param-texturedifferencethresh": "0",
88 | "param-usersm": "1",
89 | "param-zunits": "1000",
90 | "stream-depth-format": "Z16",
91 | "stream-fps": "30",
92 | "stream-height": "480",
93 | "stream-width": "640"
94 | }
--------------------------------------------------------------------------------
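mediandensity.json is a RealSense advanced-mode preset: depth units, auto-exposure set-points, census/scanline matching thresholds and a 640x480 @ 30 fps stream profile. A minimal sketch of pushing it onto a connected D4xx camera with pyrealsense2, assuming the device supports advanced mode (the file path is relative to the repository root):

import json
import pyrealsense2 as rs

dev = rs.context().query_devices()[0]        # first connected RealSense device
advnc_mode = rs.rs400_advanced_mode(dev)
if not advnc_mode.is_enabled():
    advnc_mode.toggle_advanced_mode(True)    # the device re-enumerates after this, so reconnect if needed

with open("src/mediandensity.json") as f:
    advnc_mode.load_json(json.dumps(json.load(f)))   # load_json expects a JSON string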