├── CMakeLists.txt ├── README.md ├── client ├── Module │ ├── imager.py │ └── windows_client.py ├── README.md ├── myclient.py ├── note.txt └── utils.py ├── environment.yaml ├── launch └── run.launch ├── msg ├── ActElem.msg └── Person.msg ├── package.xml ├── requirements.txt ├── scripts ├── listener.py ├── talker.py └── testing.py └── src ├── Modules ├── act_talker.py ├── camera.py ├── detection.py ├── imager.py ├── midprocessor.py ├── myserver.py ├── poser.py └── recognizer.py ├── core ├── backbone.py ├── camera.py ├── common.py ├── config.py ├── dataset.py ├── operation.py ├── utils.py └── yolov3.py ├── data ├── anchors │ └── basline_anchors.txt ├── classes │ ├── coco.names │ └── yymnist.names └── log │ └── events.out.tfevents.1576027966.dongjai.16966.6502.v2 ├── dataprocessing.py └── mediandensity.json /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(act_recognizer) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | roscpp 12 | rospy 13 | std_msgs 14 | message_generation 15 | ) 16 | 17 | ## System dependencies are found with CMake's conventions 18 | # find_package(Boost REQUIRED COMPONENTS system) 19 | 20 | 21 | ## Uncomment this if the package has a setup.py. This macro ensures 22 | ## modules and global scripts declared therein get installed 23 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 24 | # catkin_python_setup() 25 | 26 | ################################################ 27 | ## Declare ROS messages, services and actions ## 28 | ################################################ 29 | 30 | ## To declare and build messages, services or actions from within this 31 | ## package, follow these steps: 32 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 33 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 34 | ## * In the file package.xml: 35 | ## * add a build_depend tag for "message_generation" 36 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 37 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 38 | ## but can be declared for certainty nonetheless: 39 | ## * add a exec_depend tag for "message_runtime" 40 | ## * In this file (CMakeLists.txt): 41 | ## * add "message_generation" and every package in MSG_DEP_SET to 42 | ## find_package(catkin REQUIRED COMPONENTS ...) 43 | ## * add "message_runtime" and every package in MSG_DEP_SET to 44 | ## catkin_package(CATKIN_DEPENDS ...) 45 | ## * uncomment the add_*_files sections below as needed 46 | ## and list every .msg/.srv/.action file to be processed 47 | ## * uncomment the generate_messages entry below 48 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 
49 | 50 | ## Generate messages in the 'msg' folder 51 | add_message_files( 52 | FILES 53 | ActElem.msg 54 | Person.msg 55 | # Persons.msg 56 | # Message2.msg 57 | ) 58 | 59 | ## Generate services in the 'srv' folder 60 | # add_service_files( 61 | # FILES 62 | # Service1.srv 63 | # Service2.srv 64 | # ) 65 | 66 | ## Generate actions in the 'action' folder 67 | # add_action_files( 68 | # FILES 69 | # Action1.action 70 | # Action2.action 71 | # ) 72 | 73 | ## Generate added messages and services with any dependencies listed here 74 | generate_messages( 75 | DEPENDENCIES 76 | std_msgs 77 | ) 78 | 79 | ################################################ 80 | ## Declare ROS dynamic reconfigure parameters ## 81 | ################################################ 82 | 83 | ## To declare and build dynamic reconfigure parameters within this 84 | ## package, follow these steps: 85 | ## * In the file package.xml: 86 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 87 | ## * In this file (CMakeLists.txt): 88 | ## * add "dynamic_reconfigure" to 89 | ## find_package(catkin REQUIRED COMPONENTS ...) 90 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 91 | ## and list every .cfg file to be processed 92 | 93 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 94 | # generate_dynamic_reconfigure_options( 95 | # cfg/DynReconf1.cfg 96 | # cfg/DynReconf2.cfg 97 | # ) 98 | 99 | ################################### 100 | ## catkin specific configuration ## 101 | ################################### 102 | ## The catkin_package macro generates cmake config files for your package 103 | ## Declare things to be passed to dependent projects 104 | ## INCLUDE_DIRS: uncomment this if your package contains header files 105 | ## LIBRARIES: libraries you create in this project that dependent projects also need 106 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 107 | ## DEPENDS: system dependencies of this project that dependent projects also need 108 | catkin_package( 109 | INCLUDE_DIRS include 110 | LIBRARIES act_recognizer 111 | CATKIN_DEPENDS roscpp rospy std_msgs message_runtime 112 | DEPENDS system_lib 113 | ) 114 | 115 | ########### 116 | ## Build ## 117 | ########### 118 | 119 | ## Specify additional locations of header files 120 | ## Your package locations should be listed before other locations 121 | include_directories( 122 | # include 123 | ${catkin_INCLUDE_DIRS} 124 | ) 125 | 126 | ## Declare a C++ library 127 | # add_library(${PROJECT_NAME} 128 | # src/${PROJECT_NAME}/act_recognizer.cpp 129 | # ) 130 | 131 | ## Add cmake target dependencies of the library 132 | ## as an example, code may need to be generated before libraries 133 | ## either from message generation or dynamic reconfigure 134 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 135 | 136 | ## Declare a C++ executable 137 | ## With catkin_make all packages are built within a single CMake context 138 | ## The recommended prefix ensures that target names across packages don't collide 139 | # add_executable(${PROJECT_NAME}_node src/act_recognizer_node.cpp) 140 | 141 | ## Rename C++ executable without prefix 142 | ## The above recommended prefix causes long target names, the following renames the 143 | ## target back to the shorter version for ease of user use 144 | ## e.g. 
"rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 145 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 146 | 147 | ## Add cmake target dependencies of the executable 148 | ## same as for the library above 149 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 150 | 151 | ## Specify libraries to link a library or executable target against 152 | # target_link_libraries(${PROJECT_NAME}_node 153 | # ${catkin_LIBRARIES} 154 | # ) 155 | 156 | ############# 157 | ## Install ## 158 | ############# 159 | 160 | # all install targets should use catkin DESTINATION variables 161 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 162 | 163 | ## Mark executable scripts (Python etc.) for installation 164 | ## in contrast to setup.py, you can choose the destination 165 | # install(PROGRAMS 166 | # scripts/my_python_script 167 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 168 | # ) 169 | 170 | ## Mark executables for installation 171 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html 172 | # install(TARGETS ${PROJECT_NAME}_node 173 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 174 | # ) 175 | 176 | ## Mark libraries for installation 177 | ## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html 178 | # install(TARGETS ${PROJECT_NAME} 179 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 180 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 181 | # RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} 182 | # ) 183 | 184 | ## Mark cpp header files for installation 185 | # install(DIRECTORY include/${PROJECT_NAME}/ 186 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 187 | # FILES_MATCHING PATTERN "*.h" 188 | # PATTERN ".svn" EXCLUDE 189 | # ) 190 | 191 | ## Mark other files for installation (e.g. launch and bag files, etc.) 192 | # install(FILES 193 | # # myfile1 194 | # # myfile2 195 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 196 | # ) 197 | 198 | ############# 199 | ## Testing ## 200 | ############# 201 | 202 | ## Add gtest based cpp test target and link libraries 203 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_act_recognizer.cpp) 204 | # if(TARGET ${PROJECT_NAME}-test) 205 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) 206 | # endif() 207 | 208 | ## Add folders to be run by python nosetests 209 | # catkin_add_nosetests(test) 210 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skeleton-based-action-recognition 2 | Yolov3, Openpose, Tensorflow2, ROS, multi-thread 3 | 4 | It also support for remote GPU server. You can grap a frame from D435 in local and then send the data to remote server to process the data and return the result. 5 | 6 | This is my final year project "3D Action Recognition based on Openpose and YOLO". 7 | 8 | ## Installation 9 | 10 | ### 0. install openpose python api 11 | 12 | Following the openpose homepage instruction to install [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md) and compile the [python api](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md#python-api). 13 | 14 | 15 | ### 1. create a conda env. 
16 | 17 | ``` 18 | conda create -n tensorflow2 python=3.6 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | ### 2. create a ROS package 23 | 24 | First, follow the [ros wiki](http://wiki.ros.org/cn/ROS/Tutorials/InstallingandConfiguringROSEnvironment) instructions to install ROS and create a ROS workspace. 25 | 26 | Then create a ROS package named `act_recognizer`. 27 | ``` 28 | cd catkin_ws/src 29 | mkdir -p act_recognizer 30 | ``` 31 | 32 | Your ROS workspace folder will then have the following structure, 33 | ``` 34 | ->~/catkin_ws 35 | ----->build 36 | ----->devel 37 | ----->src 38 | --------->act_recognizer 39 | ``` 40 | 41 | ### 3. clone this repo 42 | 43 | Clone the repo and copy all of its files into the ROS package `act_recognizer`. 44 | 45 | Modify `act_talker.py` like this, 46 | ``` 47 | sys.path.append('{$your root path}/catkin_ws/src/act_recognizer') 48 | ``` 49 | 50 | Modify `config.py`: change the YOLO data paths, such as `YOLO.CLASSES` and `YOLO.ANCHORS`. 51 | 52 | Change the openpose python api path in `Module/poser.py` so that the code can import pyopenpose correctly. 53 | 54 | Additionally, you also have to change the openpose model path. 55 | ``` 56 | Module/poser.py 57 | ----->class PoseLoader() 58 | --------->self._params["model_folder"] = your openpose model folder 59 | ``` 60 | 61 | ### 4. download yolo and mlp checkpoints 62 | 63 | Download the checkpoints from [BaiduYun](https://pan.baidu.com/s/1XAbQe_AZBWuXT1MvYESh4w) (extraction code *cxj6*). Then move `yolov3.weights` into the `checkpoints/YOLO` folder and `mlp.h5` into `checkpoints`. You will probably need to create the `checkpoints` folder first. 64 | 65 | ``` 66 | cd act_recognizer/src 67 | mkdir -p checkpoints/YOLO 68 | ``` 69 | 70 | ### 5. run the code 71 | 72 | A ROS package written in Python does not need to be compiled. 73 | We do, however, have to point the first line of every `.py` file at the conda env `tensorflow2`'s python interpreter.
74 | Like this: 75 | 76 | ``` 77 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 78 | ``` 79 | 80 | Then run the code as follows, 81 | ``` 82 | roscore 83 | cd catkin_ws 84 | conda activate tensorflow2 85 | source ./devel/setup.bash 86 | roslaunch act_recognizer run.launch 87 | ``` 88 | 89 | ## Citation and Reference 90 | [Openpose from CMU](https://github.com/kevinchan04/openpose) 91 | 92 | [Yolov3 tensorflow2 from YunYang1994](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/master/4-Object_Detection/YOLOV3) 93 | -------------------------------------------------------------------------------- /client/Module/imager.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | from threading import Thread 5 | from queue import Queue 6 | 7 | import cv2 8 | import scipy.misc 9 | import numpy as np 10 | 11 | class ImageReader(): 12 | def __init__(self, opt, queueSize=128): 13 | self.opt = opt 14 | self.rgb_path = opt.rgb_path 15 | self.dp_path = opt.dp_path 16 | 17 | for root, dirs, files in os.walk(self.rgb_path): 18 | self.rgb_len = len(files) 19 | for root, dirs, files in os.walk(self.dp_path): 20 | self.dp_len = len(files) 21 | assert self.rgb_len == self.dp_len, "The numbers of RGB and depth images do not match" 22 | 23 | if opt.sp: # single-thread (queue-based) mode 24 | self.rgb_queue = Queue(maxsize=queueSize) 25 | self.dp_queue = Queue(maxsize=queueSize) 26 | 27 | def start_worker(self, target, path): 28 | if self.opt.sp: 29 | p = Thread(target=target, args=(), kwargs={"num": 0, "path": path}) # target is the function this worker thread runs 30 | # else: 31 | p.start() 32 | return p 33 | 34 | def start(self): 35 | self.rgb_image_worker = self.start_worker(self.rgb_reader_process, path=self.rgb_path) 36 | self.dp_image_worker = self.start_worker(self.dp_reader_process, path=self.dp_path) 37 | return self 38 | 39 | def stop(self): 40 | self.rgb_image_worker.join() 41 | self.dp_image_worker.join() 42 | self.clear_queues() 43 | 44 | def clear_queues(self): 45 | self.clear(self.rgb_queue) 46 | self.clear(self.dp_queue) 47 | 48 | def clear(self, queue): 49 | while not queue.empty(): 50 | queue.get() 51 | 52 | def wait_and_put(self, queue, item): 53 | queue.put(item) 54 | 55 | def wait_and_get(self, queue): 56 | return queue.get() 57 | 58 | def rgb_reader_process(self, **kargcs): 59 | while kargcs["num"] < self.rgb_len: 60 | rgb_name = "color_" + str(kargcs["num"]) + ".png" 61 | rgb_file = kargcs["path"] + "/" + rgb_name 62 | # print(rgb_file) 63 | frame = cv2.imread(rgb_file, -1) 64 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 65 | self.wait_and_put(self.rgb_queue, (rgb_name, kargcs["num"], frame)) 66 | kargcs["num"] += 1 67 | 68 | def dp_reader_process(self, **kargcs): 69 | while kargcs["num"] < self.dp_len: 70 | dp_name = "depth_8bit_" + str(kargcs["num"]) + ".png" 71 | dp_file = kargcs["path"] + "/" + dp_name 72 | frame = cv2.imread(dp_file, -1) 73 | self.wait_and_put(self.dp_queue, (dp_name, kargcs["num"], frame)) 74 | kargcs["num"] += 1 75 | 76 | def read_rgb(self): 77 | return self.wait_and_get(self.rgb_queue) 78 | 79 | def read_dp(self): 80 | return self.wait_and_get(self.dp_queue) 81 | 82 | -------------------------------------------------------------------------------- /client/Module/windows_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | from threading import Thread 5 | from queue import Queue 6 | import pickle 7 | 8 | import cv2 9 | 10 | class WindowsClient(): 11 | def __init__(self, opt,
queueSize=128): 12 | self.opt = opt 13 | self.host = opt.host 14 | self.port = opt.port 15 | from multiprocessing.connection import Client 16 | self.client = Client((self.host, self.port)) 17 | 18 | if opt.sp: 19 | self.frame_queue = Queue(maxsize=queueSize) 20 | self.output_queue = Queue(maxsize=queueSize) 21 | 22 | def start_worker(self, target): 23 | if self.opt.sp: 24 | p = Thread(target=target) # target is the function this worker thread runs 25 | # else: 26 | p.start() 27 | return p 28 | 29 | def start(self): 30 | self.client_worker = self.start_worker(self.client_worker_process) 31 | self.client_recv_worker = self.start_worker(self.client_recv_process) 32 | return self 33 | 34 | def stop(self): 35 | self.client_worker.join() 36 | self.client_recv_worker.join() 37 | self.clear_queues() 38 | 39 | def clear_queues(self): 40 | self.clear(self.frame_queue) 41 | self.clear(self.output_queue) 42 | 43 | def clear(self, queue): 44 | while not queue.empty(): 45 | queue.get() 46 | 47 | def wait_and_put(self, queue, item): 48 | queue.put(item) 49 | 50 | def wait_and_get(self, queue): 51 | return queue.get() 52 | 53 | def client_worker_process(self, **kargcs): 54 | while True: 55 | (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = self.wait_and_get(self.frame_queue) 56 | data_string = pickle.dumps((rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame)) 57 | # byte_size = sys.getsizeof(data_string) 58 | # print(byte_size) 59 | self.client.send(data_string) 60 | # print('Send', type(data_string)) 61 | # time.sleep(0.02) 62 | 63 | # pred_data_byte = self.client.recv() 64 | # # (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = pickle.loads(pred_data_byte) 65 | # (bboxes, pred_data) = pickle.loads(pred_data_byte) 66 | # # print("Recv", pred_data) 67 | # self.wait_and_put(self.output_queue, (bboxes, pred_data)) 68 | 69 | def client_recv_process(self): 70 | while True: 71 | pred_data_byte = self.client.recv() 72 | # (rgb_name, rgb_No, rgb_frame, dp_name, dps_No, dp_frame) = pickle.loads(pred_data_byte) 73 | (bboxes, pred_data) = pickle.loads(pred_data_byte) 74 | # print("Recv", pred_data) 75 | self.wait_and_put(self.output_queue, (bboxes, pred_data)) 76 | 77 | def read_data(self): 78 | return self.wait_and_get(self.output_queue) 79 | 80 | def put_data(self, rgb_name, rgb_No, rgb_frame, dp_name, dp_No, dp_frame): 81 | self.wait_and_put(self.frame_queue, (rgb_name, rgb_No, rgb_frame, dp_name, dp_No, dp_frame)) 82 | -------------------------------------------------------------------------------- /client/README.md: -------------------------------------------------------------------------------- 1 | This is the client side. 2 | 3 | It contains the D435 camera class and the client class. 4 | 5 | It uses Python pickle and sockets to send frame data to a remote GPU server. After the data is processed, the client receives the result back from the server. 6 | 7 | Before starting, change the IP address and port in both the client and the server.
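The repository's actual server lives in `src/Modules/myserver.py` (wrapping YOLO, Openpose, and the recognizer); the snippet below is only a minimal sketch of the receiving side of the wire format used by `WindowsClient` above, i.e. a `multiprocessing.connection.Listener` that unpickles the bundled frame tuple and sends back a pickled `(bboxes, pred_data)` pair. The names `run_stub_server`, the `0.0.0.0` bind address, and the placeholder result are illustrative, not taken from the repo.

```python
import pickle
from multiprocessing.connection import Listener

HOST, PORT = '0.0.0.0', 10086   # must match --host/--port passed to client/myclient.py

def run_stub_server():
    with Listener((HOST, PORT)) as listener:
        print('Waiting for a client on {}:{} ...'.format(HOST, PORT))
        with listener.accept() as conn:
            while True:
                data_string = conn.recv()              # bytes produced by pickle.dumps() in WindowsClient
                (rgb_name, rgb_no, rgb_frame,
                 dp_name, dp_no, dp_frame) = pickle.loads(data_string)
                # ... run detection / pose estimation / action recognition here ...
                bboxes, pred_data = [], None           # placeholder result
                conn.send(pickle.dumps((bboxes, pred_data)))  # unpickled by client_recv_process()

if __name__ == '__main__':
    run_stub_server()
```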
-------------------------------------------------------------------------------- /client/myclient.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | from threading import Thread 5 | from queue import Queue 6 | import pickle 7 | 8 | from tqdm import tqdm 9 | import cv2 10 | 11 | import utils 12 | 13 | from Modules.imager import ImageReader 14 | from Modules.windows_client import WindowsClient 15 | 16 | parser = argparse.ArgumentParser(description='Debug') 17 | parser.add_argument('--rgb_path', type=str, default=r"I:\0walk/RGB", 18 | help='path to store rgb images') 19 | parser.add_argument('--dp_path', type=str, default=r"I:\0walk/depth", 20 | help='path to store depth images') 21 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json", 22 | help='the camera setting json file path') 23 | parser.add_argument('--width', type=int, default=640, 24 | help='image width') 25 | parser.add_argument('--height', type=int, default=480, 26 | help='image height') 27 | parser.add_argument('--fps', type=int, default=30, 28 | help='camera fps') 29 | parser.add_argument('--sp', default=True, action='store_true', # single process by default; multi-process runs out of GPU memory 30 | help='Use single process') 31 | parser.add_argument('--d435', type=bool, default=False, 32 | help='Using the D435 cam') 33 | parser.add_argument('--host', type=str, default='10.60.2.65',#10.60.2.47 10.60.2.65 34 | help='server_host') 35 | parser.add_argument('--port', type=int, default=10086,#60001 10086 36 | help='server_port') 37 | 38 | args = parser.parse_args() 39 | # args, unknown = parser.parse_known_args()#NOTE : using in ROS environment 40 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"} 41 | label_list = ["walk", "stop", "come", "phone", "open", "stand"] 42 | 43 | #Start the Image Reader Thread 44 | image_reader = ImageReader(args) 45 | image_reader.start() 46 | 47 | my_windows_client = WindowsClient(args) 48 | my_windows_client.start() 49 | 50 | if args.d435: 51 | print('Starting D435, press Ctrl + C to terminate...') 52 | sys.stdout.flush() 53 | im_names_desc = tqdm(loop()) 54 | else: 55 | data_len = image_reader.rgb_len 56 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True) # terminal progress bar 57 | 58 | try: 59 | for i in im_names_desc: 60 | # (rgb_nums, rgb_frame) = camera_reader.read_rgb() 61 | (rgb_name, rgb_nums, rgb_frame) = image_reader.read_rgb() 62 | 63 | # (dp_nums, dp_frame) = camera_reader.read_dp() 64 | (dp_name, dp_nums, dp_frame) = image_reader.read_dp() 65 | 66 | my_windows_client.put_data(rgb_name, rgb_nums, rgb_frame, dp_name, dp_nums, dp_frame) 67 | 68 | (bboxes, pred_data) = my_windows_client.read_data() 69 | # print("pred data: ", pred_data) 70 | 71 | image = utils.draw_bbox(rgb_frame, bboxes, pred=pred_data, classes=label_list) 72 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) 73 | rgb_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR) 74 | cv2.imshow("result", rgb_frame) 75 | if cv2.waitKey(1) & 0xFF == ord('q'): 76 | break 77 | 78 | image_reader.stop() 79 | my_windows_client.stop() 80 | except KeyboardInterrupt: 81 | image_reader.stop() 82 | my_windows_client.stop() 83 | -------------------------------------------------------------------------------- /client/note.txt: -------------------------------------------------------------------------------- 1 | The VPN is rate-limited, 2 | roughly 500 KB/s to 1 MB/s, 3 | so sending one bundled data package takes about 0.5 s. 4 | Running the program standalone on the server achieves real time; Openpose takes less time per frame than YOLO.
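For context on the bandwidth note above, the per-frame payload can be estimated directly, since the client pickles the raw numpy arrays with no image compression. This is only a rough sketch, assuming 640x480 uint8 color and 8-bit depth images (the `--width`/`--height` defaults in `myclient.py`); encoding the frames with `cv2.imencode` before pickling would be one way to shrink the payload if the VPN is the bottleneck.

```python
# Rough size of one pickled (rgb, depth) bundle.
import pickle
import numpy as np

rgb = np.zeros((480, 640, 3), dtype=np.uint8)   # like a decoded color_*.png
dp = np.zeros((480, 640), dtype=np.uint8)       # like a decoded depth_8bit_*.png
payload = pickle.dumps(("color_0.png", 0, rgb, "depth_8bit_0.png", 0, dp))
print("%.2f MB per bundle" % (len(payload) / 1e6))   # roughly 1.2 MB uncompressed
```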
-------------------------------------------------------------------------------- /client/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import colorsys 4 | import numpy as np 5 | 6 | def draw_bbox(image, bboxes, pred=None, num_classes=6, classes=None, show_label=True): 7 | """ 8 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates. 9 | """ 10 | image_h, image_w, _ = image.shape 11 | # num_classes += 1# 多一个类别,因为可能只检测到人或骨骼 12 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)] 13 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 14 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 15 | 16 | random.seed(0) 17 | random.shuffle(colors) 18 | random.seed(None) 19 | 20 | if bboxes and pred: 21 | for i, bbox in enumerate(bboxes): 22 | coor = np.array(bbox[:4], dtype=np.int32) 23 | fontScale = 0.5 24 | class_ind = pred[i][1] 25 | bbox_color = colors[class_ind] 26 | bbox_thick = int(0.6 * (image_h + image_w) / 600) 27 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3]) 28 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick) 29 | 30 | if show_label: 31 | if pred != None: 32 | bbox_mess = '%s %s' % (classes[class_ind], pred[i][1]) 33 | else: 34 | bbox_mess = '%s' % (classes[class_ind]) 35 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0] 36 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled 37 | 38 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX, 39 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA) 40 | 41 | return image -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: tensorflow2 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _nb_ext_conf=0.4.0=py36_1 8 | - anaconda-client=1.6.3=py36_0 9 | - bleach=1.5.0=py36_0 10 | - ca-certificates=2019.11.27=0 11 | - certifi=2019.11.28=py36_0 12 | - clyent=1.2.2=py36_0 13 | - cudatoolkit=10.0.130=0 14 | - cudnn=7.6.4=cuda10.0_0 15 | - decorator=4.1.2=py36_0 16 | - entrypoints=0.2.3=py36_0 17 | - html5lib=0.9999999=py36_0 18 | - ipykernel=4.6.1=py36_0 19 | - ipython=6.1.0=py36_0 20 | - ipython_genutils=0.2.0=py36_0 21 | - ipywidgets=6.0.0=py36_0 22 | - jedi=0.10.2=py36_2 23 | - jinja2=2.9.6=py36_0 24 | - jsonschema=2.6.0=py36_0 25 | - jupyter_client=5.1.0=py36_0 26 | - jupyter_core=4.3.0=py36_0 27 | - libedit=3.1.20181209=hc058e9b_0 28 | - libffi=3.2.1=hd88cf55_4 29 | - libgcc-ng=9.1.0=hdf63c60_0 30 | - libsodium=1.0.10=0 31 | - libstdcxx-ng=9.1.0=hdf63c60_0 32 | - markupsafe=1.0=py36_0 33 | - mistune=0.7.4=py36_0 34 | - nb_anacondacloud=1.4.0=py36_0 35 | - nb_conda=2.2.0=py36_0 36 | - nb_conda_kernels=2.1.0=py36_0 37 | - nbconvert=5.2.1=py36_0 38 | - nbformat=4.4.0=py36_0 39 | - nbpresent=3.0.2=py36_0 40 | - ncurses=6.1=he6710b0_1 41 | - notebook=5.0.0=py36_0 42 | - openssl=1.1.1d=h7b6447c_3 43 | - pandocfilters=1.4.2=py36_0 44 | - path.py=10.3.1=py36_0 45 | - pexpect=4.2.1=py36_0 46 | - pickleshare=0.7.4=py36_0 47 | - pip=19.3.1=py36_0 48 | - prompt_toolkit=1.0.15=py36_0 49 | - ptyprocess=0.5.2=py36_0 50 | - pygments=2.2.0=py36_0 51 | - python=3.6.9=h265db76_0 52 | - pyyaml=3.12=py36_0 53 | - pyzmq=16.0.2=py36_0 54 | - 
readline=7.0=h7b6447c_5 55 | - setuptools=42.0.2=py36_0 56 | - simplegeneric=0.8.1=py36_1 57 | - singledispatch=3.4.0.3=py36_0 58 | - sqlite=3.30.1=h7b6447c_0 59 | - terminado=0.6=py36_0 60 | - testpath=0.3.1=py36_0 61 | - tk=8.6.8=hbc83047_0 62 | - tornado=4.5.2=py36_0 63 | - traitlets=4.3.2=py36_0 64 | - wcwidth=0.1.7=py36_0 65 | - wheel=0.33.6=py36_0 66 | - widgetsnbextension=3.0.2=py36_0 67 | - xz=5.2.4=h14c3975_4 68 | - yaml=0.1.6=0 69 | - zeromq=4.1.5=0 70 | - zlib=1.2.11=h7b6447c_3 71 | - pip: 72 | - absl-py==0.8.1 73 | - altgraph==0.16.1 74 | - astor==0.8.0 75 | - astroid==2.3.3 76 | - cachetools==3.1.1 77 | - chardet==3.0.4 78 | - cycler==0.10.0 79 | - distro==1.4.0 80 | - easydict==1.9 81 | - gast==0.2.2 82 | - google-auth==1.7.2 83 | - google-auth-oauthlib==0.4.1 84 | - google-pasta==0.1.8 85 | - grpcio==1.25.0 86 | - h5py==2.10.0 87 | - idna==2.8 88 | - isort==4.3.21 89 | - keras==2.3.1 90 | - keras-applications==1.0.8 91 | - keras-preprocessing==1.1.0 92 | - kiwisolver==1.1.0 93 | - lazy-object-proxy==1.4.3 94 | - markdown==3.1.1 95 | - matplotlib==3.1.2 96 | - mccabe==0.6.1 97 | - numpy==1.17.0 98 | - oauthlib==3.1.0 99 | - opencv-python==4.1.2.30 100 | - opt-einsum==3.1.0 101 | - pandas==0.25.3 102 | - pillow==6.2.0 103 | - protobuf==3.11.1 104 | - pyasn1==0.4.8 105 | - pyasn1-modules==0.2.7 106 | - pyinstaller==3.5 107 | - pylint==2.4.4 108 | - pyparsing==2.4.5 109 | - pyrealsense2==2.31.0.1235 110 | - python-dateutil==2.8.1 111 | - pytz==2019.3 112 | - requests==2.22.0 113 | - requests-oauthlib==1.3.0 114 | - rospkg==1.2.3 115 | - rsa==4.0 116 | - scipy==1.1.0 117 | - seaborn==0.9.0 118 | - six==1.13.0 119 | - tb-nightly==1.14.0a20190301 120 | - tensorboard==1.14.0 121 | - tensorflow==2.0.0a0 122 | - tensorflow-estimator==2.0.1 123 | - tensorflow-gpu==2.0.0 124 | - termcolor==1.1.0 125 | - tf-estimator-nightly==1.14.0.dev2019030115 126 | - tqdm==4.40.2 127 | - typed-ast==1.4.0 128 | - urllib3==1.25.7 129 | - werkzeug==0.16.0 130 | - wget==3.2 131 | - wrapt==1.11.2 132 | prefix: /home/dongjai/anaconda3/envs/tensorflow2 133 | 134 | -------------------------------------------------------------------------------- /launch/run.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /msg/ActElem.msg: -------------------------------------------------------------------------------- 1 | float32 x 2 | float32 y 3 | float32 z 4 | uint16 act_cls 5 | -------------------------------------------------------------------------------- /msg/Person.msg: -------------------------------------------------------------------------------- 1 | ActElem[] xyz_cls 2 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | act_recognizer 4 | 0.0.0 5 | The act_recognizer package 6 | 7 | 8 | 9 | 10 | dongjai 11 | 12 | 13 | 14 | 15 | 16 | TODO 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | catkin 52 | roscpp 53 | rospy 54 | std_msgs 55 | message_generation 56 | roscpp 57 | rospy 58 | std_msgs 59 | message_generation 60 | roscpp 61 | rospy 62 | std_msgs 63 | message_runtime 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 
| -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.9.0 2 | actionlib==1.12.0 3 | altgraph==0.16.1 4 | anaconda-client==1.6.3 5 | angles==1.9.12 6 | astor==0.8.1 7 | astroid==2.3.3 8 | bleach==1.5.0 9 | bondpy==1.8.3 10 | cachetools==4.0.0 11 | camera-calibration==1.14.0 12 | camera-calibration-parsers==1.11.13 13 | catkin==0.7.20 14 | catkin-pkg==0.4.16 15 | certifi==2019.11.28 16 | chardet==3.0.4 17 | cloudpickle==1.2.2 18 | clyent==1.2.2 19 | controller-manager==0.17.0 20 | controller-manager-msgs==0.17.0 21 | cv-bridge==1.13.0 22 | cycler==0.10.0 23 | Cython==0.29.14 24 | decorator==4.4.1 25 | diagnostic-analysis==1.9.3 26 | diagnostic-common-diagnostics==1.9.3 27 | diagnostic-updater==1.9.3 28 | distro==1.4.0 29 | docutils==0.16 30 | dynamic-reconfigure==1.6.0 31 | easydict==1.9 32 | empy==3.3.4 33 | entrypoints==0.2.3 34 | gast==0.2.2 35 | gazebo-plugins==2.8.6 36 | gazebo-ros==2.8.6 37 | gencpp==0.6.2 38 | geneus==2.2.6 39 | genlisp==0.4.16 40 | genmsg==0.5.12 41 | gennodejs==2.0.1 42 | genpy==0.6.9 43 | google-auth==1.11.3 44 | google-auth-oauthlib==0.4.1 45 | google-pasta==0.2.0 46 | grpcio==1.27.2 47 | h5py==2.10.0 48 | html5lib==0.9999999 49 | idna==2.9 50 | image-geometry==1.13.0 51 | imageio==2.4.1 52 | interactive-markers==1.11.4 53 | ipykernel==4.6.1 54 | ipython==6.1.0 55 | ipython-genutils==0.2.0 56 | ipywidgets==6.0.0 57 | isort==4.3.21 58 | jedi==0.10.2 59 | Jinja2==2.9.6 60 | joint-state-publisher==1.12.14 61 | jsonschema==2.6.0 62 | jupyter-client==5.1.0 63 | jupyter-core==4.3.0 64 | kdl-parser-py==1.13.1 65 | Keras==2.3.1 66 | Keras-Applications==1.0.8 67 | Keras-Preprocessing==1.1.0 68 | kiwisolver==1.1.0 69 | laser-geometry==1.6.4 70 | lazy-object-proxy==1.4.3 71 | lxml==4.2.6 72 | Markdown==3.2.1 73 | MarkupSafe==1.0 74 | matplotlib==3.2.1 75 | mccabe==0.6.1 76 | message-filters==1.14.3 77 | mistune==0.7.4 78 | mock==3.0.5 79 | nb-anacondacloud==1.4.0 80 | nb-conda==2.2.0 81 | nb-conda-kernels==2.1.0 82 | nbconvert==5.2.1 83 | nbformat==4.4.0 84 | nbpresent==3.0.2 85 | networkx==2.4 86 | nltk==3.3 87 | notebook==5.0.0 88 | numpy==1.18.2 89 | oauthlib==3.1.0 90 | opencv-python==3.4.8.29 91 | opt-einsum==3.1.0 92 | pandas==0.25.3 93 | pandocfilters==1.4.2 94 | pexpect==4.2.1 95 | pickleshare==0.7.4 96 | Pillow==7.0.0 97 | progressbar2==3.38.0 98 | prompt-toolkit==1.0.15 99 | protobuf==3.11.3 100 | ptyprocess==0.5.2 101 | pyasn1==0.4.8 102 | pyasn1-modules==0.2.8 103 | pycocotools==2.0.0 104 | Pygments==2.2.0 105 | PyInstaller==3.5 106 | pylint==2.4.4 107 | pyparsing==2.4.6 108 | pyrealsense2==2.31.0.1235 109 | python-dateutil==2.8.1 110 | python-qt-binding==0.4.0 111 | python-utils==2.3.0 112 | pytz==2019.3 113 | PyWavelets==1.1.1 114 | PyYAML==3.12 115 | pyzmq==16.0.2 116 | qt-dotgraph==0.4.0 117 | qt-gui==0.4.0 118 | qt-gui-cpp==0.4.0 119 | qt-gui-py-common==0.4.0 120 | requests==2.23.0 121 | requests-oauthlib==1.3.0 122 | resource-retriever==1.12.6 123 | rosbag==1.14.3 124 | rosboost-cfg==1.14.7 125 | rosclean==1.14.7 126 | roscreate==1.14.7 127 | rosgraph==1.14.3 128 | roslaunch==1.14.3 129 | roslib==1.14.7 130 | roslint==0.11.2 131 | roslz4==1.14.3 132 | rosmake==1.14.7 133 | rosmaster==1.14.3 134 | rosmsg==1.14.3 135 | rosnode==1.14.3 136 | rosparam==1.14.3 137 | rospkg==1.2.3 138 | rospy==1.14.3 139 | rosservice==1.14.3 140 | rostest==1.14.3 141 | rostopic==1.14.3 142 | rosunit==1.14.7 143 | roswtf==1.14.3 144 | 
rqt-action==0.4.9 145 | rqt-bag==0.4.12 146 | rqt-bag-plugins==0.4.12 147 | rqt-console==0.4.9 148 | rqt-dep==0.4.9 149 | rqt-graph==0.4.11 150 | rqt-gui==0.5.0 151 | rqt-gui-py==0.5.0 152 | rqt-image-view==0.4.14 153 | rqt-launch==0.4.8 154 | rqt-logger-level==0.4.8 155 | rqt-moveit==0.5.7 156 | rqt-msg==0.4.8 157 | rqt-nav-view==0.5.7 158 | rqt-plot==0.4.9 159 | rqt-pose-view==0.5.8 160 | rqt-publisher==0.4.8 161 | rqt-py-common==0.5.0 162 | rqt-py-console==0.4.8 163 | rqt-reconfigure==0.5.1 164 | rqt-robot-dashboard==0.5.7 165 | rqt-robot-monitor==0.5.9 166 | rqt-robot-steering==0.5.10 167 | rqt-runtime-monitor==0.5.7 168 | rqt-rviz==0.6.0 169 | rqt-service-caller==0.4.8 170 | rqt-shell==0.4.9 171 | rqt-srv==0.4.8 172 | rqt-tf-tree==0.6.0 173 | rqt-top==0.4.8 174 | rqt-topic==0.4.11 175 | rqt-web==0.4.8 176 | rsa==4.0 177 | rviz==1.13.7 178 | scikit-image==0.14.5 179 | scikit-learn==0.20.4 180 | scipy==1.1.0 181 | seaborn==0.9.0 182 | sensor-msgs==1.12.7 183 | simplegeneric==0.8.1 184 | singledispatch==3.4.0.3 185 | six==1.14.0 186 | smach==2.0.1 187 | smach-ros==2.0.1 188 | smclib==1.8.3 189 | tb-nightly==1.14.0a20190301 190 | tensorboard==2.0.2 191 | tensorflow==2.0.0a0 192 | tensorflow-estimator==1.14.0 193 | tensorflow-gpu==2.0.0 194 | termcolor==1.1.0 195 | terminado==0.6 196 | terminaltables==3.1.0 197 | testpath==0.3 198 | tf==1.12.0 199 | tf-conversions==1.12.0 200 | tf-estimator-nightly==1.14.0.dev2019030115 201 | tf2-geometry-msgs==0.6.5 202 | tf2-kdl==0.6.5 203 | tf2-py==0.6.5 204 | tf2-ros==0.6.5 205 | topic-tools==1.14.3 206 | torch==1.4.0 207 | torchvision==0.5.0 208 | tornado==4.5.2 209 | tqdm==4.43.0 210 | traitlets==4.3.2 211 | typed-ast==1.4.0 212 | urdfdom-py==0.4.2 213 | urllib3==1.25.8 214 | wcwidth==0.1.7 215 | Werkzeug==1.0.0 216 | wget==3.2 217 | widgetsnbextension==3.0.2 218 | wrapt==1.12.1 219 | xacro==1.13.5 220 | -------------------------------------------------------------------------------- /scripts/listener.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | import rospy 5 | from std_msgs.msg import String 6 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer') 7 | from act_recognizer.msg import Persons 8 | from act_recognizer.msg import Person 9 | from act_recognizer.msg import ActElem 10 | 11 | def callback(data): 12 | rospy.logwarn(data.xyz_cls) 13 | #print(data.xyz_cls) 14 | 15 | def listener(): 16 | rospy.init_node('suber_xyzcls', anonymous=True) 17 | rospy.Subscriber('chatter', Person, callback) 18 | rospy.spin() 19 | 20 | if __name__ == '__main__': 21 | listener() 22 | 23 | 24 | 25 | # In ROS, nodes are uniquely named. If two nodes with the same 26 | # name are launched, the previous one is kicked off. The 27 | # anonymous=True flag means that rospy will choose a unique 28 | # name for our 'listener' node so that multiple listeners can 29 | # run simultaneously. 
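Since `listener.py` only logs the raw message, the following variant (illustrative, not part of the original scripts) shows how a downstream node could map the `act_cls` index of each `ActElem` back to a label name; the node name `act_label_printer` and the `LABELS` list (copied from `label_list` in `scripts/talker.py`) are assumptions for the sketch.

```python
#!/home/dongjai/anaconda3/envs/tensorflow2/bin/python
import sys
import rospy
sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer')
from act_recognizer.msg import Person

LABELS = ["walk", "stop", "come", "phone", "open", "stand"]  # same order as label_list in talker.py

def callback(person):
    for elem in person.xyz_cls:
        # elem.x / elem.y / elem.z hold the 3D position; elem.act_cls indexes LABELS
        rospy.loginfo("person at (%.2f, %.2f, %.2f): %s",
                      elem.x, elem.y, elem.z, LABELS[elem.act_cls])

if __name__ == '__main__':
    rospy.init_node('act_label_printer', anonymous=True)
    rospy.Subscriber('chatter', Person, callback)
    rospy.spin()
```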
30 | -------------------------------------------------------------------------------- /scripts/talker.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | 3 | import argparse 4 | import sys 5 | import time 6 | from threading import Thread 7 | from queue import Queue 8 | 9 | from tqdm import tqdm 10 | import cv2 11 | import rospy # if using ros, turn it on 12 | 13 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer/src/') 14 | import core.utils as utils 15 | from Modules.camera import CamReader 16 | from Modules.imager import ImageReader 17 | from Modules.detection import DetectionLoader 18 | from Modules.poser import PoseLoader 19 | from Modules.midprocessor import MidProcessor 20 | from Modules.recognizer import ActionRecognizer 21 | from Modules.act_talker import Talker # using a ros publisher 22 | # from Modules.myserver import myServer # using a remote server 23 | 24 | def getTime(time1=0): 25 | if not time1: 26 | return time.time() 27 | else: 28 | interval = time.time() - time1 29 | return time.time(), interval 30 | 31 | def loop(): 32 | n = 0 33 | while True: 34 | yield n 35 | n += 1 36 | 37 | parser = argparse.ArgumentParser(description='Debug') 38 | ### for training set record 39 | parser.add_argument('--is_train', type=bool, default=False, 40 | help='is_train is True means write records from Openpose and Normalization into a txt file.') 41 | parser.add_argument('--training_set', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/0walk.txt", 42 | help='txt file to store training set.') 43 | ### for inference 44 | parser.add_argument('--rgb_path', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/4open/RGB", 45 | help='path to store rgb images') 46 | parser.add_argument('--dp_path', type=str, default=r"/home/dongjai/action data set/dataset/twoperson/4open/depth", 47 | help='path to store depth images') 48 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json", 49 | help='the camera setting json file path') 50 | parser.add_argument('--width', type=int, default=640, 51 | help='image width') 52 | parser.add_argument('--height', type=int, default=480, 53 | help='image height') 54 | parser.add_argument('--fps', type=int, default=30, 55 | help='camera fps') 56 | parser.add_argument('--sp', default=True, action='store_true', #多线程系统显存不足 57 | help='Use single process') 58 | parser.add_argument('--d435', type=bool, default=False, 59 | help='Using the D435 cam') 60 | parser.add_argument('--yolo_gpu_usages', type=float, default=0.4, 61 | help='restrict the gpu memory') 62 | parser.add_argument('--nn_pix', type=int, default=3, 63 | help='extend pixel for depth image') 64 | parser.add_argument('--kps_usage_num', type=int, default=5, 65 | help='the number of kps be used in recognizor') 66 | parser.add_argument('--kp_num', type=list, default=[0, 4, 7, 9, 12], 67 | help='the kps be used in recognizor') 68 | parser.add_argument('--yolo_weights', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/checkpoints/YOLO/yolov3.weights", 69 | help='yolov3 weights') 70 | parser.add_argument('--mlp_weights', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/checkpoints/new_mlp20_35_15.h5", 71 | help='mlp weights') 72 | parser.add_argument('--host', type=str, default='10.60.2.65', 73 | help='server_host') 74 | parser.add_argument('--port', type=int, default=10086, 75 | 
help='server_port') 76 | 77 | # args = parser.parse_args() 78 | args, unknown = parser.parse_known_args()#NOTE : using in ROS environment 79 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"} 80 | label_list = ["walk", "stop", "come", "phone", "open", "stand"] 81 | #For data statstic 82 | walk_t, stand_t, come_t, stop_t, open_t, phone_t = 0,0,0,0,0,0 83 | last_action_name, action_stable_time, stable_action_name = None, None, None 84 | 85 | mid_proc_time, mlp_pred_time = 0, 0 86 | det_loader_put_time, det_time = 0, 0 87 | pose_loader_put_time, det_openpose_time = 0, 0 88 | 89 | #Start the Server to receive data from client 90 | # proc_server = myServer(args) 91 | # proc_server.start() 92 | 93 | #Start the Camera Thread 94 | # camera_reader = CamReader(args) 95 | # camera_reader.start() 96 | 97 | #Start the Image Reader Thread 98 | image_reader = ImageReader(args) 99 | image_reader.start() 100 | 101 | #Start the YOLO Detection Thread 102 | det_loader = DetectionLoader(args) 103 | det_loader.start() 104 | 105 | #Start the Openpose Estimation Thread 106 | pose_loader = PoseLoader(args) 107 | pose_loader.start() 108 | 109 | #Start the Middle Data Processing Thread 110 | mid_processor = MidProcessor(args) 111 | mid_processor.start() 112 | 113 | #Start the MLP Action Recognition Thread 114 | act_recognizer = ActionRecognizer(args) 115 | act_recognizer.start() 116 | 117 | #Start the ROS Talker Thread to publish recognition data 118 | ros_talker = Talker(args) 119 | ros_talker.start() 120 | 121 | if args.d435: 122 | print('Starting D435, press Ctrl + C to terminate...') 123 | sys.stdout.flush() 124 | im_names_desc = tqdm(loop()) 125 | else: 126 | # data_len = 2000 127 | data_len = image_reader.rgb_len 128 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True)#终端输出 129 | 130 | try: 131 | for i in im_names_desc: 132 | pred_data = None 133 | start_time = getTime() 134 | 135 | # (rgb_name, rgb_nums, rgb_frame, dp_name, dp_nums, dp_frame) = proc_server.read_data() 136 | 137 | # (rgb_nums, rgb_frame) = camera_reader.read_rgb() 138 | (rgb_name, rgb_nums, rgb_frame) = image_reader.read_rgb() 139 | # ckpt_time, rgb_read_time = getTime(start_time) 140 | 141 | # (dp_nums, dp_frame) = camera_reader.read_dp() 142 | (dp_name, dp_nums, dp_frame) = image_reader.read_dp() 143 | ckpt_time, img_read_time = getTime(start_time) 144 | 145 | det_loader.put_image(rgb_nums, rgb_frame) 146 | ckpt_time, det_loader_put_time = getTime(ckpt_time) 147 | 148 | (det_nums, bboxes) = det_loader.read() 149 | ckpt_time, det_time = getTime(ckpt_time) 150 | 151 | pose_loader.put_image(rgb_nums, rgb_frame) 152 | ckpt_time, pose_loader_put_time = getTime(ckpt_time) 153 | 154 | (kp_nums, kps_list) = pose_loader.read() 155 | ckpt_time, det_openpose_time = getTime(ckpt_time) 156 | 157 | if kps_list and bboxes:# if detect some human in camera 158 | mid_processor.put_data(kp_nums, kps_list, dp_nums, dp_frame, det_nums, bboxes) 159 | ckpt_time, mid_proc_put_time = getTime(ckpt_time) 160 | 161 | (kp_nums, output) = mid_processor.read() 162 | ckpt_time, mid_proc_time = getTime(ckpt_time) 163 | # output = False #It is used to pass the following loop 164 | if output and not args.is_train: 165 | # print(output) 166 | act_recognizer.put_data(kp_nums, output) 167 | (out_nums, pred_data) = act_recognizer.read() 168 | ckpt_time, mlp_pred_time = getTime(ckpt_time) 169 | # cv2.imwrite("/home/dongjai/catkin_ws/src/act_recognizer/store/random/{}.png".format(rgb_nums), image) 170 | print(pred_data) 171 | #Using ROS 
talker to publish the information. 172 | ros_talker.put_data(pred_data) 173 | for i in pred_data: 174 | action_t = i[-1] 175 | if action_t == 0: 176 | walk_t +=1 177 | if action_t == 5: 178 | stand_t += 1 179 | if action_t == 1: 180 | stop_t +=1 181 | if action_t == 3: 182 | phone_t +=1 183 | if action_t == 2: 184 | come_t +=1 185 | if action_t == 4: 186 | open_t +=1 187 | else: 188 | pred_data = None 189 | # proc_server.get_data(bboxes, pred_data) 190 | image = utils.draw_bbox(rgb_frame, bboxes, pred=pred_data, classes=label_list) 191 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) 192 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 193 | cv2.imshow("result", image) 194 | # cv2.imwrite("/home/dongjai/catkin_ws/src/act_recognizer/store/2come/{}.png".format(rgb_nums), image) 195 | if cv2.waitKey(1) & 0xFF == ord('q'): 196 | break 197 | # im_names_desc.set_description( 198 | # 'img time: {img:.4f} | det put: {put:.4f} | det: {det:.4f} | opt: {opt:.4f} | mid: {mid:.4f} | mlp: {mlp:.4f}'.format( 199 | # img=img_read_time, put=det_loader_put_time, det=det_time, opt=det_openpose_time, mid=mid_proc_time, mlp=mlp_pred_time) 200 | # ) 201 | print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t)) 202 | # proc_server.stop() 203 | image_reader.stop() 204 | # camera_reader.stop() 205 | det_loader.stop() 206 | pose_loader.stop() 207 | mid_processor.stop() 208 | act_recognizer.stop() 209 | ros_talker.stop() 210 | except KeyboardInterrupt: 211 | print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t)) 212 | # proc_server.stop() 213 | image_reader.stop() 214 | # camera_reader.stop() 215 | det_loader.stop() 216 | pose_loader.stop() 217 | mid_processor.stop() 218 | act_recognizer.stop() 219 | ros_talker.stop() 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /scripts/testing.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | from threading import Thread 5 | from queue import Queue 6 | 7 | from tqdm import tqdm 8 | import cv2 9 | # import rospy 10 | 11 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer/src/') 12 | import core.utils as utils 13 | from Modules.camera import CamReader 14 | from Modules.imager import ImageReader 15 | from Modules.detection import DetectionLoader 16 | from Modules.poser import PoseLoader 17 | from Modules.midprocessor import MidProcessor 18 | from Modules.recognizer import ActionRecognizer 19 | 20 | def getTime(time1=0): 21 | if not time1: 22 | return time.time() 23 | else: 24 | interval = time.time() - time1 25 | return time.time(), interval 26 | 27 | def loop(): 28 | n = 0 29 | while True: 30 | yield n 31 | n += 1 32 | 33 | parser = argparse.ArgumentParser(description='Debug') 34 | parser.add_argument('--rgb_path', type=str, default=r"/home/dongjai/action data set/dataset/multiperson/2come/RGB", 35 | help='path to store rgb images') 36 | parser.add_argument('--dp_path', type=str, default=r"/home/dongjai/action data set/dataset/multiperson/2come/depth", 37 | help='path to store depth images') 38 | parser.add_argument('--cam_json', type=str, default=r"/home/dongjai/catkin_ws/src/act_recognizer/src/mediandensity.json", 39 | help='the camera setting json file path') 40 | parser.add_argument('--width', type=int, default=640, 41 | help='image width') 42 | 
parser.add_argument('--height', type=int, default=480, 43 | help='image height') 44 | parser.add_argument('--fps', type=int, default=30, 45 | help='camera fps') 46 | parser.add_argument('--sp', default=True, action='store_true', #多线程系统显存不足 47 | help='Use single process') 48 | parser.add_argument('--d435', type=bool, default=True, 49 | help='Using the D435 cam') 50 | parser.add_argument('--yolo_gpu_usages', type=float, default=0.4, 51 | help='restrict the gpu memory') 52 | parser.add_argument('--nn_pix', type=int, default=3, 53 | help='extend pixel for depth image') 54 | parser.add_argument('--kps_usage_num', type=int, default=5, 55 | help='the number of kps be used in recognizor') 56 | 57 | args = parser.parse_args() 58 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"} 59 | label_list = ["walk", "stop", "come", "phone", "open", "stand"] 60 | walk_t, stand_t, come_t, stop_t, open_t, phone_t = 0,0,0,0,0,0 61 | last_action_name, action_stable_time, stable_action_name = None, None, None 62 | 63 | mid_proc_time, mlp_pred_time = 0, 0 64 | det_loader_put_time, det_time = 0, 0 65 | pose_loader_put_time, det_openpose_time = 0, 0 66 | 67 | det_loader = DetectionLoader(args) 68 | det_loader.start() 69 | 70 | camera_reader = CamReader(args) 71 | camera_reader.start() 72 | 73 | if args.d435: 74 | print('Starting D435, press Ctrl + C to terminate...') 75 | sys.stdout.flush() 76 | im_names_desc = tqdm(loop()) 77 | else: 78 | data_len = image_reader.rgb_len 79 | im_names_desc = tqdm(range(data_len), dynamic_ncols=True)#终端输出 80 | 81 | try: 82 | for i in im_names_desc: 83 | (rgb_name, rgb_nums, rgb_frame) = camera_reader.read_rgb() 84 | (dp_name, dp_nums, dp_frame) = camera_reader.read_dp() 85 | 86 | det_loader.put_image(rgb_name, rgb_nums, rgb_frame) 87 | (rgb_name, det_nums, bboxes) = det_loader.read() 88 | 89 | image = rgb_frame 90 | cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE) 91 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 92 | cv2.imshow("result", image) 93 | if cv2.waitKey(1) & 0xFF == ord('q'): 94 | break 95 | camera_reader.stop() 96 | det_loader.stop() 97 | except KeyboardInterrupt: 98 | # print("walk: {}, stand: {}, stop: {}, come:{}, open:{}, phone:{}".format(walk_t, stand_t, stop_t, come_t, open_t, phone_t)) 99 | # image_reader.stop() 100 | camera_reader.stop() 101 | det_loader.stop() 102 | # pose_loader.stop() 103 | # mid_processor.stop() 104 | # act_recognizer.stop() -------------------------------------------------------------------------------- /src/Modules/act_talker.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | from threading import Thread 5 | from queue import Queue 6 | 7 | import rospy 8 | from std_msgs.msg import String 9 | sys.path.append('/home/dongjai/catkin_ws/src/act_recognizer') 10 | from act_recognizer.msg import Person 11 | from act_recognizer.msg import ActElem 12 | # from act_recognizer.msg import Persons 13 | 14 | import cv2 15 | import scipy.misc 16 | import numpy as np 17 | 18 | class Talker(): 19 | def __init__(self, opt, queueSize=128): 20 | self.opt = opt 21 | self._talker, self._talk_rate = self._init_talker() 22 | 23 | if opt.sp: 24 | self.data_queue = Queue(maxsize=queueSize) 25 | self.talk_queue = Queue(maxsize=queueSize) 26 | else: 27 | self.data_queue = mp.Queue(maxsize=queueSize) 28 | self.talk_queue = mp.Queue(maxsize=queueSize) 29 | 30 | def _init_talker(self): 31 | print("Initializing ROS 
Talker......") 32 | pub = rospy.Publisher('chatter', Person, queue_size=10) 33 | rospy.init_node('puber_xyzcls', anonymous=True) 34 | rate = rospy.Rate(10) # 10hz 35 | print("Successfully Initialized ROS Talker") 36 | return pub, rate 37 | 38 | def start_worker(self, target): 39 | if self.opt.sp: 40 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 41 | # else: 42 | p.start() 43 | return p 44 | 45 | def start(self): 46 | self.talker_worker = self.start_worker(self.talker_process) 47 | return self 48 | 49 | def stop(self): 50 | self.talker_worker.join() 51 | self.clear_queues() 52 | 53 | def clear_queues(self): 54 | self.clear(self.talk_queue) 55 | 56 | def clear(self, queue): 57 | while not queue.empty(): 58 | queue.get() 59 | 60 | def wait_and_put(self, queue, item): 61 | queue.put(item) 62 | 63 | def wait_and_get(self, queue): 64 | return queue.get() 65 | 66 | def talker_process(self, **kargcs): 67 | while not rospy.is_shutdown(): 68 | pred_data_list = self.wait_and_get(self.data_queue) 69 | person_msg = self.persondata_to_msg(pred_data_list) 70 | 71 | # hello_str = "hello world %s" % rospy.get_time() 72 | # rospy.loginfo(person_msg) 73 | self._talker.publish(person_msg) 74 | self._talk_rate.sleep() 75 | 76 | def persondata_to_msg(self, pred_data_list): 77 | # persons = Persons() 78 | person = Person() 79 | for i in pred_data_list: 80 | act_elem = ActElem() 81 | act_elem.x = i[0][0] 82 | act_elem.y = i[0][1] 83 | act_elem.z = i[0][2] 84 | act_elem.act_cls = i[-1] 85 | person.xyz_cls.append(act_elem) 86 | # persons.persons.append(person) 87 | # person.xyz_cls.clear() 88 | return person 89 | 90 | def put_data(self, pred_data_list): 91 | self.wait_and_put(self.data_queue, (pred_data_list)) 92 | 93 | -------------------------------------------------------------------------------- /src/Modules/camera.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | import time 5 | from threading import Thread 6 | from queue import Queue 7 | 8 | import cv2 9 | import scipy.misc 10 | import numpy as np 11 | 12 | from core import camera as cam 13 | import pyrealsense2 as rs 14 | import json 15 | 16 | class CamReader(): 17 | def __init__(self, opt, queueSize=10): 18 | self.opt = opt 19 | self.json_file_path = opt.cam_json 20 | self.width = opt.width 21 | self.height = opt.height 22 | self.fps = opt.fps 23 | 24 | if opt.sp:#单线程 25 | self.rgb_queue = Queue(maxsize=queueSize) 26 | self.dp_queue = Queue(maxsize=queueSize) 27 | 28 | def start_worker(self, target): 29 | if self.opt.sp: 30 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 31 | # else: 32 | p.start() 33 | return p 34 | 35 | def start(self): 36 | self.cam_worker = self.start_worker(self.cam_reader_process) 37 | return self 38 | 39 | def stop(self): 40 | self.cam_worker.join() 41 | self.clear_queues() 42 | 43 | def clear_queues(self): 44 | self.clear(self.rgb_queue) 45 | self.clear(self.dp_queue) 46 | 47 | def clear(self, queue): 48 | while not queue.empty(): 49 | queue.get() 50 | 51 | def wait_and_put(self, queue, item): 52 | queue.put(item) 53 | 54 | def wait_and_get(self, queue): 55 | return queue.get() 56 | 57 | def cam_reader_process(self, **kargcs): 58 | print("Loading Intel Realsense D435......") 59 | try: 60 | print(self.json_file_path) 61 | cam.try_to_advanced_mode(self.json_file_path) 62 | except Exception as e: 63 | print(e) 64 | pass 65 | # Configure depth and color streams 66 | 
pipeline = rs.pipeline() 67 | config = rs.config() 68 | config.enable_stream(rs.stream.depth, self.width, self.height, rs.format.z16, self.fps) 69 | config.enable_stream(rs.stream.color, self.width, self.height, rs.format.bgr8, self.fps) 70 | # Start streaming 71 | pipeline.start(config) 72 | cam.wait4cam_init(pipeline, WaitTimes=10) #等待相机初始化调节自动曝光 73 | #align 74 | align_to = rs.stream.color 75 | align = rs.align(align_to) 76 | print("Camera Starting") 77 | frame_num = 0 78 | while True: 79 | frames = pipeline.wait_for_frames() 80 | aligned_frames = align.process(frames)# Align the depth frame to color frame 81 | aligned_depth_frame = aligned_frames.get_depth_frame()# Get aligned depth and color frames 82 | color_frame = aligned_frames.get_color_frame() 83 | if not aligned_depth_frame or not color_frame:#只有一张rgb和depth不对应的话就舍去 84 | continue 85 | # dp_frame = np.asanyarray(aligned_depth_frame.get_data()) #16bit 86 | frame = np.asanyarray(color_frame.get_data()) 87 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 88 | self.wait_and_put(self.rgb_queue, (frame_num, frame)) 89 | self.wait_and_put(self.dp_queue, (frame_num, aligned_depth_frame)) 90 | frame_num += 1 91 | 92 | def read_rgb(self): 93 | return self.wait_and_get(self.rgb_queue) 94 | 95 | def read_dp(self): 96 | return self.wait_and_get(self.dp_queue) -------------------------------------------------------------------------------- /src/Modules/detection.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | from threading import Thread 4 | from queue import Queue 5 | 6 | import cv2 7 | import scipy.misc 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import sys 12 | sys.path.append('..') 13 | from core.yolov3 import YOLOv3, decode 14 | import core.utils as utils 15 | import core.operation as operation 16 | 17 | class DetectionLoader(): 18 | def __init__(self, opt, queueSize=128): 19 | self.opt = opt 20 | self._num_classes = 80 21 | self._input_size = 416 22 | self._gpu_usages = opt.yolo_gpu_usages#0.4 23 | self.detector = self._load_detector() 24 | 25 | # initialize the queue used to store data 26 | """ 27 | image_queue: the buffer storing pre-processed images for object detection 28 | det_queue: the buffer storing human detection results 29 | pose_queue: the buffer storing post-processed cropped human image for pose estimation 30 | """ 31 | if opt.sp: 32 | self._stopped = False 33 | self.image_queue = Queue(maxsize=queueSize)#input queue 34 | self.det_queue = Queue(maxsize=queueSize) 35 | else: 36 | self._stopped = mp.Value('b', False) 37 | self.image_queue = mp.Queue(maxsize=queueSize) 38 | self.det_queue = mp.Queue(maxsize=queueSize) 39 | 40 | def _load_detector(self): 41 | print("Loading YOLO Model......") 42 | config_tf = tf.compat.v1.ConfigProto() 43 | config_tf.gpu_options.per_process_gpu_memory_fraction = self._gpu_usages 44 | tf.compat.v1.Session(config=config_tf) 45 | input_layer = tf.keras.layers.Input([self._input_size, self._input_size, 3]) 46 | feature_maps = YOLOv3(input_layer) 47 | bbox_tensors = [] 48 | for i, fm in enumerate(feature_maps): 49 | bbox_tensor = decode(fm, i) 50 | bbox_tensors.append(bbox_tensor) 51 | detector = tf.keras.Model(input_layer, bbox_tensors) 52 | utils.load_weights(detector, self.opt.yolo_weights) 53 | # detector.summary() 54 | print("Successfully Loading YOLO Model") 55 | return detector 56 | 57 | def start_worker(self, target): 58 | if self.opt.sp: 59 | p = 
Thread(target=target)#target是該線程需要完成的方法函數 60 | # else: 61 | p.start() 62 | return p 63 | 64 | def start(self): 65 | self.detection_worker = self.start_worker(self.detection_process) 66 | return self 67 | 68 | def stop(self): 69 | self.detection_worker.join() 70 | self.clear_queues() 71 | 72 | def clear_queues(self): 73 | self.clear(self.image_queue) 74 | self.clear(self.det_queue) 75 | 76 | def clear(self, queue): 77 | while not queue.empty(): 78 | queue.get() 79 | 80 | def wait_and_put(self, queue, item): 81 | queue.put(item) 82 | 83 | def wait_and_get(self, queue): 84 | return queue.get() 85 | 86 | def detection_process(self , **kargcs): 87 | while True: 88 | (nums, frame) = self.wait_and_get(self.image_queue) 89 | frame_size = frame.shape[:2] 90 | image_data = utils.image_preporcess(np.copy(frame), [self._input_size, self._input_size])#裁剪图片成416x416 91 | image_data = image_data[np.newaxis, ...].astype(np.float32)#转换成32位浮点数格式 92 | 93 | pred_bbox = self.detector.predict(image_data) 94 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 95 | pred_bbox = tf.concat(pred_bbox, axis=0) 96 | 97 | bboxes = utils.postprocess_boxes(pred_bbox, frame_size, self._input_size, 0.3)#对预测出来的bouding box作后处理 98 | bboxes = utils.nms(bboxes, 0.45, method='nms')#非极大抑制nms选择概括性最高的一个框 99 | 100 | only_one_bboxes = operation.detect_only_one(bboxes, IDi=0)#只输出一个类别 101 | self.wait_and_put(self.det_queue, (nums, only_one_bboxes)) 102 | 103 | def put_image(self, nums, frame): 104 | self.wait_and_put(self.image_queue, (nums, frame)) 105 | 106 | def read(self): 107 | return self.wait_and_get(self.det_queue) 108 | -------------------------------------------------------------------------------- /src/Modules/imager.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import time 4 | from threading import Thread 5 | from queue import Queue 6 | 7 | import cv2 8 | import scipy.misc 9 | import numpy as np 10 | 11 | class ImageReader(): 12 | def __init__(self, opt, queueSize=128): 13 | self.opt = opt 14 | self.rgb_path = opt.rgb_path 15 | self.dp_path = opt.dp_path 16 | 17 | for root, dirs, files in os.walk(self.rgb_path): 18 | self.rgb_len = len(files) 19 | for root, dirs, files in os.walk(self.dp_path): 20 | self.dp_len = len(files) 21 | assert self.rgb_len == self.dp_len, "RGB和深度图数量不相等" 22 | 23 | if opt.sp:#单线程 24 | self.rgb_queue = Queue(maxsize=queueSize) 25 | self.dp_queue = Queue(maxsize=queueSize) 26 | 27 | def start_worker(self, target, path): 28 | if self.opt.sp: 29 | p = Thread(target=target, args=(), kwargs={"num": 0, "path": path})#target是該線程需要完成的方法函數 30 | # else: 31 | p.start() 32 | return p 33 | 34 | def start(self): 35 | self.rgb_image_worker = self.start_worker(self.rgb_reader_process, path=self.rgb_path) 36 | self.dp_image_worker = self.start_worker(self.dp_reader_process, path=self.dp_path) 37 | return self 38 | 39 | def stop(self): 40 | self.rgb_image_worker.join() 41 | self.dp_image_worker.join() 42 | self.clear_queues() 43 | 44 | def clear_queues(self): 45 | self.clear(self.rgb_queue) 46 | self.clear(self.dp_queue) 47 | 48 | def clear(self, queue): 49 | while not queue.empty(): 50 | queue.get() 51 | 52 | def wait_and_put(self, queue, item): 53 | queue.put(item) 54 | 55 | def wait_and_get(self, queue): 56 | return queue.get() 57 | 58 | def rgb_reader_process(self, **kargcs): 59 | while kargcs["num"] < self.rgb_len: 60 | rgb_name = "color_" + str(kargcs["num"]) + ".png" 61 | rgb_file 
= kargcs["path"] + "/" + rgb_name 62 | # print(rgb_file) 63 | frame = cv2.imread(rgb_file, -1) 64 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 65 | self.wait_and_put(self.rgb_queue, (rgb_name, kargcs["num"], frame)) 66 | kargcs["num"] += 1 67 | 68 | def dp_reader_process(self, **kargcs): 69 | while kargcs["num"] < self.dp_len: 70 | dp_name = "depth_8bit_" + str(kargcs["num"]) + ".png" 71 | dp_file = kargcs["path"] + "/" + dp_name 72 | frame = cv2.imread(dp_file, -1) 73 | self.wait_and_put(self.dp_queue, (dp_name, kargcs["num"], frame)) 74 | kargcs["num"] += 1 75 | 76 | def read_rgb(self): 77 | return self.wait_and_get(self.rgb_queue) 78 | 79 | def read_dp(self): 80 | return self.wait_and_get(self.dp_queue) 81 | 82 | 83 | -------------------------------------------------------------------------------- /src/Modules/midprocessor.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | import time 5 | from sys import platform 6 | from threading import Thread 7 | from queue import Queue 8 | 9 | import cv2 10 | import scipy.misc 11 | import numpy as np 12 | 13 | sys.path.append('..') 14 | import core.utils as utils 15 | import core.operation as operation 16 | 17 | class MidProcessor(): 18 | def __init__(self, opt, queueSize=128): 19 | self.opt = opt 20 | self.nn_pix = opt.nn_pix 21 | self.is_train = opt.is_train 22 | self.training_set = opt.training_set 23 | 24 | if opt.sp:#单线程 25 | self.data_queue = Queue(maxsize=queueSize) 26 | self.fuse_queue = Queue(maxsize=queueSize) 27 | 28 | def start_worker(self, target): 29 | if self.opt.sp: 30 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 31 | # else: 32 | p.start() 33 | return p 34 | 35 | def start(self): 36 | self.fuse_worker = self.start_worker(self.fuse_process) 37 | return self 38 | 39 | def stop(self): 40 | self.fuse_worker.join() 41 | self.clear_queues() 42 | 43 | def clear_queues(self): 44 | self.clear(self.data_queue) 45 | self.clear(self.fuse_queue) 46 | 47 | def clear(self, queue): 48 | while not queue.empty(): 49 | queue.get() 50 | 51 | def wait_and_put(self, queue, item): 52 | queue.put(item) 53 | 54 | def wait_and_get(self, queue): 55 | return queue.get() 56 | 57 | def fuse_process(self, **kargcs): 58 | while True: 59 | (kp_nums, kps_list, dp_nums, dp_frame, det_nums, bboxes) = self.wait_and_get(self.data_queue) 60 | # print(kp_nums, dp_nums, det_nums) 61 | assert (kp_nums == dp_nums and dp_nums == det_nums), "队列数据顺序不匹配" 62 | # print("kp list: ",kps_list) 63 | # print("bboxes: ",bboxes) 64 | _flag = self._filter_error(kps_list, bboxes) 65 | if not _flag or len(kps_list) != len(bboxes): 66 | print("error") 67 | self.wait_and_put(self.fuse_queue, ((kp_nums, None))) 68 | else: 69 | output = [] 70 | for kp_list in kps_list: 71 | #[x, y, z]获得5个点映射到原图frame的3维坐标(x, y, z),和附近的几个点 72 | if self.opt.d435: 73 | dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, nn_pix=self.nn_pix, live=True, live_frame=dp_frame) 74 | else: 75 | dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, depth_frame=dp_frame, nn_pix=self.nn_pix) 76 | #求平均值替代中心点深度值 77 | dp_list = operation.get_mean_dep_val_xyz(kp_list=kp_list, dp_nparray=dp_nparray) 78 | #坐标归一化 79 | dp_array_out = operation.norm_keypoints_xyz(dp_list, norm_p_index=0, rgb_size=[640, 480]) 80 | #坐标平移,让坐标值都变成非负数 81 | dp_array_no_neg = operation.no_negative(array=dp_array_out) 82 | #将小于0.001的数字认为是0 83 | dp_array_no_neg_zero = 
operation.filter_zero(dp_array_no_neg, threadhold=0.001) 84 | if self.is_train: 85 | file_txt = open(self.training_set, 'a')#根据不同的类别修改txt的文件命名前缀 86 | file_txt.write(str(dp_array_no_neg)) 87 | file_txt.write('\n') 88 | file_txt.close() 89 | dp_array_no_neg_zero = np.expand_dims(dp_array_no_neg_zero, axis=-1) 90 | # dp_array_no_neg_zero = np.expand_dims(dp_array_no_neg_zero, axis=0) 91 | # print(dp_array_no_neg_zero) 92 | 93 | output.append([dp_list[0], dp_array_no_neg_zero])#kp_list[0] means the nose joint of body 94 | self.wait_and_put(self.fuse_queue, (kp_nums, output)) 95 | 96 | def _filter_error(self, kps_list, bboxes): 97 | kps_len = len(kps_list) 98 | bboxes_len = len(bboxes) 99 | if not kps_len == bboxes_len: 100 | print("目标检测和姿态估计人数结果不匹配") 101 | return False 102 | kps_cet, bboxes_cet = [], [] 103 | kps_array = np.asarray(kps_list) 104 | for i in bboxes: 105 | x_min, y_min = i[0], i[1] 106 | x_max, y_max = i[2], i[3] 107 | _j = 0 108 | for j in kps_array: 109 | # print("J:",j) 110 | for p in j: 111 | # print("P: ",p) 112 | if p[0] >= x_min and p[0] <= x_max: 113 | if p[1] >= y_min and p[1] <= y_max: 114 | _j += 1 115 | # print("-j: ", _j) 116 | if _j >= 10:#too many kps in one bbox, error occur in openpose 117 | print("too many kps in one bbox, error occur in openpose") 118 | return False 119 | break 120 | return True 121 | 122 | def filter_IoU_error(self, kps_list, bboxes): 123 | ''' 124 | bboxes:[x_min,y_min,x_max,y_max,prob,cls] 125 | ''' 126 | kps_len = len(kps_list) 127 | bboxes_len = len(bboxes) 128 | assert kps_len == bboxes_len, "目标检测和姿态估计人数结果不匹配" 129 | for i in kps_list: 130 | for j in i: 131 | if j[0] == 0 or j[1] == 0: 132 | i.remove(j) 133 | kps_box = [] 134 | kps_array = np.asarray(kps_list) 135 | for i in kps_array: 136 | x_min, y_min = np.amin(i, 0) 137 | x_max, y_max = np.amax(i, 0) 138 | # x_cet = x_min + int((x_max - x_min) / 2) 139 | # y_cet = y_min + int((y_max - y_min) / 2) 140 | kps_box.append([x_min, y_min, x_max, y_max]) 141 | print("kps_box: ",kps_box) 142 | _t = 0 143 | for i in kps_box: 144 | for j in bboxes: 145 | print(j[:4]) 146 | _IoU = self.IOU(i, j[:4]) 147 | print(_IoU) 148 | if _IoU > 0.4: 149 | _t += 1 150 | if _t != len(kps_box): 151 | print("出现错误, 跳过一帧") 152 | # else 153 | pass 154 | 155 | def IOU(self, box1, box2): 156 | ''' 157 | :param box1:[x1,y1,x2,y2] 左上角的坐标与右下角的坐标 158 | :param box2:[x1,y1,x2,y2] 159 | :return: iou_ratio--交并比 160 | ''' 161 | width1 = abs(box1[2] - box1[0]) 162 | height1 = abs(box1[1] - box1[3]) # 这里y1-y2是因为一般情况y1>y2,为了方便采用绝对值 163 | width2 = abs(box2[2] - box2[0]) 164 | height2 = abs(box2[1] - box2[3]) 165 | x_max = max(box1[0],box1[2],box2[0],box2[2]) 166 | y_max = max(box1[1],box1[3],box2[1],box2[3]) 167 | x_min = min(box1[0],box1[2],box2[0],box2[2]) 168 | y_min = min(box1[1],box1[3],box2[1],box2[3]) 169 | iou_width = x_min + width1 + width2 - x_max 170 | iou_height = y_min + height1 + height2 - y_max 171 | if iou_width <= 0 or iou_height <= 0: 172 | iou_ratio = 0 173 | else: 174 | iou_area = iou_width * iou_height # 交集的面积 175 | box1_area = width1 * height1 176 | box2_area = width2 * height2 177 | iou_ratio = iou_area / (box1_area + box2_area - iou_area) # 并集的面积 178 | return iou_ratio 179 | 180 | def put_data(self, kp_nums, kp_list, dp_nums, dp_frame, det_nums, bboxes): 181 | self.wait_and_put(self.data_queue, (kp_nums, kp_list, dp_nums, dp_frame, det_nums, bboxes)) 182 | 183 | def read(self): 184 | return self.wait_and_get(self.fuse_queue) 185 | 
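The IOU helper above derives the overlap ratio from box widths and heights; the same quantity can be written more directly from corner coordinates, which is the box format filter_IoU_error actually passes in. The following standalone sketch (the iou_corners name and the sample boxes are illustrative, not part of this package) shows the corner-based formulation of the same ratio:

def iou_corners(box1, box2):
    # box format: [x_min, y_min, x_max, y_max]
    ix_min = max(box1[0], box2[0])
    iy_min = max(box1[1], box2[1])
    ix_max = min(box1[2], box2[2])
    iy_max = min(box1[3], box2[3])
    inter = max(0.0, ix_max - ix_min) * max(0.0, iy_max - iy_min)  # intersection area
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter
    return inter / union if union > 0 else 0.0

# example: iou_corners([0, 0, 10, 10], [5, 5, 15, 15]) -> 25 / 175, roughly 0.14,
# which is below the 0.4 threshold used in filter_IoU_error, so that pairing would be rejected.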
-------------------------------------------------------------------------------- /src/Modules/myserver.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | from threading import Thread 5 | from queue import Queue 6 | import pickle 7 | 8 | 9 | class myServer(): 10 | def __init__(self, opt, queueSize=128): 11 | self.opt = opt 12 | self.host = opt.host 13 | self.port = opt.port 14 | from multiprocessing.connection import Listener 15 | server_sock = Listener((self.host, self.port)) 16 | 17 | self.conn = server_sock.accept() 18 | print('Server Listening') 19 | 20 | if opt.sp: 21 | self.data_queue = Queue(maxsize=queueSize) 22 | self.outptu_queue = Queue(maxsize=queueSize) 23 | 24 | def start_worker(self, target): 25 | if self.opt.sp: 26 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 27 | # else: 28 | p.start() 29 | return p 30 | 31 | def start(self): 32 | self.server_worker = self.start_worker(self.frame_reader_process) 33 | self.data_send_worker = self.start_worker(self.data_send_process) 34 | return self 35 | 36 | def stop(self): 37 | self.server_worker.join() 38 | self.data_send_worker.join() 39 | self.clear_queues() 40 | 41 | def clear_queues(self): 42 | self.clear(self.data_queue) 43 | 44 | def clear(self, queue): 45 | while not queue.empty(): 46 | queue.get() 47 | 48 | def wait_and_put(self, queue, item): 49 | queue.put(item) 50 | 51 | def wait_and_get(self, queue): 52 | return queue.get() 53 | 54 | def frame_reader_process(self, **kargcs): 55 | while True: 56 | data_bytes = self.conn.recv() 57 | data = pickle.loads(data_bytes) 58 | # print('Received:', type(data)) 59 | self.wait_and_put(self.data_queue, data) 60 | 61 | def data_send_process(self): 62 | while True: 63 | (bboxes, pred_data) = self.wait_and_get(self.outptu_queue) 64 | pred_data_bytes = pickle.dumps((bboxes, pred_data)) 65 | self.conn.send(pred_data_bytes) 66 | 67 | def read_data(self): 68 | return self.wait_and_get(self.data_queue) 69 | 70 | def get_data(self, bboxes, pred_data): 71 | self.wait_and_put(self.outptu_queue, (bboxes, pred_data)) -------------------------------------------------------------------------------- /src/Modules/poser.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import os 3 | import sys 4 | from sys import platform 5 | from threading import Thread 6 | from queue import Queue 7 | 8 | import cv2 9 | import scipy.misc 10 | import numpy as np 11 | 12 | sys.path.append('..') 13 | import core.utils as utils 14 | import core.operation as operation 15 | 16 | sys.path.append('/home/dongjai/openpose/build/python') 17 | from openpose import pyopenpose as op 18 | 19 | class PoseLoader(): 20 | def __init__(self, opt, queueSize=128): 21 | self.opt = opt 22 | self.kp_num = opt.kp_num 23 | self._params = dict() 24 | self._params["model_folder"] = "/home/dongjai/openpose/models/" 25 | self._params["model_pose"] = "COCO" 26 | self._params["face"] = False 27 | self._params["hand"] = False 28 | self.pose_estimator = self._load_estimator() 29 | 30 | if opt.sp: 31 | self._stopped = False 32 | self.image_queue = Queue(maxsize=queueSize)#input queue 33 | self.pose_queue = Queue(maxsize=queueSize) 34 | else: 35 | self._stopped = mp.Value('b', False) 36 | self.image_queue = mp.Queue(maxsize=queueSize) 37 | self.pose_queue = mp.Queue(maxsize=queueSize) 38 | 39 | def 
_load_estimator(self): 40 | print("Loading Openpose......") 41 | self._opWrapper = op.WrapperPython() 42 | self._opWrapper.configure(self._params) 43 | self._opWrapper.start() 44 | 45 | datum = op.Datum() 46 | print("Successfully Loading Openpose") 47 | return datum 48 | 49 | def start_worker(self, target): 50 | if self.opt.sp: 51 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 52 | # else: 53 | p.start() 54 | return p 55 | 56 | def start(self): 57 | self.estimator_worker = self.start_worker(self.estimator_process) 58 | return self 59 | 60 | def stop(self): 61 | self.estimator_worker.join() 62 | self.clear_queues() 63 | 64 | def clear_queues(self): 65 | self.clear(self.image_queue) 66 | self.clear(self.pose_queue) 67 | 68 | def clear(self, queue): 69 | while not queue.empty(): 70 | queue.get() 71 | 72 | def wait_and_put(self, queue, item): 73 | queue.put(item) 74 | 75 | def wait_and_get(self, queue): 76 | return queue.get() 77 | 78 | def estimator_process(self, **kargcs): 79 | while True: 80 | (nums, frame) = self.wait_and_get(self.image_queue) 81 | self.pose_estimator.cvInputData = frame 82 | self._opWrapper.emplaceAndPop([self.pose_estimator]) 83 | if not operation.is_get_keypoints(self.pose_estimator): 84 | print("No keypoints") 85 | self.wait_and_put(self.pose_queue, (nums, None)) 86 | else: 87 | kps_list = operation.get_keypoints_xy(datum=self.pose_estimator, kp_num=self.kp_num) 88 | self.wait_and_put(self.pose_queue, (nums, kps_list)) 89 | 90 | def put_image(self, nums, frame): 91 | self.wait_and_put(self.image_queue, (nums, frame)) 92 | 93 | def read(self): 94 | return self.wait_and_get(self.pose_queue) 95 | -------------------------------------------------------------------------------- /src/Modules/recognizer.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | import sys 3 | from threading import Thread 4 | from queue import Queue 5 | 6 | import cv2 7 | import scipy.misc 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | sys.path.append('..') 12 | import core.utils as utils 13 | import core.operation as operation 14 | 15 | class ActionRecognizer(): 16 | def __init__(self, opt, queueSize=128): 17 | self.opt = opt 18 | self.kps_size = opt.kps_usage_num 19 | self.mlp_weights = opt.mlp_weights 20 | 21 | self.act_recognizer = self._load_recognizer() 22 | 23 | if opt.sp: 24 | self._stopped = False 25 | self.data_queue = Queue(maxsize=queueSize)#input queue 26 | self.act_queue = Queue(maxsize=queueSize) 27 | else: 28 | self._stopped = mp.Value('b', False) 29 | self.data_queue = mp.Queue(maxsize=queueSize) 30 | self.act_queue = mp.Queue(maxsize=queueSize) 31 | 32 | def _load_recognizer(self): 33 | print("Loading Recognizer......") 34 | mlp_inputs = tf.keras.Input(shape=(3, self.kps_size, 1)) 35 | mlp_outputs = operation.my_mlp(mlp_inputs) 36 | mlp_model = tf.keras.Model(inputs=mlp_inputs, outputs=mlp_outputs) 37 | mlp_model.compile( 38 | optimizer = tf.keras.optimizers.Adam(learning_rate=0.001), 39 | loss = tf.keras.losses.sparse_categorical_crossentropy, 40 | metrics = [tf.keras.metrics.sparse_categorical_accuracy] 41 | ) 42 | mlp_model.load_weights(self.mlp_weights) 43 | print("Successfully Loading Recognizer") 44 | return mlp_model 45 | 46 | def start_worker(self, target): 47 | if self.opt.sp: 48 | p = Thread(target=target, args=(), kwargs={})#target是該線程需要完成的方法函數 49 | # else: 50 | p.start() 51 | return p 52 | 53 | def start(self): 54 | self.recognizer_worker = 
self.start_worker(self.recognizer_process) 55 | return self 56 | 57 | def stop(self): 58 | self.recognizer_worker.join() 59 | self.clear_queues() 60 | 61 | def clear_queues(self): 62 | self.clear(self.data_queue) 63 | self.clear(self.act_queue) 64 | 65 | def clear(self, queue): 66 | while not queue.empty(): 67 | queue.get() 68 | 69 | def wait_and_put(self, queue, item): 70 | queue.put(item) 71 | 72 | def wait_and_get(self, queue): 73 | return queue.get() 74 | 75 | def recognizer_process(self, **kargcs): 76 | while True: 77 | (nums, data) = self.wait_and_get(self.data_queue) 78 | len_data = len(data) 79 | # print(len_data) 80 | _xyz_data = [] 81 | # action_name = [] 82 | for _t in data: 83 | _xyz_data.append(_t[1].tolist()) 84 | _q = np.asarray(_xyz_data) 85 | action_pred = self.act_recognizer.predict(_q, batch_size=len_data) 86 | # print("pred: ",action_pred) 87 | action_name = operation.get_action_name(label=action_pred)#需要测试多人的情况 88 | # action_name.append(operation.get_action_name(label=action_pred)) 89 | # print("name: ",action_name) 90 | for i, _t in enumerate(data): 91 | _t[1] = action_name[i] 92 | self.wait_and_put(self.act_queue, (nums, data)) 93 | 94 | def put_data(self, nums, data): 95 | self.wait_and_put(self.data_queue, (nums, data)) 96 | 97 | def read(self): 98 | return self.wait_and_get(self.act_queue) -------------------------------------------------------------------------------- /src/core/backbone.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 5 | # 6 | # Editor : VIM 7 | # File name : backbone.py 8 | # Author : YunYang1994 9 | # Created date: 2019-07-11 23:37:51 10 | # Description : 11 | # 12 | #================================================================ 13 | 14 | import tensorflow as tf 15 | import core.common as common 16 | 17 | 18 | def darknet53(input_data): 19 | 20 | input_data = common.convolutional(input_data, (3, 3, 3, 32)) 21 | input_data = common.convolutional(input_data, (3, 3, 32, 64), downsample=True) 22 | 23 | for i in range(1): 24 | input_data = common.residual_block(input_data, 64, 32, 64) 25 | 26 | input_data = common.convolutional(input_data, (3, 3, 64, 128), downsample=True) 27 | 28 | for i in range(2): 29 | input_data = common.residual_block(input_data, 128, 64, 128) 30 | 31 | input_data = common.convolutional(input_data, (3, 3, 128, 256), downsample=True) 32 | 33 | for i in range(8): 34 | input_data = common.residual_block(input_data, 256, 128, 256) 35 | 36 | route_1 = input_data 37 | input_data = common.convolutional(input_data, (3, 3, 256, 512), downsample=True) 38 | 39 | for i in range(8): 40 | input_data = common.residual_block(input_data, 512, 256, 512) 41 | 42 | route_2 = input_data 43 | input_data = common.convolutional(input_data, (3, 3, 512, 1024), downsample=True) 44 | 45 | for i in range(4): 46 | input_data = common.residual_block(input_data, 1024, 512, 1024) 47 | 48 | return route_1, route_2, input_data 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/core/camera.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import random 3 | import colorsys 4 | import numpy as np 5 | import copy 6 | from core.config import cfg 7 | import pyrealsense2 as rs 8 | import json 9 | 10 | def wait4cam_init(rspipeline, WaitTimes): 11 | for i in 
range(WaitTimes): 12 | tmp_frame = rspipeline.wait_for_frames() 13 | 14 | def find_device_that_supports_advanced_mode(): 15 | DS5_product_ids = ["0AD1", "0AD2", "0AD3", "0AD4", "0AD5", "0AF6", "0AFE", "0AFF", "0B00", "0B01", "0B03", "0B07","0B3A"] 16 | ctx = rs.context() 17 | ds5_dev = rs.device() 18 | devices = ctx.query_devices() 19 | for dev in devices: 20 | if dev.supports(rs.camera_info.product_id) and str(dev.get_info(rs.camera_info.product_id)) in DS5_product_ids: 21 | if dev.supports(rs.camera_info.name): 22 | print("Found device that supports advanced mode:", dev.get_info(rs.camera_info.name)) 23 | return dev 24 | raise Exception("No device that supports advanced mode was found") 25 | 26 | def try_to_advanced_mode(json_file_path): 27 | dev = find_device_that_supports_advanced_mode() 28 | advnc_mode = rs.rs400_advanced_mode(dev) 29 | print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled") 30 | 31 | # Loop until we successfully enable advanced mode 32 | while not advnc_mode.is_enabled(): 33 | print("Trying to enable advanced mode...") 34 | advnc_mode.toggle_advanced_mode(True) 35 | # At this point the device will disconnect and re-connect. 36 | print("Sleeping for 5 seconds...") 37 | time.sleep(5) 38 | # The 'dev' object will become invalid and we need to initialize it again 39 | dev = find_device_that_supports_advanced_mode() 40 | advnc_mode = rs.rs400_advanced_mode(dev) 41 | print("Advanced mode is", "enabled" if advnc_mode.is_enabled() else "disabled") 42 | 43 | # Load json setting file 44 | jsonload = json.load(open(json_file_path)) 45 | json_string = str(jsonload).replace("'", '\"') 46 | advnc_mode.load_json(json_string) 47 | return dev, advnc_mode 48 | 49 | def get_depth_distance(event,x,y,flags,param): 50 | if event == cv2.EVENT_MOUSEMOVE: 51 | # print the distance of depth for pixel of mouse 52 | print("point: " + str(x) + ", " + str(y)) 53 | print("pixel: ", aligned_depth_8bit[y, x]) 54 | print("dis: " + str(aligned_depth_frame.get_distance(x, y))) -------------------------------------------------------------------------------- /src/core/common.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 5 | # 6 | # Editor : VIM 7 | # File name : common.py 8 | # Author : YunYang1994 9 | # Created date: 2019-07-11 23:12:53 10 | # Description : 11 | # 12 | #================================================================ 13 | 14 | import tensorflow as tf 15 | 16 | class BatchNormalization(tf.keras.layers.BatchNormalization): 17 | """ 18 | "Frozen state" and "inference mode" are two separate concepts. 19 | `layer.trainable = False` is to freeze the layer, so the layer will use 20 | stored moving `var` and `mean` in the "inference mode", and both `gama` 21 | and `beta` will not be updated ! 
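A frozen layer therefore always runs in inference mode: call() below ANDs the training flag with `self.trainable`, so batch statistics are never recomputed while the layer is frozen.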
22 | """ 23 | def call(self, x, training=False): 24 | #if not training: 25 | if training is not None: 26 | training = tf.constant(False) 27 | training = tf.logical_and(training, self.trainable) 28 | return super().call(x, training) 29 | 30 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True): 31 | if downsample: 32 | input_layer = tf.keras.layers.ZeroPadding2D(((1, 0), (1, 0)))(input_layer) 33 | padding = 'valid' 34 | strides = 2 35 | else: 36 | strides = 1 37 | padding = 'same' 38 | 39 | conv = tf.keras.layers.Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, padding=padding, 40 | use_bias=not bn, kernel_regularizer=tf.keras.regularizers.l2(0.0005), 41 | kernel_initializer=tf.random_normal_initializer(stddev=0.01), 42 | bias_initializer=tf.constant_initializer(0.))(input_layer) 43 | 44 | if bn: conv = BatchNormalization()(conv) 45 | if activate == True: conv = tf.nn.leaky_relu(conv, alpha=0.1) 46 | 47 | return conv 48 | 49 | def residual_block(input_layer, input_channel, filter_num1, filter_num2): 50 | short_cut = input_layer 51 | conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1)) 52 | conv = convolutional(conv , filters_shape=(3, 3, filter_num1, filter_num2)) 53 | 54 | residual_output = short_cut + conv 55 | return residual_output 56 | 57 | def upsample(input_layer): 58 | return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest') 59 | 60 | -------------------------------------------------------------------------------- /src/core/config.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 5 | # 6 | # Editor : VIM 7 | # File name : config.py 8 | # Author : YunYang1994 9 | # Created date: 2019-02-28 13:06:54 10 | # Description : 11 | # 12 | #================================================================ 13 | 14 | from easydict import EasyDict as edict 15 | 16 | __C = edict() 17 | # Consumers can get config by: from config import cfg 18 | 19 | cfg = __C 20 | 21 | # YOLO options 22 | __C.YOLO = edict() 23 | 24 | # Set the class name 25 | __C.YOLO.CLASSES = '/home/dongjai/catkin_ws/src/act_recognizer/src/data/classes/coco.names' 26 | #__C.YOLO.CLASSES = "./data/classes/yymnist.names" 27 | __C.YOLO.ANCHORS = '/home/dongjai/catkin_ws/src/act_recognizer/src/data/anchors/basline_anchors.txt' 28 | __C.YOLO.STRIDES = [8, 16, 32] 29 | __C.YOLO.ANCHOR_PER_SCALE = 3 30 | __C.YOLO.IOU_LOSS_THRESH = 0.5 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/core/dataset.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 
5 | # 6 | # Editor : VIM 7 | # File name : dataset.py 8 | # Author : YunYang1994 9 | # Created date: 2019-03-15 18:05:03 10 | # Description : 11 | # 12 | #================================================================ 13 | 14 | import os 15 | import cv2 16 | import random 17 | import numpy as np 18 | import tensorflow as tf 19 | import core.utils as utils 20 | from core.config import cfg 21 | 22 | 23 | 24 | class Dataset(object): 25 | """implement Dataset here""" 26 | def __init__(self, dataset_type): 27 | self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH 28 | self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE 29 | self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE 30 | self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG 31 | 32 | self.train_input_sizes = cfg.TRAIN.INPUT_SIZE 33 | self.strides = np.array(cfg.YOLO.STRIDES) 34 | self.classes = utils.read_class_names(cfg.YOLO.CLASSES) 35 | self.num_classes = len(self.classes) 36 | self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS)) 37 | self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE 38 | self.max_bbox_per_scale = 150 39 | 40 | self.annotations = self.load_annotations(dataset_type) 41 | self.num_samples = len(self.annotations) 42 | self.num_batchs = int(np.ceil(self.num_samples / self.batch_size)) 43 | self.batch_count = 0 44 | 45 | 46 | def load_annotations(self, dataset_type): 47 | with open(self.annot_path, 'r') as f: 48 | txt = f.readlines() 49 | annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0] 50 | np.random.shuffle(annotations) 51 | return annotations 52 | 53 | def __iter__(self): 54 | return self 55 | 56 | def __next__(self): 57 | 58 | with tf.device('/cpu:0'): 59 | self.train_input_size = random.choice(self.train_input_sizes) 60 | self.train_output_sizes = self.train_input_size // self.strides 61 | 62 | batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32) 63 | 64 | batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], 65 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 66 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], 67 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 68 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2], 69 | self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 70 | 71 | batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 72 | batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 73 | batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 74 | 75 | num = 0 76 | if self.batch_count < self.num_batchs: 77 | while num < self.batch_size: 78 | index = self.batch_count * self.batch_size + num 79 | if index >= self.num_samples: index -= self.num_samples 80 | annotation = self.annotations[index] 81 | image, bboxes = self.parse_annotation(annotation) 82 | label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes) 83 | 84 | batch_image[num, :, :, :] = image 85 | batch_label_sbbox[num, :, :, :, :] = label_sbbox 86 | batch_label_mbbox[num, :, :, :, :] = label_mbbox 87 | batch_label_lbbox[num, :, :, :, :] = label_lbbox 88 | 
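# the raw ground-truth boxes (x, y, w, h) for each scale are batched separately from the dense label tensors; compute_loss takes both as inputs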
batch_sbboxes[num, :, :] = sbboxes 89 | batch_mbboxes[num, :, :] = mbboxes 90 | batch_lbboxes[num, :, :] = lbboxes 91 | num += 1 92 | self.batch_count += 1 93 | batch_smaller_target = batch_label_sbbox, batch_sbboxes 94 | batch_medium_target = batch_label_mbbox, batch_mbboxes 95 | batch_larger_target = batch_label_lbbox, batch_lbboxes 96 | 97 | return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target) 98 | else: 99 | self.batch_count = 0 100 | np.random.shuffle(self.annotations) 101 | raise StopIteration 102 | 103 | def random_horizontal_flip(self, image, bboxes): 104 | 105 | if random.random() < 0.5: 106 | _, w, _ = image.shape 107 | image = image[:, ::-1, :] 108 | bboxes[:, [0,2]] = w - bboxes[:, [2,0]] 109 | 110 | return image, bboxes 111 | 112 | def random_crop(self, image, bboxes): 113 | 114 | if random.random() < 0.5: 115 | h, w, _ = image.shape 116 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) 117 | 118 | max_l_trans = max_bbox[0] 119 | max_u_trans = max_bbox[1] 120 | max_r_trans = w - max_bbox[2] 121 | max_d_trans = h - max_bbox[3] 122 | 123 | crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans))) 124 | crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans))) 125 | crop_xmax = max(w, int(max_bbox[2] + random.uniform(0, max_r_trans))) 126 | crop_ymax = max(h, int(max_bbox[3] + random.uniform(0, max_d_trans))) 127 | 128 | image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax] 129 | 130 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin 131 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin 132 | 133 | return image, bboxes 134 | 135 | def random_translate(self, image, bboxes): 136 | 137 | if random.random() < 0.5: 138 | h, w, _ = image.shape 139 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) 140 | 141 | max_l_trans = max_bbox[0] 142 | max_u_trans = max_bbox[1] 143 | max_r_trans = w - max_bbox[2] 144 | max_d_trans = h - max_bbox[3] 145 | 146 | tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1)) 147 | ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1)) 148 | 149 | M = np.array([[1, 0, tx], [0, 1, ty]]) 150 | image = cv2.warpAffine(image, M, (w, h)) 151 | 152 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx 153 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty 154 | 155 | return image, bboxes 156 | 157 | def parse_annotation(self, annotation): 158 | 159 | line = annotation.split() 160 | image_path = line[0] 161 | if not os.path.exists(image_path): 162 | raise KeyError("%s does not exist ... 
" %image_path) 163 | image = cv2.imread(image_path) 164 | bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]]) 165 | 166 | if self.data_aug: 167 | image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes)) 168 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) 169 | image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes)) 170 | 171 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 172 | image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes)) 173 | return image, bboxes 174 | 175 | def bbox_iou(self, boxes1, boxes2): 176 | 177 | boxes1 = np.array(boxes1) 178 | boxes2 = np.array(boxes2) 179 | 180 | boxes1_area = boxes1[..., 2] * boxes1[..., 3] 181 | boxes2_area = boxes2[..., 2] * boxes2[..., 3] 182 | 183 | boxes1 = np.concatenate([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 184 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 185 | boxes2 = np.concatenate([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 186 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 187 | 188 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 189 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 190 | 191 | inter_section = np.maximum(right_down - left_up, 0.0) 192 | inter_area = inter_section[..., 0] * inter_section[..., 1] 193 | union_area = boxes1_area + boxes2_area - inter_area 194 | 195 | return inter_area / union_area 196 | 197 | def preprocess_true_boxes(self, bboxes): 198 | 199 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, 200 | 5 + self.num_classes)) for i in range(3)] 201 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)] 202 | bbox_count = np.zeros((3,)) 203 | 204 | for bbox in bboxes: 205 | bbox_coor = bbox[:4] 206 | bbox_class_ind = bbox[4] 207 | 208 | onehot = np.zeros(self.num_classes, dtype=np.float) 209 | onehot[bbox_class_ind] = 1.0 210 | uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes) 211 | deta = 0.01 212 | smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution 213 | 214 | bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) 215 | bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] 216 | 217 | iou = [] 218 | exist_positive = False 219 | for i in range(3): 220 | anchors_xywh = np.zeros((self.anchor_per_scale, 4)) 221 | anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 222 | anchors_xywh[:, 2:4] = self.anchors[i] 223 | 224 | iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) 225 | iou.append(iou_scale) 226 | iou_mask = iou_scale > 0.3 227 | 228 | if np.any(iou_mask): 229 | xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) 230 | 231 | label[i][yind, xind, iou_mask, :] = 0 232 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh 233 | label[i][yind, xind, iou_mask, 4:5] = 1.0 234 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot 235 | 236 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) 237 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh 238 | bbox_count[i] += 1 239 | 240 | exist_positive = True 241 | 242 | if not exist_positive: 243 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) 244 | best_detect = int(best_anchor_ind / self.anchor_per_scale) 245 | best_anchor = int(best_anchor_ind % self.anchor_per_scale) 246 | xind, yind = np.floor(bbox_xywh_scaled[best_detect, 
0:2]).astype(np.int32) 247 | 248 | label[best_detect][yind, xind, best_anchor, :] = 0 249 | label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh 250 | label[best_detect][yind, xind, best_anchor, 4:5] = 1.0 251 | label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot 252 | 253 | bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale) 254 | bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh 255 | bbox_count[best_detect] += 1 256 | label_sbbox, label_mbbox, label_lbbox = label 257 | sbboxes, mbboxes, lbboxes = bboxes_xywh 258 | return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes 259 | 260 | def __len__(self): 261 | return self.num_batchs 262 | 263 | 264 | 265 | 266 | -------------------------------------------------------------------------------- /src/core/operation.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import copy 4 | import tensorflow as tf 5 | 6 | def list_sortby(bboxes_one, index, order): 7 | ''' 8 | sort list. list likes [ [1,2,3,4], [5,6,7,8], [9,1,2,3]]. 9 | index refers to the inner [] number index. 10 | index=4 means it will sort this list according 4 8 3 11 | ranked by descending order=0, increasing order=1 12 | ''' 13 | if order == 0: 14 | tmp_len = len(bboxes_one) 15 | for i in range(tmp_len):#排序 16 | for j in range(tmp_len): 17 | tmp_t = bboxes_one[i][index] 18 | tmp_k = bboxes_one[j][index] 19 | if tmp_t > tmp_k: 20 | tmp = bboxes_one[i] 21 | bboxes_one[i] = bboxes_one[j] 22 | bboxes_one[j] = tmp 23 | if order == 1: 24 | tmp_len = len(bboxes_one) 25 | for i in range(tmp_len):#排序 26 | for j in range(tmp_len): 27 | tmp_t = bboxes_one[i][index] 28 | tmp_k = bboxes_one[j][index] 29 | if tmp_t < tmp_k: 30 | tmp = bboxes_one[i] 31 | bboxes_one[i] = bboxes_one[j] 32 | bboxes_one[j] = tmp 33 | return bboxes_one 34 | 35 | def detect_only_one(bboxes, IDi): 36 | ''' 37 | Select one specific class to output according the class ID. 38 | ''' 39 | bboxes_one = [] 40 | # bboxes_one_out = [] 41 | bboxes_num = len(bboxes) 42 | if bboxes_num > 0: 43 | for j in range(bboxes_num): 44 | if int(bboxes[j][5]) == IDi:#只过滤等于ID的标签,person==0, car==2 45 | bboxes_one.append(bboxes[j]) 46 | return bboxes_one 47 | 48 | def pick_top_n(bboxes, top_n): 49 | ''' 50 | It will select top n elements in bboxes(a list) as output.\\ 51 | If the list of bboxes was sorted by increasing order, it will select lower elements. 52 | ''' 53 | tmp_len = len(bboxes) 54 | if tmp_len < top_n : 55 | print("only have : ", tmp_len) 56 | top_n = tmp_len 57 | return bboxes[:top_n] 58 | 59 | 60 | def select_and_pick(bboxes, index, order, top_n, gap=False): 61 | ''' 62 | sorted according to index, 63 | and ranked by descending order=0, increasing order=1 64 | then select top_n elements as output 65 | ''' 66 | tmp = pick_top_n(list_sortby(bboxes, index, order), top_n) 67 | if gap: 68 | tmp_out = [] 69 | for i in range(len(tmp)): 70 | t = tmp[i][index] 71 | # print("prob", t) 72 | if t >= gap[0] and t < gap[1]: 73 | tmp_out.append(tmp[i]) 74 | else: 75 | tmp_out = tmp.copy() 76 | return tmp_out 77 | 78 | def select_in_gap(bboxes, gap, alpha, live=False): 79 | ''' 80 | If depth frame is 16bit frame for each pixel, alpha should be equal to 0. 
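(alpha is presumably the scale factor that was applied when the 16-bit depth map was converted to 8 bit, so dividing by it restores the original depth range.)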
81 | If it is 8bit frame, alpha = 0.3 82 | ''' 83 | i = len(bboxes) 84 | t = 0 85 | while t != i : 86 | if live: 87 | dist_16bit = bboxes[t][5] * 1000 88 | else: 89 | if alpha == 0: 90 | dist_16bit = bboxes[t][5] 91 | else: 92 | dist_16bit = bboxes[t][5] / alpha 93 | if dist_16bit >= float(gap[0]) and dist_16bit <= float(gap[1]): 94 | t = t + 1 95 | else: 96 | del bboxes[t] 97 | i = i - 1 98 | return bboxes 99 | 100 | 101 | def cut_bbox(image, bboxes, ext_pix_x, ext_pix_y): 102 | """ 103 | bboxes: [x_min, y_min, x_max, y_max, probability, depth] format coordinates. 104 | OUTPUT : 105 | left_top_list is a list that containing each single img left top point x,y values for frame(original size image), 106 | img_set is in BGR style 107 | """ 108 | # img_blank = np.zeros(shape=image.shape, dtype=image.dtype) 109 | img_set = [] 110 | left_top_list = [] 111 | 112 | for i, bbox in enumerate(bboxes): 113 | coor = np.array(bbox[:4], dtype=np.int32) 114 | if (coor[0] - ext_pix_x) < 0 : 115 | x1 = 0 116 | else: 117 | x1 = coor[0] - ext_pix_x 118 | if (coor[1] - ext_pix_y) < 0 : 119 | y1 = 0 120 | else: 121 | y1 = coor[1] - ext_pix_y 122 | if (coor[2] + ext_pix_x) > image.shape[1]: 123 | x2 = image.shape[1] 124 | else: 125 | x2 = coor[2] + ext_pix_x 126 | if (coor[3] + ext_pix_y) > image.shape[0]: 127 | y2 = image.shape[0] 128 | else: 129 | y2 = coor[3] + ext_pix_y 130 | c1, c2 = (x1, y1), (x2, y2) # (x, y) 131 | img_one_blank = np.zeros(shape=(y2 - y1, x2 - x1, 3), dtype=image.dtype) 132 | img_one_blank[:, :, 0] = image[y1:y2, x1:x2, 2] #image->RGB img_one_blank->BGR 133 | img_one_blank[:, :, 1] = image[y1:y2, x1:x2, 1] 134 | img_one_blank[:, :, 2] = image[y1:y2, x1:x2, 0] 135 | img_set.append(img_one_blank) 136 | left_top_list.append([x1, y1]) 137 | # cv2.namedWindow("cut_result", cv2.WINDOW_AUTOSIZE) 138 | # cut_result = cv2.cvtColor(img_one_blank, cv2.COLOR_RGB2BGR) 139 | # cv2.imshow("cut_result", cut_result) 140 | # img_set.append(img_blank)#返回的图像和原图frame尺寸一致,只有bbox内的图像,但各自不是独立的 141 | return img_set, left_top_list 142 | 143 | def get_centerpoints_xyz(bboxes, depth_frame, live=False, live_frame=None): 144 | ''' 145 | bboxes [x1, y1, x2, y2, prob, cls] 146 | Change the cls into depth value 147 | ''' 148 | bboxes_dp = copy.deepcopy(bboxes) 149 | for i in range(len(bboxes_dp)): 150 | x1 = bboxes_dp[i][0] 151 | y1 = bboxes_dp[i][1] 152 | x2 = bboxes_dp[i][2] 153 | y2 = bboxes_dp[i][3] 154 | c1 = int(( x2 - x1 ) / 2) 155 | c2 = int(( y2 - y1 ) / 2) 156 | if live: 157 | dp_val = live_frame.get_distance(c1, c2) 158 | else: 159 | dp_val = depth_frame[c2, c1]#depth_frame[c2, c1] 160 | bboxes_dp[i][-1] = dp_val 161 | return bboxes_dp 162 | 163 | def is_get_keypoints(datum): 164 | ''' 165 | Judge the keypoints set is empty or not. 166 | ''' 167 | try: 168 | a = len(datum.poseKeypoints) 169 | # print("len keypoinst", a) 170 | except: 171 | return False 172 | return True 173 | 174 | def get_keypoints_xy(datum, kp_num): 175 | ''' 176 | Get keypoint (x, y) and put them into a list as return. 
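Returns one [x, y] sub-list per detected person, containing only the joints whose indices appear in kp_num.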
177 | ''' 178 | kps_list = [] 179 | for j in datum.poseKeypoints: 180 | kp_list = [] 181 | for i in kp_num: 182 | x = j[i][0] 183 | y = j[i][1] 184 | kp_list.append([x, y]) #[x, y] 185 | kps_list.append(kp_list) 186 | return kps_list 187 | 188 | def get_keypoints_xyz(kp_list, depth_frame=None, nn_pix=3, live=False, live_frame=None): #dp_frame.shape = (y, x) 单通道, nn_num 中心点近邻像素数量 189 | ''' 190 | nn_pix is neighbor pixel of the central pixel\\ 191 | live is a symbol of livecam or not\\ 192 | live_frame is depth frame flow grasp from camera 193 | ''' 194 | # testkp_x, testkp_y = [326, 280, 365, 302, 343], [127, 269, 264, 335, 335] 195 | dp_nparray = np.ndarray(shape=(len(kp_list), nn_pix, nn_pix, 3)) #这里的3存的是一个点的xyz信息 196 | for i in range(len(kp_list)): 197 | kp_x, kp_y = kp_list[i][0], kp_list[i][1] 198 | gap = int((nn_pix - 1)/2) 199 | kp_lt_x, kp_lt_y = int(kp_x - gap), int(kp_y - gap) #left top cornor point 200 | #纠正方格块,如果方格块有部分已经超出图像边缘,则把左上角坐标移动回到图像边缘 201 | if kp_lt_x < 0: 202 | kp_lt_x = 0 203 | if kp_lt_y < 0: 204 | kp_lt_y = 0 205 | if live: 206 | dp_frame_x = live_frame.get_width() 207 | dp_frame_y = live_frame.get_height() 208 | else: 209 | dp_frame_x = depth_frame.shape[1] 210 | dp_frame_y = depth_frame.shape[0] 211 | #纠正方格块,如果方格块有部分已经超出图像边缘,防止方块右边超出边缘 212 | kp_most_x = kp_lt_x + (nn_pix - 1) 213 | if kp_most_x >= dp_frame_x: 214 | x_diff = kp_most_x - dp_frame_x 215 | kp_lt_x = kp_lt_x - x_diff - 1 216 | kp_most_y = kp_lt_y + (nn_pix - 1) 217 | if kp_most_y >= dp_frame_y: 218 | y_diff = kp_most_y - dp_frame_y 219 | kp_lt_y = kp_lt_y - y_diff - 1 220 | 221 | for t in range(nn_pix): 222 | for p in range(nn_pix): 223 | # if kp_lt_y + t < 0 or kp_lt_y + t >= depth_frame.shape[0] or kp_lt_x + p < 0 or kp_lt_x + p >= depth_frame.shape[1]: #判断加上nn pix的时候是否会超出范围,超出范围的取0 224 | # dp_nparray[i][t][p] = [kp_lt_x + p, kp_lt_y + t, 0] 225 | # else: 226 | if live: 227 | dp_frame_val = live_frame.get_distance(kp_lt_x + p, kp_lt_y + t) 228 | else: 229 | dp_frame_val = depth_frame[kp_lt_y + t, kp_lt_x + p] 230 | dp_nparray[i][t][p] = [kp_lt_x + p, kp_lt_y + t, dp_frame_val] 231 | # kp_lt_y = kp_lt_y + 1 232 | return dp_nparray 233 | 234 | def filter_kp_list(kp_list, norm_p_index=0): 235 | if kp_list[norm_p_index][0] == 0 or kp_list[norm_p_index][1] == 0: 236 | return True 237 | return False 238 | 239 | def get_mean_dep_val_xyz(kp_list, dp_nparray): 240 | dp_list = [] 241 | for i in range(len(dp_nparray)): 242 | len_size = len(dp_nparray[i][0]) 243 | dp_val = 0 244 | # print("len_size:",len_size) 245 | for j in range(len_size): 246 | for t in range(len_size): 247 | # print("nparray",dp_nparray[i][j][t]) 248 | dp_val = dp_val + dp_nparray[i][j][t][-1] 249 | dp_mean = dp_val / (len_size**2) 250 | dp_list.append([kp_list[i][0], kp_list[i][1], dp_mean]) 251 | # print("mean",[kp_list[i][0], kp_list[i][1], dp_mean]) 252 | return dp_list 253 | 254 | def norm_keypoints_xyz(dp_list, norm_p_index, rgb_size): 255 | ''' 256 | (x, y) is used to normalizate. 
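x and y are divided by the image width and height, z is min-max normalised, and every coordinate is then shifted so that the point at norm_p_index becomes the origin.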
257 | OUTPUT will be reshape as (3,5) 258 | ''' 259 | x_max, y_max = rgb_size[0], rgb_size[1] 260 | dp_nparray = np.asarray(dp_list) 261 | dp_rs_array = dp_nparray.T 262 | z_max = dp_rs_array[2].max() 263 | z_min = dp_rs_array[2].min() 264 | dp_rs_array[0] = dp_rs_array[0] / x_max#最大最小归一化。x坐标/图像长度width,最小位置为0,直接省略掉了 265 | dp_rs_array[1] = dp_rs_array[1] / y_max 266 | dp_rs_array[2] = (dp_rs_array[2] - z_min) / (z_max - z_min) 267 | # 距离摄像机远近不同,切割出来的人像大小会有所不同,但是五个骨骼点的深度差是不变的,骨骼点深度差只与人的动作有关(在realsense返回的深度值准确的前提下) 268 | # z 深度是需要归一化的,需要变成 [0, 1] ,这样的情况下才能去掉量纲,和另外两个维度可以一起在网络中训练 269 | zero_diff = [[dp_rs_array[0][norm_p_index]], [dp_rs_array[1][norm_p_index]], [dp_rs_array[2][norm_p_index]]] 270 | # print("zero diff : ",zero_diff) 271 | dp_array = np.subtract(dp_rs_array, zero_diff) 272 | # print("dp array :\n ", dp_array) 273 | return dp_array 274 | 275 | def no_negative(array): 276 | min_diff = [[array[0].min()], [array[1].min()], [array[2].min()]] 277 | array_out = array - min_diff 278 | return array_out 279 | 280 | def filter_zero(listin, threadhold): 281 | list_shape = listin.shape 282 | for i in range(list_shape[0]): 283 | for j in range(list_shape[1]): 284 | if listin[i][j] < threadhold: 285 | listin[i][j] = 0 286 | return listin 287 | 288 | def get_action_name(label): 289 | # label_dict = {0: "walk", 1: "stop", 2: "come", 3: "phone", 4: "open", 5: "stand"} 290 | output = [] 291 | for _y in label: 292 | y_list = _y.tolist() 293 | label_pred = y_list.index(max(y_list)) 294 | output.append(label_pred) 295 | # print(label_pred) 296 | return output 297 | 298 | def my_mlp(inputs): 299 | x = tf.keras.layers.Flatten()(inputs) 300 | x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x) 301 | x = tf.keras.layers.Dense(units=35, activation=tf.nn.relu, use_bias=True)(x) 302 | x = tf.keras.layers.Dense(units=15, activation=tf.nn.relu, use_bias=True)(x) 303 | x = tf.keras.layers.Dense(units=6)(x) 304 | # x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x) 305 | # x = tf.keras.layers.Dense(units=10, activation=tf.nn.relu, use_bias=True)(x) 306 | # x = tf.keras.layers.Dense(units=6)(x) 307 | # x = tf.keras.layers.Dense(units=20, activation=tf.nn.relu, use_bias=True)(x)#mlp3 308 | # x = tf.keras.layers.Dense(units=35, activation=tf.nn.relu, use_bias=True)(x) 309 | # x = tf.keras.layers.Dense(units=15, activation=tf.nn.relu, use_bias=True)(x) 310 | # x = tf.keras.layers.Dense(units=6)(x) 311 | outputs = tf.keras.layers.Softmax()(x) 312 | return outputs 313 | 314 | -------------------------------------------------------------------------------- /src/core/utils.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 5 | # 6 | # Editor : VIM 7 | # File name : utils.py 8 | # Author : YunYang1994 9 | # Created date: 2019-07-12 01:33:38 10 | # Description : 11 | # 12 | #================================================================ 13 | 14 | import cv2 15 | import random 16 | import colorsys 17 | import numpy as np 18 | 19 | def load_weights(model, weights_file): 20 | """ 21 | I agree that this code is very ugly, but I don’t know any better way of doing it. 
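Layers 58, 66 and 74 are the three detection heads; they store a plain bias instead of batch-norm parameters, which is why they are handled separately below.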
22 | """ 23 | wf = open(weights_file, 'rb') 24 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5) 25 | 26 | j = 0 27 | for i in range(75): 28 | conv_layer_name = 'conv2d_%d' %i if i > 0 else 'conv2d' 29 | bn_layer_name = 'batch_normalization_%d' %j if j > 0 else 'batch_normalization' 30 | 31 | conv_layer = model.get_layer(conv_layer_name) 32 | filters = conv_layer.filters 33 | k_size = conv_layer.kernel_size[0] 34 | in_dim = conv_layer.input_shape[-1] 35 | 36 | if i not in [58, 66, 74]: 37 | # darknet weights: [beta, gamma, mean, variance] 38 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters) 39 | # tf weights: [gamma, beta, mean, variance] 40 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 41 | bn_layer = model.get_layer(bn_layer_name) 42 | j += 1 43 | else: 44 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters) 45 | 46 | # darknet shape (out_dim, in_dim, height, width) 47 | conv_shape = (filters, in_dim, k_size, k_size) 48 | conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape)) 49 | # tf shape (height, width, in_dim, out_dim) 50 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0]) 51 | 52 | if i not in [58, 66, 74]: 53 | conv_layer.set_weights([conv_weights]) 54 | bn_layer.set_weights(bn_weights) 55 | else: 56 | conv_layer.set_weights([conv_weights, conv_bias]) 57 | 58 | assert len(wf.read()) == 0, 'failed to read all data' 59 | wf.close() 60 | 61 | 62 | def read_class_names(class_file_name): 63 | '''loads class name from a file''' 64 | names = {} 65 | with open(class_file_name, 'r') as data: 66 | for ID, name in enumerate(data): 67 | names[ID] = name.strip('\n') 68 | return names 69 | 70 | 71 | def get_anchors(anchors_path): 72 | '''loads the anchors from a file''' 73 | with open(anchors_path) as f: 74 | anchors = f.readline() 75 | anchors = np.array(anchors.split(','), dtype=np.float32) 76 | return anchors.reshape(3, 3, 2) 77 | 78 | 79 | def image_preporcess(image, target_size, gt_boxes=None): 80 | 81 | ih, iw = target_size 82 | h, w, _ = image.shape 83 | 84 | scale = min(iw/w, ih/h) 85 | nw, nh = int(scale * w), int(scale * h) 86 | image_resized = cv2.resize(image, (nw, nh)) 87 | 88 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0) 89 | dw, dh = (iw - nw) // 2, (ih-nh) // 2 90 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized 91 | image_paded = image_paded / 255. 92 | 93 | if gt_boxes is None: 94 | return image_paded 95 | 96 | else: 97 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw 98 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh 99 | return image_paded, gt_boxes 100 | 101 | 102 | def draw_bbox(image, bboxes, pred=None, num_classes=6, classes=None, show_label=True): 103 | """ 104 | bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates. 105 | """ 106 | image_h, image_w, _ = image.shape 107 | # num_classes += 1# 多一个类别,因为可能只检测到人或骨骼 108 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) 
for x in range(num_classes)] 109 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 110 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 111 | 112 | random.seed(0) 113 | random.shuffle(colors) 114 | random.seed(None) 115 | 116 | if bboxes and pred: 117 | for i, bbox in enumerate(bboxes): 118 | coor = np.array(bbox[:4], dtype=np.int32) 119 | fontScale = 0.5 120 | class_ind = pred[i][1] 121 | bbox_color = colors[class_ind] 122 | bbox_thick = int(0.6 * (image_h + image_w) / 600) 123 | c1, c2 = (coor[0], coor[1]), (coor[2], coor[3]) 124 | cv2.rectangle(image, c1, c2, bbox_color, bbox_thick) 125 | 126 | if show_label: 127 | if pred != None: 128 | bbox_mess = '%s %s' % (classes[class_ind], pred[i][1]) 129 | else: 130 | bbox_mess = '%s' % (classes[class_ind]) 131 | t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0] 132 | cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1) # filled 133 | 134 | cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX, 135 | fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA) 136 | 137 | return image 138 | 139 | 140 | 141 | def bboxes_iou(boxes1, boxes2): 142 | 143 | boxes1 = np.array(boxes1) 144 | boxes2 = np.array(boxes2) 145 | 146 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) 147 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 148 | 149 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 150 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 151 | 152 | inter_section = np.maximum(right_down - left_up, 0.0) 153 | inter_area = inter_section[..., 0] * inter_section[..., 1] 154 | union_area = boxes1_area + boxes2_area - inter_area 155 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 156 | 157 | return ious 158 | 159 | 160 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'): 161 | """ 162 | :param bboxes: (xmin, ymin, xmax, ymax, score, class) 163 | 164 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf 165 | https://github.com/bharatsingh430/soft-nms 166 | """ 167 | classes_in_img = list(set(bboxes[:, 5])) 168 | best_bboxes = [] 169 | 170 | for cls in classes_in_img: 171 | cls_mask = (bboxes[:, 5] == cls) 172 | cls_bboxes = bboxes[cls_mask] 173 | 174 | while len(cls_bboxes) > 0: 175 | max_ind = np.argmax(cls_bboxes[:, 4]) 176 | best_bbox = cls_bboxes[max_ind] 177 | best_bboxes.append(best_bbox) 178 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) 179 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) 180 | weight = np.ones((len(iou),), dtype=np.float32) 181 | 182 | assert method in ['nms', 'soft-nms'] 183 | 184 | if method == 'nms': 185 | iou_mask = iou > iou_threshold 186 | weight[iou_mask] = 0.0 187 | 188 | if method == 'soft-nms': 189 | weight = np.exp(-(1.0 * iou ** 2 / sigma)) 190 | 191 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight 192 | score_mask = cls_bboxes[:, 4] > 0. 
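# hard NMS zeroes the scores of boxes that overlap the current best box beyond iou_threshold, so they are filtered out here; soft-NMS only decays their scores, letting weaker overlapping detections survive later iterations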
193 | cls_bboxes = cls_bboxes[score_mask] 194 | 195 | return best_bboxes 196 | 197 | 198 | def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold): 199 | 200 | valid_scale=[0, np.inf] 201 | pred_bbox = np.array(pred_bbox) 202 | 203 | pred_xywh = pred_bbox[:, 0:4] 204 | pred_conf = pred_bbox[:, 4] 205 | pred_prob = pred_bbox[:, 5:] 206 | 207 | # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax) 208 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, 209 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) 210 | # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) 211 | org_h, org_w = org_img_shape 212 | resize_ratio = min(input_size / org_w, input_size / org_h) 213 | 214 | dw = (input_size - resize_ratio * org_w) / 2 215 | dh = (input_size - resize_ratio * org_h) / 2 216 | 217 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio 218 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio 219 | 220 | # # (3) clip some boxes those are out of range 221 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), 222 | np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) 223 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) 224 | pred_coor[invalid_mask] = 0 225 | 226 | # # (4) discard some invalid boxes 227 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) 228 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) 229 | 230 | # # (5) discard some boxes with low scores 231 | classes = np.argmax(pred_prob, axis=-1) 232 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] 233 | score_mask = scores > score_threshold 234 | mask = np.logical_and(scale_mask, score_mask) 235 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] 236 | 237 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) 238 | 239 | 240 | 241 | 242 | -------------------------------------------------------------------------------- /src/core/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/home/dongjai/anaconda3/envs/tensorflow2/bin/python 2 | # coding=utf-8 3 | #================================================================ 4 | # Copyright (C) 2019 * Ltd. All rights reserved. 
5 | # 6 | # Editor : VIM 7 | # File name : yolov3.py 8 | # Author : YunYang1994 9 | # Created date: 2019-07-12 13:47:10 10 | # Description : 11 | # 12 | #================================================================ 13 | import sys 14 | import numpy as np 15 | import tensorflow as tf 16 | import core.utils as utils 17 | import core.common as common 18 | import core.backbone as backbone 19 | from core.config import cfg 20 | 21 | 22 | NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES)) 23 | ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS) 24 | STRIDES = np.array(cfg.YOLO.STRIDES) 25 | IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH 26 | 27 | def YOLOv3(input_layer): 28 | route_1, route_2, conv = backbone.darknet53(input_layer) 29 | 30 | conv = common.convolutional(conv, (1, 1, 1024, 512)) 31 | conv = common.convolutional(conv, (3, 3, 512, 1024)) 32 | conv = common.convolutional(conv, (1, 1, 1024, 512)) 33 | conv = common.convolutional(conv, (3, 3, 512, 1024)) 34 | conv = common.convolutional(conv, (1, 1, 1024, 512)) 35 | 36 | conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024)) 37 | conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False) 38 | 39 | conv = common.convolutional(conv, (1, 1, 512, 256)) 40 | conv = common.upsample(conv) 41 | 42 | conv = tf.concat([conv, route_2], axis=-1) 43 | 44 | conv = common.convolutional(conv, (1, 1, 768, 256)) 45 | conv = common.convolutional(conv, (3, 3, 256, 512)) 46 | conv = common.convolutional(conv, (1, 1, 512, 256)) 47 | conv = common.convolutional(conv, (3, 3, 256, 512)) 48 | conv = common.convolutional(conv, (1, 1, 512, 256)) 49 | 50 | conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512)) 51 | conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) 52 | 53 | conv = common.convolutional(conv, (1, 1, 256, 128)) 54 | conv = common.upsample(conv) 55 | 56 | conv = tf.concat([conv, route_1], axis=-1) 57 | 58 | conv = common.convolutional(conv, (1, 1, 384, 128)) 59 | conv = common.convolutional(conv, (3, 3, 128, 256)) 60 | conv = common.convolutional(conv, (1, 1, 256, 128)) 61 | conv = common.convolutional(conv, (3, 3, 128, 256)) 62 | conv = common.convolutional(conv, (1, 1, 256, 128)) 63 | 64 | conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256)) 65 | conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False) 66 | 67 | return [conv_sbbox, conv_mbbox, conv_lbbox] 68 | 69 | def decode(conv_output, i=0): 70 | """ 71 | return tensor of shape [batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes] 72 | contains (x, y, w, h, score, probability) 73 | """ 74 | 75 | conv_shape = tf.shape(conv_output) 76 | batch_size = conv_shape[0] 77 | output_size = conv_shape[1] 78 | 79 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 80 | 81 | conv_raw_dxdy = conv_output[:, :, :, :, 0:2] 82 | conv_raw_dwdh = conv_output[:, :, :, :, 2:4] 83 | conv_raw_conf = conv_output[:, :, :, :, 4:5] 84 | conv_raw_prob = conv_output[:, :, :, :, 5: ] 85 | 86 | y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size]) 87 | x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1]) 88 | 89 | xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) 90 | xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) 91 | xy_grid = tf.cast(xy_grid, 
tf.float32) 92 | 93 | pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] 94 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] 95 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) 96 | 97 | pred_conf = tf.sigmoid(conv_raw_conf) 98 | pred_prob = tf.sigmoid(conv_raw_prob) 99 | 100 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) 101 | 102 | def bbox_iou(boxes1, boxes2): 103 | 104 | boxes1_area = boxes1[..., 2] * boxes1[..., 3] 105 | boxes2_area = boxes2[..., 2] * boxes2[..., 3] 106 | 107 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 108 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 109 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 110 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 111 | 112 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) 113 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) 114 | 115 | inter_section = tf.maximum(right_down - left_up, 0.0) 116 | inter_area = inter_section[..., 0] * inter_section[..., 1] 117 | union_area = boxes1_area + boxes2_area - inter_area 118 | 119 | return 1.0 * inter_area / union_area 120 | 121 | def bbox_giou(boxes1, boxes2): 122 | 123 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 124 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 125 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 126 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 127 | 128 | boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), 129 | tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) 130 | boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), 131 | tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) 132 | 133 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) 134 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 135 | 136 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) 137 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) 138 | 139 | inter_section = tf.maximum(right_down - left_up, 0.0) 140 | inter_area = inter_section[..., 0] * inter_section[..., 1] 141 | union_area = boxes1_area + boxes2_area - inter_area 142 | iou = inter_area / union_area 143 | 144 | enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) 145 | enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) 146 | enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) 147 | enclose_area = enclose[..., 0] * enclose[..., 1] 148 | giou = iou - 1.0 * (enclose_area - union_area) / enclose_area 149 | 150 | return giou 151 | 152 | 153 | def compute_loss(pred, conv, label, bboxes, i=0): 154 | 155 | conv_shape = tf.shape(conv) 156 | batch_size = conv_shape[0] 157 | output_size = conv_shape[1] 158 | input_size = STRIDES[i] * output_size 159 | conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 160 | 161 | conv_raw_conf = conv[:, :, :, :, 4:5] 162 | conv_raw_prob = conv[:, :, :, :, 5:] 163 | 164 | pred_xywh = pred[:, :, :, :, 0:4] 165 | pred_conf = pred[:, :, :, :, 4:5] 166 | 167 | label_xywh = label[:, :, :, :, 0:4] 168 | respond_bbox = label[:, :, :, :, 4:5] 169 | label_prob = label[:, :, :, :, 5:] 170 | 171 | giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1) 172 | input_size = tf.cast(input_size, tf.float32) 173 | 174 | bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2) 175 | giou_loss = respond_bbox * bbox_loss_scale * (1- giou) 176 | 
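# Anchors that are not responsible for any object (respond_bbox == 0) and whose best IoU with
# every ground-truth box stays below IOU_LOSS_THRESH are treated as background (respond_bgd);
# the focal-style factor conf_focal then down-weights easy predictions before the confidence
# cross-entropy computed below.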
177 | iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :]) 178 | max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1) 179 | 180 | respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < IOU_LOSS_THRESH, tf.float32 ) 181 | 182 | conf_focal = tf.pow(respond_bbox - pred_conf, 2) 183 | 184 | conf_loss = conf_focal * ( 185 | respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) 186 | + 187 | respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf) 188 | ) 189 | 190 | prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob) 191 | 192 | giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4])) 193 | conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4])) 194 | prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4])) 195 | 196 | return giou_loss, conf_loss, prob_loss 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /src/data/anchors/basline_anchors.txt: -------------------------------------------------------------------------------- 1 | 1.25,1.625, 2.0,3.75, 4.125,2.875, 1.875,3.8125, 3.875,2.8125, 3.6875,7.4375, 3.625,2.8125, 4.875,6.1875, 11.65625,10.1875 2 | -------------------------------------------------------------------------------- /src/data/classes/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /src/data/classes/yymnist.names: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | -------------------------------------------------------------------------------- /src/data/log/events.out.tfevents.1576027966.dongjai.16966.6502.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kevinchan04/Skeleton-based-action-recognition/2c4d7dd18cd90a2392e4c45a061e9e205c6dfaca/src/data/log/events.out.tfevents.1576027966.dongjai.16966.6502.v2 -------------------------------------------------------------------------------- /src/dataprocessing.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python
2 | # coding=utf-8
3 | 
4 | import cv2
5 | import time
6 | import numpy as np
7 | import core.utils as utils
8 | import tensorflow as tf
9 | from core.yolov3 import YOLOv3, decode
10 | import core.operation as operation
11 | import core.camera as cam
12 | 
13 | #####################################################################
14 | ###### Import openpose && Openpose initialization ######
15 | import os
16 | import sys
17 | from sys import platform
18 | import argparse
19 | 
20 | sys.path.append('/home/dongjai/openpose/build/python')
21 | from openpose import pyopenpose as op
22 | 
23 | ## setting up openpose flags ##
24 | params = dict()
25 | params["model_folder"] = "/home/dongjai/openpose/models/"
26 | params["model_pose"] = "COCO"
27 | params["face"] = False
28 | params["hand"] = False
29 | 
30 | # Starting OpenPose
31 | opWrapper = op.WrapperPython()
32 | opWrapper.configure(params)
33 | opWrapper.start()
34 | 
35 | # Initialize Openpose
36 | datum = op.Datum()
37 | #####################################################################
38 | 
39 | # video_path = "./docs/office2.mp4"
40 | # img_path = "./docs/color_270.png"
41 | # dp_path = "./docs/depth_8bit_270.png"
42 | # make sure the RGB image and the depth image are the same size
43 | alpha = 0.03 # depth frame is 8bit. if depth frame is 16bit, alpha = 0.0
44 | # video_path = 0
45 | num_classes = 80
46 | input_size = 416
47 | 
48 | input_layer = tf.keras.layers.Input([input_size, input_size, 3])
49 | feature_maps = YOLOv3(input_layer)
50 | 
51 | bbox_tensors = []
52 | for i, fm in enumerate(feature_maps):
53 |     bbox_tensor = decode(fm, i)
54 |     bbox_tensors.append(bbox_tensor)
55 | 
56 | model = tf.keras.Model(input_layer, bbox_tensors)
57 | utils.load_weights(model, "./yolov3.weights")
58 | # model.summary()
59 | 
60 | # vid = cv2.VideoCapture(video_path)
61 | # cam.wait4cam_init(vid, WaitTimes=10)  # wait for the camera to initialize and settle its auto-exposure
62 | # vid = cv2.imread(img_path, -1)
63 | # dp_vid = cv2.imread(dp_path, -1)
64 | files_path = r"/home/dongjai/action data set/dataset"
65 | dirs = os.listdir(files_path)
66 | for files in dirs:
67 |     img_file_path = r"/home/dongjai/action data set/dataset/" + files
68 |     file_txt = open(img_file_path + "/" + files + ".txt", 'a')  # the txt filename prefix follows the action category
69 |     img_num = 0
70 |     print("Action: ", files)
71 |     while True:
72 |         prev_time = time.time()
73 |         if img_num == 2000:  # exit once all the images have been read
74 |             file_txt.close()
75 |             break
76 |         ########################################
77 |         ##### Read image ######
78 |         ########################################
79 |         print("Picture: ", img_num)
80 |         img_path = img_file_path+"/RGB/color_"+str(img_num)+".png"
81 |         dp_path = img_file_path+"/depth/depth_8bit_"+str(img_num)+".png"
82 |         vid = cv2.imread(img_path, -1)
83 |         dp_vid = cv2.imread(dp_path, -1)
84 |         img_num = img_num + 1
85 |         ########################################
86 |         ##### YOLOv3 human detection ######
87 |         ########################################
88 |         # return_value, frame = vid.read()
89 |         frame = vid
90 |         return_value = 1
91 |         dp_frame = dp_vid
92 |         # print("rgbshape:", frame.shape)
93 |         # print("dpshape:", dp_frame.shape)
94 |         if return_value:
95 |             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
96 |         else:
97 |             raise ValueError("No image!")
98 |         frame_size = frame.shape[:2]
99 |         image_data = utils.image_preporcess(np.copy(frame), [input_size, input_size])  # resize the image to 416x416
100 |         image_data = image_data[np.newaxis, ...].astype(np.float32)  # convert to 32-bit float
101 | 
102 |         pred_bbox = model.predict(image_data)
103 |         pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
104 |         pred_bbox = tf.concat(pred_bbox, axis=0)
105 | 
106 |         bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size, 0.3)  # post-process the predicted bounding boxes
107 |         bboxes = utils.nms(bboxes, 0.45, method='nms')  # non-maximum suppression (NMS) keeps the most representative box
108 |         only_one_bboxes = operation.detect_only_one(bboxes, IDi=0)  # keep only one class (IDi=0 is "person")
109 |         prob_top_n_bboxes = operation.select_and_pick(only_one_bboxes, index=4, order=0, top_n=3)  # index=4 means the boxes are ranked by their class probability
110 |         top_n_bboxes_dp = operation.get_centerpoints_xyz(prob_top_n_bboxes, depth_frame=dp_frame)
111 |         # print("top n bboxes dp : ", top_n_bboxes_dp)
112 |         ############# NOTE: at run time the gap= values can be obtained with the RealSense get_distance function, which avoids the problem that the 8-bit depth images grabbed during the training stage limit the depth to only 8500
113 |         top_n_bboxes_in_gap = operation.select_in_gap(top_n_bboxes_dp, gap=[500, 8000], alpha=alpha)  # filter by distance range; gap is in mm
114 |         the_one_bbox = operation.select_and_pick(top_n_bboxes_in_gap, index=-1, order=1, top_n=1)  # closer objects have smaller depth values, so sort in ascending order
115 |         if len(the_one_bbox) == 0:
116 |             print("error no detection")
117 |             continue
118 |         # identify the operator: use the depth value at the centre of each YOLO box and take the closest person inside the distance range (x~y) as the operator
119 |         ########################################
120 |         ##### YOLOv3 End ######
121 |         ########################################
122 | 
123 |         ########################################
124 |         ##### Person cropping ######
125 |         ########################################
126 |         img_cut_set, left_top_list = operation.cut_bbox(frame, the_one_bbox, ext_pix_x=0, ext_pix_y=0)  # crop the image region inside every bbox of the frame and collect the patches into a list
127 |         # img_cut_set = img_set[0:len(img_set)]  # individual standalone images
128 |         if len(left_top_list) != len(img_cut_set):
129 |             print("error")
130 |         ########################################
131 |         ##### Person cropping End ######
132 |         ########################################
133 | 
134 |         ########################################
135 |         ##### Openpose ######
136 |         ########################################
137 |         for i in range(len(img_cut_set)):  # feed the single-person crops from this frame into OpenPose one at a time
138 |             # OpenPose stage
139 |             datum.cvInputData = img_cut_set[i]
140 |             opWrapper.emplaceAndPop([datum])  # openpose input image->BGR ############### ATTENTION could these calls be combined to improve runtime efficiency?
141 | 
142 |             # cv2.namedWindow("cut_one", cv2.WINDOW_AUTOSIZE)
143 |             # cv2.imshow("cut_one", cut_out)
144 |             cv2.namedWindow("cut_backbone_result", cv2.WINDOW_AUTOSIZE)
145 |             cv2.imshow("cut_backbone_result", datum.cvOutputData)
146 | 
147 |             # stage: map the keypoint coordinates onto the depth frame
148 |             kp_num = [0, 8, 9, 10, 11, 14, 15, 16, 17]
149 |             kp_list = operation.get_keypoints_xy(datum=datum, kp_num=kp_num)  # [x, y] get the 2-D coordinates (x, y) of the selected keypoints
150 |             # print("kp list: ",kp_list)
151 |             if operation.filter_kp_list(kp_list, norm_p_index=0):  # if the detected skeleton is missing the nose, assume the person has their back to the camera
152 |                 continue
153 |             # dp_nparray.shape = (5, 7, 7, 3)
154 |             nn_pix = 3
155 |             dp_nparray = operation.get_keypoints_xyz(kp_list=kp_list, depth_frame=dp_frame, left_top=left_top_list[i], nn_pix=nn_pix)  # [x, y, z] map the keypoints back to the original frame and get their 3-D coordinates (x, y, z) plus a few neighbouring points
156 |             # use the neighbourhood mean instead of the centre-point depth value
157 |             dp_list = operation.get_mean_dep_val_xyz(kp_list=kp_list, dp_nparray=dp_nparray)
158 |             # normalize the coordinates
159 |             dp_array_out = operation.norm_keypoints_xyz(dp_list, norm_p_index=0, cut_img=img_cut_set[i])
160 |             # translate the coordinates so that all values are non-negative
161 |             dp_array_no_neg = operation.no_negative(array=dp_array_out)
162 |             # save the keypoint data to the txt file  # skeleton depth-processing stage: obtain the most representative depth information; the depth values of these keypoints form one output vector
163 |             # treat numbers smaller than 0.001 as 0
164 |             dp_array_no_neg_zero = operation.filter_zero(dp_array_no_neg, threadhold=0.001)
165 |             # print("dp nparray: ", dp_array_out)
166 |             # cv2.imshow("nparray", dp_array_no_neg)
167 | 
168 |             file_txt.write(str(dp_array_no_neg_zero))
169 |             file_txt.write('\n')
170 |         ########################################
171 |         ##### Openpose End ######
172 |         ########################################
173 | 
174 |         ########################################
175 |         ##### MLP ######
176 |         ########################################
177 | 
178 |         # neural-network action recognition stage
179 | 
180 |         # visualization stage
181 |         ##########################
182 |         ### Original image ###
183 |         ##########################
184 |         curr_time = time.time()
185 |         exec_time = curr_time - prev_time
186 | 
187 |         image = utils.draw_bbox(frame, only_one_bboxes)  # draw the bounding boxes
188 |         result = np.asarray(image)  # appears to be unused
189 |         info = "time: %.2f ms" %(1000*exec_time)
190 |         cv2.putText(result, text=info, org=(50, 70), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
191 |                     fontScale=1, color=(255, 0, 0), thickness=2)
192 |         cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
193 |         result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
194 |         cv2.imshow("result", result)
195 | 
196 |         if cv2.waitKey(1) & 0xFF == ord('q'):
197 |             file_txt.close()
198 |             break
199 | 
200 | 
201 | 
202 | 
--------------------------------------------------------------------------------
/src/mediandensity.json:
--------------------------------------------------------------------------------
1 | {
2 | "aux-param-autoexposure-setpoint": "1536",
3 | "aux-param-colorcorrection1": "0.298828",
4 | "aux-param-colorcorrection10": "-0",
5 | "aux-param-colorcorrection11": "-0",
6 | "aux-param-colorcorrection12": "-0",
7 | "aux-param-colorcorrection2": "0.293945",
8 | "aux-param-colorcorrection3": "0.293945",
9 | "aux-param-colorcorrection4": "0.114258",
10 | "aux-param-colorcorrection5": "-0",
11 | "aux-param-colorcorrection6": "-0",
12 | "aux-param-colorcorrection7": "-0",
13 | "aux-param-colorcorrection8": "-0",
14 | "aux-param-colorcorrection9": "-0",
15 | "aux-param-depthclampmax": "65535",
16 | "aux-param-depthclampmin": "0",
17 | "aux-param-disparityshift": "0",
18 | "controls-autoexposure-auto": "False",
19 | "controls-autoexposure-manual": "10000",
20 | "controls-color-autoexposure-auto": "True",
21 | "controls-color-autoexposure-manual": "156",
22 | "controls-color-backlight-compensation": "0",
23 | "controls-color-brightness": "0",
24 | "controls-color-contrast": "50",
25 | "controls-color-gain": "64",
26 | "controls-color-gamma": "300",
27 | "controls-color-hue": "0",
28 | "controls-color-power-line-frequency": "3",
29 | "controls-color-saturation": "64",
30 | "controls-color-sharpness": "50",
31 | "controls-color-white-balance-auto": "True",
32 | "controls-color-white-balance-manual": "4600",
33 | "controls-depth-gain": "16",
34 | "controls-laserpower": "150",
35 | "controls-laserstate": "on",
36 | "ignoreSAD": "0",
37 | "param-amplitude-factor": "0",
38 | "param-autoexposure-setpoint": "1536",
39 | "param-censusenablereg-udiameter": "8",
40 | "param-censusenablereg-vdiameter": "5",
41 | "param-censususize": "8",
42 | "param-censusvsize": "5",
43 | "param-depthclampmax": "65535",
44 | "param-depthclampmin": "0",
45 | "param-depthunits": "1000",
46 | "param-disableraucolor": "0",
47 | "param-disablesadcolor": "0",
48 | "param-disablesadnormalize": "0",
49 | "param-disablesloleftcolor": "1",
50 | "param-disableslorightcolor": "0",
51 | "param-disparitymode": "0",
52 | "param-disparityshift": "0",
53 | "param-lambdaad":
"978", 54 | "param-lambdacensus": "21", 55 | "param-leftrightthreshold": "32", 56 | "param-maxscorethreshb": "972", 57 | "param-medianthreshold": "175", 58 | "param-minscorethresha": "3", 59 | "param-neighborthresh": "1", 60 | "param-raumine": "3", 61 | "param-rauminn": "1", 62 | "param-rauminnssum": "6", 63 | "param-raumins": "2", 64 | "param-rauminw": "1", 65 | "param-rauminwesum": "5", 66 | "param-regioncolorthresholdb": "0.677104", 67 | "param-regioncolorthresholdg": "0.0117417", 68 | "param-regioncolorthresholdr": "0.0782779", 69 | "param-regionshrinku": "4", 70 | "param-regionshrinkv": "1", 71 | "param-robbinsmonrodecrement": "14", 72 | "param-robbinsmonroincrement": "22", 73 | "param-rsmdiffthreshold": "5.875", 74 | "param-rsmrauslodiffthreshold": "1", 75 | "param-rsmremovethreshold": "0.416667", 76 | "param-scanlineedgetaub": "18", 77 | "param-scanlineedgetaug": "493", 78 | "param-scanlineedgetaur": "390", 79 | "param-scanlinep1": "134", 80 | "param-scanlinep1onediscon": "10", 81 | "param-scanlinep1twodiscon": "70", 82 | "param-scanlinep2": "397", 83 | "param-scanlinep2onediscon": "21", 84 | "param-scanlinep2twodiscon": "226", 85 | "param-secondpeakdelta": "3", 86 | "param-texturecountthresh": "6", 87 | "param-texturedifferencethresh": "0", 88 | "param-usersm": "1", 89 | "param-zunits": "1000", 90 | "stream-depth-format": "Z16", 91 | "stream-fps": "30", 92 | "stream-height": "480", 93 | "stream-width": "640" 94 | } --------------------------------------------------------------------------------