├── Debug ├── makefile ├── nsightbuilddata ├── objects.mk ├── sources.mk └── src │ ├── darkSrc │ └── subdir.mk │ ├── feature │ └── subdir.mk │ ├── matching │ └── subdir.mk │ ├── subdir.mk │ └── thirdPart │ ├── munkres │ ├── adapters │ │ └── subdir.mk │ └── subdir.mk │ └── subdir.mk ├── README.md ├── RUNNINGDATA └── tensor_networks │ └── 111.meta └── src ├── VideoTracker.cpp ├── VideoTracker.h ├── darkSrc ├── activation_kernels.cu ├── activation_layer.c ├── activation_layer.h ├── activations.c ├── activations.h ├── avgpool_layer.c ├── avgpool_layer.h ├── avgpool_layer_kernels.cu ├── batchnorm_layer.c ├── batchnorm_layer.h ├── blas.c ├── blas.h ├── blas_kernels.cu ├── box.c ├── box.h ├── col2im.c ├── col2im.h ├── col2im_kernels.cu ├── connected_layer.c ├── connected_layer.h ├── convolutional_kernels.cu ├── convolutional_layer.c ├── convolutional_layer.h ├── cost_layer.c ├── cost_layer.h ├── crnn_layer.c ├── crnn_layer.h ├── crop_layer.c ├── crop_layer.h ├── crop_layer_kernels.cu ├── cuda.c ├── cuda.h ├── data.c ├── data.h ├── deconvolutional_kernels.cu ├── deconvolutional_layer.c ├── deconvolutional_layer.h ├── demo.c ├── demo.h ├── detection_layer.c ├── detection_layer.h ├── dropout_layer.c ├── dropout_layer.h ├── dropout_layer_kernels.cu ├── gemm.c ├── gemm.h ├── gettimeofday.c ├── gettimeofday.h ├── gru_layer.c ├── gru_layer.h ├── im2col.c ├── im2col.h ├── im2col_kernels.cu ├── image.c ├── image.h ├── layer.c ├── layer.h ├── list.c ├── list.h ├── local_layer.c ├── local_layer.h ├── matrix.c ├── matrix.h ├── maxpool_layer.c ├── maxpool_layer.h ├── maxpool_layer_kernels.cu ├── network.c ├── network.h ├── network_kernels.cu ├── normalization_layer.c ├── normalization_layer.h ├── option_list.c ├── option_list.h ├── parser.c ├── parser.h ├── region_layer.c ├── region_layer.h ├── reorg_layer.c ├── reorg_layer.h ├── rnn_layer.c ├── rnn_layer.h ├── route_layer.c ├── route_layer.h ├── shortcut_layer.c ├── shortcut_layer.h ├── softmax_layer.c ├── softmax_layer.h ├── 
stb_image.h ├── stb_image_write.h ├── tree.c ├── tree.h ├── utils.c └── utils.h ├── errmsg.cpp ├── errmsg.h ├── feature ├── FeatureTensor.cpp ├── FeatureTensor.h ├── dataType.h ├── model.cpp └── model.h ├── main.cpp ├── matching ├── kalmanfilter.cpp ├── kalmanfilter.h ├── linear_assignment.cpp ├── linear_assignment.h ├── nn_matching.cpp ├── nn_matching.h ├── track.cpp ├── track.h ├── tracker.cpp └── tracker.h └── thirdPart ├── hungarianoper.cpp ├── hungarianoper.h └── munkres ├── adapters ├── adapter.cpp ├── adapter.h ├── boostmatrixadapter.cpp └── boostmatrixadapter.h ├── matrix.h ├── munkres.cpp └── munkres.h /Debug/makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | -include ../makefile.init 6 | 7 | RM := rm -rf 8 | 9 | # All of the sources participating in the build are defined here 10 | -include sources.mk 11 | -include subdir.mk 12 | -include src/thirdPart/munkres/subdir.mk 13 | -include src/thirdPart/munkres/adapters/subdir.mk 14 | -include src/thirdPart/subdir.mk 15 | -include src/matching/subdir.mk 16 | -include src/subdir.mk 17 | -include src/feature/subdir.mk 18 | -include src/darkSrc/subdir.mk 19 | 20 | OS_SUFFIX := $(subst Linux,linux,$(subst Darwin/x86_64,darwin,$(shell uname -s)/$(shell uname -m))) 21 | 22 | -include objects.mk 23 | 24 | ifneq ($(MAKECMDGOALS),clean) 25 | ifneq ($(strip $(CC_DEPS)),) 26 | -include $(CC_DEPS) 27 | endif 28 | ifneq ($(strip $(C++_DEPS)),) 29 | -include $(C++_DEPS) 30 | endif 31 | ifneq ($(strip $(C_UPPER_DEPS)),) 32 | -include $(C_UPPER_DEPS) 33 | endif 34 | ifneq ($(strip $(CXX_DEPS)),) 35 | -include $(CXX_DEPS) 36 | endif 37 | ifneq ($(strip $(CU_DEPS)),) 38 | -include $(CU_DEPS) 39 | endif 40 | ifneq ($(strip $(CPP_DEPS)),) 41 | -include $(CPP_DEPS) 42 | 
endif 43 | ifneq ($(strip $(C_DEPS)),) 44 | -include $(C_DEPS) 45 | endif 46 | endif 47 | 48 | -include ../makefile.defs 49 | 50 | # OpenGL+GLUT OS-specific define 51 | ifeq ($(shell uname -s),Darwin) 52 | GLUT_LIBS := -Xlinker -framework -Xlinker GLUT -Xlinker -framework -Xlinker OpenGL 53 | else 54 | GLUT_LIBS := -lGL -lGLU -lglut 55 | endif 56 | 57 | 58 | # Add inputs and outputs from these tool invocations to the build variables 59 | 60 | # All Target 61 | all: VideoTracking 62 | 63 | # Tool invocations 64 | VideoTracking: $(OBJS) $(USER_OBJS) 65 | @echo 'Building target: $@' 66 | @echo 'Invoking: NVCC Linker' 67 | /usr/local/cuda-8.0/bin/nvcc --cudart static -L/usr/local/opencv3/lib -L/home/zy/Downloads/tensorflow-1.4.0/bazel-bin/tensorflow --relocatable-device-code=false -gencode arch=compute_61,code=compute_61 -gencode arch=compute_61,code=sm_61 -link -o "VideoTracking" $(OBJS) $(USER_OBJS) $(LIBS) 68 | @echo 'Finished building target: $@' 69 | @echo ' ' 70 | 71 | # Other Targets 72 | clean: 73 | -$(RM) $(CC_DEPS)$(C++_DEPS)$(EXECUTABLES)$(C_UPPER_DEPS)$(CXX_DEPS)$(OBJS)$(CU_DEPS)$(CPP_DEPS)$(C_DEPS) VideoTracking 74 | -@echo ' ' 75 | 76 | .PHONY: all clean dependents 77 | .SECONDARY: 78 | 79 | -include ../makefile.targets 80 | -------------------------------------------------------------------------------- /Debug/nsightbuilddata: -------------------------------------------------------------------------------- 1 | Local@Native 2 | Local@Native 3 | Local@Native 4 | Local@Native 5 | Local@Native 6 | Local@Native 7 | Local@Native 8 | Local@Native 9 | Local@Native 10 | Local@Native 11 | Local@Native 12 | Local@Native 13 | Local@Native 14 | Local@Native 15 | Local@Native 16 | Local@Native 17 | Local@Native 18 | Local@Native 19 | Local@Native 20 | Local@Native 21 | Local@Native 22 | Local@Native 23 | Local@Native 24 | Local@Native 25 | Local@Native 26 | Local@Native 27 | Local@Native 28 | Local@Native 29 | Local@Native 30 | Local@Native 31 | Local@Native 32 | 
Local@Native 33 | Local@Native 34 | -------------------------------------------------------------------------------- /Debug/objects.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | USER_OBJS := 6 | 7 | LIBS := -lopencv_core -lopencv_highgui -lopencv_imgcodecs -lopencv_videoio -lopencv_imgproc -ltensorflow_cc -lcuda -lcudart -lcublas -lcurand -lcudnn 8 | 9 | -------------------------------------------------------------------------------- /Debug/sources.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | CU_SRCS := 6 | C_UPPER_SRCS := 7 | CXX_SRCS := 8 | C++_SRCS := 9 | OBJ_SRCS := 10 | CC_SRCS := 11 | ASM_SRCS := 12 | CPP_SRCS := 13 | C_SRCS := 14 | O_SRCS := 15 | S_UPPER_SRCS := 16 | CC_DEPS := 17 | C++_DEPS := 18 | EXECUTABLES := 19 | C_UPPER_DEPS := 20 | CXX_DEPS := 21 | OBJS := 22 | CU_DEPS := 23 | CPP_DEPS := 24 | C_DEPS := 25 | 26 | # Every subdirectory with source files must be described here 27 | SUBDIRS := \ 28 | src/thirdPart/munkres \ 29 | src/thirdPart/munkres/adapters \ 30 | src/thirdPart \ 31 | src/matching \ 32 | src \ 33 | src/feature \ 34 | src/darkSrc \ 35 | 36 | -------------------------------------------------------------------------------- /Debug/src/feature/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 
3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/feature/FeatureTensor.cpp \ 8 | ../src/feature/model.cpp 9 | 10 | OBJS += \ 11 | ./src/feature/FeatureTensor.o \ 12 | ./src/feature/model.o 13 | 14 | CPP_DEPS += \ 15 | ./src/feature/FeatureTensor.d \ 16 | ./src/feature/model.d 17 | 18 | 19 | # Each subdirectory must supply rules for building sources it contributes 20 | src/feature/%.o: ../src/feature/%.cpp 21 | @echo 'Building file: $<' 22 | @echo 'Invoking: NVCC Compiler' 23 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/feature" -M -o "$(@:%.o=%.d)" "$<" 24 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 25 | @echo 'Finished building: $<' 26 | @echo ' ' 27 | 28 | 29 | -------------------------------------------------------------------------------- 
/Debug/src/matching/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/matching/kalmanfilter.cpp \ 8 | ../src/matching/linear_assignment.cpp \ 9 | ../src/matching/nn_matching.cpp \ 10 | ../src/matching/track.cpp \ 11 | ../src/matching/tracker.cpp 12 | 13 | OBJS += \ 14 | ./src/matching/kalmanfilter.o \ 15 | ./src/matching/linear_assignment.o \ 16 | ./src/matching/nn_matching.o \ 17 | ./src/matching/track.o \ 18 | ./src/matching/tracker.o 19 | 20 | CPP_DEPS += \ 21 | ./src/matching/kalmanfilter.d \ 22 | ./src/matching/linear_assignment.d \ 23 | ./src/matching/nn_matching.d \ 24 | ./src/matching/track.d \ 25 | ./src/matching/tracker.d 26 | 27 | 28 | # Each subdirectory must supply rules for building sources it contributes 29 | src/matching/%.o: ../src/matching/%.cpp 30 | @echo 'Building file: $<' 31 | @echo 'Invoking: NVCC Compiler' 32 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/matching" -M -o "$(@:%.o=%.d)" "$<" 33 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv 
-I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 34 | @echo 'Finished building: $<' 35 | @echo ' ' 36 | 37 | 38 | -------------------------------------------------------------------------------- /Debug/src/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/VideoTracker.cpp \ 8 | ../src/errmsg.cpp \ 9 | ../src/main.cpp 10 | 11 | OBJS += \ 12 | ./src/VideoTracker.o \ 13 | ./src/errmsg.o \ 14 | ./src/main.o 15 | 16 | CPP_DEPS += \ 17 | ./src/VideoTracker.d \ 18 | ./src/errmsg.d \ 19 | ./src/main.d 20 | 21 | 22 | # Each subdirectory must supply rules for building sources it contributes 23 | src/%.o: ../src/%.cpp 24 | @echo 'Building file: $<' 25 | @echo 'Invoking: NVCC Compiler' 26 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 
-gencode arch=compute_61,code=sm_61 -odir "src" -M -o "$(@:%.o=%.d)" "$<" 27 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 28 | @echo 'Finished building: $<' 29 | @echo ' ' 30 | 31 | 32 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/munkres/adapters/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 
3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/munkres/adapters/adapter.cpp \ 8 | ../src/thirdPart/munkres/adapters/boostmatrixadapter.cpp 9 | 10 | OBJS += \ 11 | ./src/thirdPart/munkres/adapters/adapter.o \ 12 | ./src/thirdPart/munkres/adapters/boostmatrixadapter.o 13 | 14 | CPP_DEPS += \ 15 | ./src/thirdPart/munkres/adapters/adapter.d \ 16 | ./src/thirdPart/munkres/adapters/boostmatrixadapter.d 17 | 18 | 19 | # Each subdirectory must supply rules for building sources it contributes 20 | src/thirdPart/munkres/adapters/%.o: ../src/thirdPart/munkres/adapters/%.cpp 21 | @echo 'Building file: $<' 22 | @echo 'Invoking: NVCC Compiler' 23 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart/munkres/adapters" -M -o "$(@:%.o=%.d)" "$<" 24 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 
--compile -x c++ -o "$@" "$<" 25 | @echo 'Finished building: $<' 26 | @echo ' ' 27 | 28 | 29 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/munkres/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/munkres/munkres.cpp 8 | 9 | OBJS += \ 10 | ./src/thirdPart/munkres/munkres.o 11 | 12 | CPP_DEPS += \ 13 | ./src/thirdPart/munkres/munkres.d 14 | 15 | 16 | # Each subdirectory must supply rules for building sources it contributes 17 | src/thirdPart/munkres/%.o: ../src/thirdPart/munkres/%.cpp 18 | @echo 'Building file: $<' 19 | @echo 'Invoking: NVCC Compiler' 20 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart/munkres" -M -o "$(@:%.o=%.d)" "$<" 21 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive 
-I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 22 | @echo 'Finished building: $<' 23 | @echo ' ' 24 | 25 | 26 | -------------------------------------------------------------------------------- /Debug/src/thirdPart/subdir.mk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Automatically-generated file. Do not edit! 3 | ################################################################################ 4 | 5 | # Add inputs and outputs from these tool invocations to the build variables 6 | CPP_SRCS += \ 7 | ../src/thirdPart/hungarianoper.cpp 8 | 9 | OBJS += \ 10 | ./src/thirdPart/hungarianoper.o 11 | 12 | CPP_DEPS += \ 13 | ./src/thirdPart/hungarianoper.d 14 | 15 | 16 | # Each subdirectory must supply rules for building sources it contributes 17 | src/thirdPart/%.o: ../src/thirdPart/%.cpp 18 | @echo 'Building file: $<' 19 | @echo 'Invoking: NVCC Compiler' 20 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 -gencode arch=compute_61,code=sm_61 -odir "src/thirdPart" -M -o "$(@:%.o=%.d)" "$<" 21 | /usr/local/cuda-8.0/bin/nvcc -DGPU -DCUDNN -DOPENCV -I/usr/local/opencv3/include/opencv2 -I/usr/local/opencv3/include/opencv -I/usr/local/opencv3/include 
-I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive/Eigen -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/eigen_archive -I/home/zy/.cache/bazel/_bazel_zy/69673882bdf22f44b89ee446a9525e4a/external/nsync/public -I/home/zy/Downloads/tensorflow-1.4.0/bazel-genfiles -I/home/zy/Downloads/tensorflow-1.4.0 -G -g -O0 -std=c++11 --compile -x c++ -o "$@" "$<" 22 | @echo 'Finished building: $<' 23 | @echo ' ' 24 | 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepSort 2 | The c++ version of https://github.com/nwojke/deep_sort. 3 | 4 | # dependency 5 | - opencv3 6 | - tensorflow 1.4 7 | - cuda 8.0 8 | - cudnn 6.0 9 | 10 | develop tool: Nsight Eclipse Edition Version:9.1 11 | 12 | # PREPARE DATA & RUN 13 | Before you run, you need: 14 | 1. download YOLOV2's related model files https://pjreddie.com. Copy the **cfg**, **data** directory and **yolo.weights** to the **RUNNINGDATA/darknet_data**. 15 | 2. download the CNN checkpoint file **resources/networks/mars-small128.ckpt-68577** and put to **RUNNINGDATA/tensor_networks**. 16 | ``` 17 | cd Debug 18 | make 19 | cd .. #into the project root directory 20 | ./Debug/VideoTracking 21 | ``` 22 | 23 | ## OPTION in main.cpp 24 | There are three options in main.cpp which you cannot define simultaneously. 25 | They are `RUNGT` `RUNMOTTENSOR` and `RUNLOCALVIDEO`. 26 | - `RUNGT`: you need download MOT dataset and define the specific directory by `MOTDIR` in the main.cpp. FUNCTION: tracking mot dataset using mot groundtruth. 27 | - `RUNMOTTENSOR`: like `RUNGT`. FUNCTION: tracking mot dataset using mot detection groundtruth but generate feature by tensorflow. 28 | - `RUNLOCALVIDEO`: you need define the local video's path by `VIDEO` in the main.cpp. FUNCTION: tracking local video without any known data.
29 | 30 | 31 | -------------------------------------------------------------------------------- /RUNNINGDATA/tensor_networks/111.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitzy/DeepSort/db8b64e594c97493a0c289a7ce22fa69530b9075/RUNNINGDATA/tensor_networks/111.meta -------------------------------------------------------------------------------- /src/VideoTracker.h: -------------------------------------------------------------------------------- 1 | /* 2 | * VideoTracker.h 3 | * 4 | * Created on: Dec 15, 2017 5 | * Author: zy 6 | */ 7 | 8 | #ifndef VIDEOTRACKER_H_ 9 | #define VIDEOTRACKER_H_ 10 | #include 11 | 12 | /** 13 | * VideoTracker 14 | * 15 | * run: 16 | * -vpath: the video's path. 17 | * -showSwitch: whether show the tracking result. 18 | * 19 | * run_sequenceWithGT: 20 | * -motDir: the path of MOT directory. 21 | * -showSwitch: show or not. 22 | * 23 | * run_sequence: 24 | * -motDir: the path of MOT directory. 25 | * -showSwitch: show or not. 
26 | */ 27 | class VideoTracker { 28 | public: 29 | bool run(const char* vpath, bool showSwitch); 30 | bool run_sequenceWithGT(const char* motDir, bool showSwitch); 31 | bool run_sequence(const char* motDir, bool showSwitch); 32 | std::string showErrMsg(); 33 | 34 | private: 35 | bool vShowFlag; 36 | std::string errorMsg; 37 | 38 | void videoLoad(std::string sequence_dir,int &min_frame_idx, int &max_frame_idx); 39 | std::string loadFromBUFFERFILE(); 40 | }; 41 | 42 | 43 | #endif /* VIDEOTRACKER_H_ */ 44 | 45 | -------------------------------------------------------------------------------- /src/darkSrc/activation_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "activations.h" 7 | #include "cuda.h" 8 | } 9 | 10 | 11 | __device__ float lhtan_activate_kernel(float x) 12 | { 13 | if(x < 0) return .001*x; 14 | if(x > 1) return .001*(x-1) + 1; 15 | return x; 16 | } 17 | __device__ float lhtan_gradient_kernel(float x) 18 | { 19 | if(x > 0 && x < 1) return 1; 20 | return .001; 21 | } 22 | 23 | __device__ float hardtan_activate_kernel(float x) 24 | { 25 | if (x < -1) return -1; 26 | if (x > 1) return 1; 27 | return x; 28 | } 29 | __device__ float linear_activate_kernel(float x){return x;} 30 | __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));} 31 | __device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;} 32 | __device__ float relu_activate_kernel(float x){return x*(x>0);} 33 | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} 34 | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;} 35 | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} 36 | __device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1*x;} 37 | __device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);} 38 | __device__ float plse_activate_kernel(float x) 39 | { 40 | if(x < -4) return .01 * (x + 4); 41 | if(x > 4) return .01 * (x - 4) + 1; 42 | return .125*x + .5; 43 | } 44 | __device__ float stair_activate_kernel(float x) 45 | { 46 | int n = floor(x); 47 | if (n%2 == 0) return floor(x/2.); 48 | else return (x - n) + floor(x/2.); 49 | } 50 | 51 | 52 | __device__ float hardtan_gradient_kernel(float x) 53 | { 54 | if (x > -1 && x < 1) return 1; 55 | return 0; 56 | } 57 | __device__ float linear_gradient_kernel(float x){return 1;} 58 | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} 59 | __device__ float loggy_gradient_kernel(float x) 60 | { 61 | float y = (x+1.)/2.; 62 | return 2*(1-y)*y; 63 | } 64 | __device__ float relu_gradient_kernel(float x){return (x>0);} 65 | __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} 66 | __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;} 67 | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} 68 | __device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;} 69 | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} 70 | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? 
.01 : .125;} 71 | __device__ float stair_gradient_kernel(float x) 72 | { 73 | if (floor(x) == x) return 0; 74 | return 1; 75 | } 76 | 77 | __device__ float activate_kernel(float x, ACTIVATION a) 78 | { 79 | switch(a){ 80 | case LINEAR: 81 | return linear_activate_kernel(x); 82 | case LOGISTIC: 83 | return logistic_activate_kernel(x); 84 | case LOGGY: 85 | return loggy_activate_kernel(x); 86 | case RELU: 87 | return relu_activate_kernel(x); 88 | case ELU: 89 | return elu_activate_kernel(x); 90 | case RELIE: 91 | return relie_activate_kernel(x); 92 | case RAMP: 93 | return ramp_activate_kernel(x); 94 | case LEAKY: 95 | return leaky_activate_kernel(x); 96 | case TANH: 97 | return tanh_activate_kernel(x); 98 | case PLSE: 99 | return plse_activate_kernel(x); 100 | case STAIR: 101 | return stair_activate_kernel(x); 102 | case HARDTAN: 103 | return hardtan_activate_kernel(x); 104 | case LHTAN: 105 | return lhtan_activate_kernel(x); 106 | } 107 | return 0; 108 | } 109 | 110 | __device__ float gradient_kernel(float x, ACTIVATION a) 111 | { 112 | switch(a){ 113 | case LINEAR: 114 | return linear_gradient_kernel(x); 115 | case LOGISTIC: 116 | return logistic_gradient_kernel(x); 117 | case LOGGY: 118 | return loggy_gradient_kernel(x); 119 | case RELU: 120 | return relu_gradient_kernel(x); 121 | case ELU: 122 | return elu_gradient_kernel(x); 123 | case RELIE: 124 | return relie_gradient_kernel(x); 125 | case RAMP: 126 | return ramp_gradient_kernel(x); 127 | case LEAKY: 128 | return leaky_gradient_kernel(x); 129 | case TANH: 130 | return tanh_gradient_kernel(x); 131 | case PLSE: 132 | return plse_gradient_kernel(x); 133 | case STAIR: 134 | return stair_gradient_kernel(x); 135 | case HARDTAN: 136 | return hardtan_gradient_kernel(x); 137 | case LHTAN: 138 | return lhtan_gradient_kernel(x); 139 | } 140 | return 0; 141 | } 142 | 143 | __global__ void activate_array_kernel(float *x, int n, ACTIVATION a) 144 | { 145 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + 
threadIdx.x; 146 | if(i < n) x[i] = activate_kernel(x[i], a); 147 | } 148 | 149 | __global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) 150 | { 151 | int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 152 | if(i < n) delta[i] *= gradient_kernel(x[i], a); 153 | } 154 | 155 | extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) 156 | { 157 | activate_array_kernel<<>>(x, n, a); 158 | check_error(cudaPeekAtLastError()); 159 | } 160 | 161 | extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta) 162 | { 163 | gradient_array_kernel<<>>(x, n, a, delta); 164 | check_error(cudaPeekAtLastError()); 165 | } 166 | -------------------------------------------------------------------------------- /src/darkSrc/activation_layer.c: -------------------------------------------------------------------------------- 1 | #include "activation_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include "gemm.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation) 13 | { 14 | layer l = {0}; 15 | l.type = ACTIVE; 16 | 17 | l.inputs = inputs; 18 | l.outputs = inputs; 19 | l.batch=batch; 20 | 21 | l.output = calloc(batch*inputs, sizeof(float*)); 22 | l.delta = calloc(batch*inputs, sizeof(float*)); 23 | 24 | l.forward = forward_activation_layer; 25 | l.backward = backward_activation_layer; 26 | #ifdef GPU 27 | l.forward_gpu = forward_activation_layer_gpu; 28 | l.backward_gpu = backward_activation_layer_gpu; 29 | 30 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 31 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 32 | #endif 33 | l.activation = activation; 34 | fprintf(stderr, "Activation Layer: %d inputs\n", inputs); 35 | return l; 36 | } 37 | 38 | void forward_activation_layer(layer l, network_state state) 39 | { 40 | copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 
1); 41 | activate_array(l.output, l.outputs*l.batch, l.activation); 42 | } 43 | 44 | void backward_activation_layer(layer l, network_state state) 45 | { 46 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 47 | copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); 48 | } 49 | 50 | #ifdef GPU 51 | 52 | void forward_activation_layer_gpu(layer l, network_state state) 53 | { 54 | copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); 55 | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); 56 | } 57 | 58 | void backward_activation_layer_gpu(layer l, network_state state) 59 | { 60 | gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 61 | copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /src/darkSrc/activation_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATION_LAYER_H 2 | #define ACTIVATION_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_activation_layer(int batch, int inputs, ACTIVATION activation); 9 | 10 | void forward_activation_layer(layer l, network_state state); 11 | void backward_activation_layer(layer l, network_state state); 12 | 13 | #ifdef GPU 14 | void forward_activation_layer_gpu(layer l, network_state state); 15 | void backward_activation_layer_gpu(layer l, network_state state); 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /src/darkSrc/activations.c: -------------------------------------------------------------------------------- 1 | #include "activations.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | char *get_activation_string(ACTIVATION a) 9 | { 10 | switch(a){ 11 | case LOGISTIC: 12 | return "logistic"; 13 | case LOGGY: 14 | return "loggy"; 15 
| case RELU: 16 | return "relu"; 17 | case ELU: 18 | return "elu"; 19 | case RELIE: 20 | return "relie"; 21 | case RAMP: 22 | return "ramp"; 23 | case LINEAR: 24 | return "linear"; 25 | case TANH: 26 | return "tanh"; 27 | case PLSE: 28 | return "plse"; 29 | case LEAKY: 30 | return "leaky"; 31 | case STAIR: 32 | return "stair"; 33 | case HARDTAN: 34 | return "hardtan"; 35 | case LHTAN: 36 | return "lhtan"; 37 | default: 38 | break; 39 | } 40 | return "relu"; 41 | } 42 | 43 | ACTIVATION get_activation(char *s) 44 | { 45 | if (strcmp(s, "logistic")==0) return LOGISTIC; 46 | if (strcmp(s, "loggy")==0) return LOGGY; 47 | if (strcmp(s, "relu")==0) return RELU; 48 | if (strcmp(s, "elu")==0) return ELU; 49 | if (strcmp(s, "relie")==0) return RELIE; 50 | if (strcmp(s, "plse")==0) return PLSE; 51 | if (strcmp(s, "hardtan")==0) return HARDTAN; 52 | if (strcmp(s, "lhtan")==0) return LHTAN; 53 | if (strcmp(s, "linear")==0) return LINEAR; 54 | if (strcmp(s, "ramp")==0) return RAMP; 55 | if (strcmp(s, "leaky")==0) return LEAKY; 56 | if (strcmp(s, "tanh")==0) return TANH; 57 | if (strcmp(s, "stair")==0) return STAIR; 58 | fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); 59 | return RELU; 60 | } 61 | 62 | float activate(float x, ACTIVATION a) 63 | { 64 | switch(a){ 65 | case LINEAR: 66 | return linear_activate(x); 67 | case LOGISTIC: 68 | return logistic_activate(x); 69 | case LOGGY: 70 | return loggy_activate(x); 71 | case RELU: 72 | return relu_activate(x); 73 | case ELU: 74 | return elu_activate(x); 75 | case RELIE: 76 | return relie_activate(x); 77 | case RAMP: 78 | return ramp_activate(x); 79 | case LEAKY: 80 | return leaky_activate(x); 81 | case TANH: 82 | return tanh_activate(x); 83 | case PLSE: 84 | return plse_activate(x); 85 | case STAIR: 86 | return stair_activate(x); 87 | case HARDTAN: 88 | return hardtan_activate(x); 89 | case LHTAN: 90 | return lhtan_activate(x); 91 | } 92 | return 0; 93 | } 94 | 95 | void activate_array(float *x, const 
int n, const ACTIVATION a) 96 | { 97 | int i; 98 | for(i = 0; i < n; ++i){ 99 | x[i] = activate(x[i], a); 100 | } 101 | } 102 | 103 | float gradient(float x, ACTIVATION a) 104 | { 105 | switch(a){ 106 | case LINEAR: 107 | return linear_gradient(x); 108 | case LOGISTIC: 109 | return logistic_gradient(x); 110 | case LOGGY: 111 | return loggy_gradient(x); 112 | case RELU: 113 | return relu_gradient(x); 114 | case ELU: 115 | return elu_gradient(x); 116 | case RELIE: 117 | return relie_gradient(x); 118 | case RAMP: 119 | return ramp_gradient(x); 120 | case LEAKY: 121 | return leaky_gradient(x); 122 | case TANH: 123 | return tanh_gradient(x); 124 | case PLSE: 125 | return plse_gradient(x); 126 | case STAIR: 127 | return stair_gradient(x); 128 | case HARDTAN: 129 | return hardtan_gradient(x); 130 | case LHTAN: 131 | return lhtan_gradient(x); 132 | } 133 | return 0; 134 | } 135 | 136 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) 137 | { 138 | int i; 139 | for(i = 0; i < n; ++i){ 140 | delta[i] *= gradient(x[i], a); 141 | } 142 | } 143 | 144 | -------------------------------------------------------------------------------- /src/darkSrc/activations.h: -------------------------------------------------------------------------------- 1 | #ifndef ACTIVATIONS_H 2 | #define ACTIVATIONS_H 3 | #include "cuda.h" 4 | #include "math.h" 5 | 6 | typedef enum{ 7 | LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN 8 | }ACTIVATION; 9 | 10 | ACTIVATION get_activation(char *s); 11 | 12 | char *get_activation_string(ACTIVATION a); 13 | float activate(float x, ACTIVATION a); 14 | float gradient(float x, ACTIVATION a); 15 | void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); 16 | void activate_array(float *x, const int n, const ACTIVATION a); 17 | #ifdef GPU 18 | void activate_array_ongpu(float *x, int n, ACTIVATION a); 19 | void gradient_array_ongpu(float *x, int n, ACTIVATION a, float 
*delta);
#endif

/* ---- Inline activation functions and their derivatives. ----
 * NOTE: the *_gradient helpers below are called with the layer's
 * *activated output* (gradient_array receives l.output), so e.g.
 * logistic_gradient(y) computes y*(1-y) from the sigmoid output y. */

/* Staircase: steps of height 1 every two units, linear in between. */
static inline float stair_activate(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}
/* Hard tanh: clamp x to [-1, 1]. */
static inline float hardtan_activate(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
/* Identity. */
static inline float linear_activate(float x){return x;}
/* Sigmoid: 1/(1+e^-x), range (0, 1). */
static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
/* Rescaled sigmoid, range (-1, 1). */
static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
/* max(0, x), written branch-free. */
static inline float relu_activate(float x){return x*(x>0);}
/* Exponential linear unit: x for x>=0, e^x - 1 below. */
static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
/* Leaky ReLU with fixed 0.01 negative slope. */
static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
/* ReLU plus an extra 0.1*x linear term (slope 1.1 above 0, 0.1 below). */
static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
/* Leaky ReLU with fixed 0.1 negative slope. */
static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
/* tanh computed via exp: (e^2x - 1)/(e^2x + 1). */
static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
/* Piecewise-linear sigmoid approximation: slope .125 on [-4, 4],
 * slope .01 in the saturated tails. */
static inline float plse_activate(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4) return .01 * (x - 4) + 1;
    return .125*x + .5;
}

/* "Leaky hard tanh": identity on [0, 1], slope .001 outside. */
static inline float lhtan_activate(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}
/* Derivative of lhtan w.r.t. its input, evaluated at the output x. */
static inline float lhtan_gradient(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}

/* Derivative of hardtan: 1 inside (-1, 1), 0 in the clamped region. */
static inline float hardtan_gradient(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
static inline float linear_gradient(float x){return 1;}
/* x is the sigmoid output y, so this is y*(1-y). */
static inline float logistic_gradient(float x){return (1-x)*x;}
/* x is the loggy output in (-1, 1); map back to (0, 1) then use the
 * sigmoid derivative scaled by 2. */
static inline float loggy_gradient(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}
/* Slope 0 exactly on the flat integer steps, 1 on the ramps. */
static inline float stair_gradient(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}
static inline float relu_gradient(float x){return (x>0);}
static inline float 
elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} 81 | static inline float relie_gradient(float x){return (x>0) ? 1 : .01;} 82 | static inline float ramp_gradient(float x){return (x>0)+.1;} 83 | static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;} 84 | static inline float tanh_gradient(float x){return 1-x*x;} 85 | static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;} 86 | 87 | #endif 88 | 89 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "avgpool_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) 6 | { 7 | fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); 8 | avgpool_layer l = {0}; 9 | l.type = AVGPOOL; 10 | l.batch = batch; 11 | l.h = h; 12 | l.w = w; 13 | l.c = c; 14 | l.out_w = 1; 15 | l.out_h = 1; 16 | l.out_c = c; 17 | l.outputs = l.out_c; 18 | l.inputs = h*w*c; 19 | int output_size = l.outputs * batch; 20 | l.output = calloc(output_size, sizeof(float)); 21 | l.delta = calloc(output_size, sizeof(float)); 22 | l.forward = forward_avgpool_layer; 23 | l.backward = backward_avgpool_layer; 24 | #ifdef GPU 25 | l.forward_gpu = forward_avgpool_layer_gpu; 26 | l.backward_gpu = backward_avgpool_layer_gpu; 27 | l.output_gpu = cuda_make_array(l.output, output_size); 28 | l.delta_gpu = cuda_make_array(l.delta, output_size); 29 | #endif 30 | return l; 31 | } 32 | 33 | void resize_avgpool_layer(avgpool_layer *l, int w, int h) 34 | { 35 | l->w = w; 36 | l->h = h; 37 | l->inputs = h*w*l->c; 38 | } 39 | 40 | void forward_avgpool_layer(const avgpool_layer l, network_state state) 41 | { 42 | int b,i,k; 43 | 44 | for(b = 0; b < l.batch; ++b){ 45 | for(k = 0; k < l.c; ++k){ 46 | int out_index = k + b*l.c; 47 | l.output[out_index] = 0; 48 | for(i = 0; i < l.h*l.w; ++i){ 49 | int in_index = i + 
l.h*l.w*(k + b*l.c); 50 | l.output[out_index] += state.input[in_index]; 51 | } 52 | l.output[out_index] /= l.h*l.w; 53 | } 54 | } 55 | } 56 | 57 | void backward_avgpool_layer(const avgpool_layer l, network_state state) 58 | { 59 | int b,i,k; 60 | 61 | for(b = 0; b < l.batch; ++b){ 62 | for(k = 0; k < l.c; ++k){ 63 | int out_index = k + b*l.c; 64 | for(i = 0; i < l.h*l.w; ++i){ 65 | int in_index = i + l.h*l.w*(k + b*l.c); 66 | state.delta[in_index] += l.delta[out_index] / (l.h*l.w); 67 | } 68 | } 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef AVGPOOL_LAYER_H 2 | #define AVGPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer avgpool_layer; 10 | 11 | image get_avgpool_image(avgpool_layer l); 12 | avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); 13 | void resize_avgpool_layer(avgpool_layer *l, int w, int h); 14 | void forward_avgpool_layer(const avgpool_layer l, network_state state); 15 | void backward_avgpool_layer(const avgpool_layer l, network_state state); 16 | 17 | #ifdef GPU 18 | void forward_avgpool_layer_gpu(avgpool_layer l, network_state state); 19 | void backward_avgpool_layer_gpu(avgpool_layer l, network_state state); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/darkSrc/avgpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "avgpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) 11 | { 12 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * 
blockDim.x + threadIdx.x; 13 | if(id >= n) return; 14 | 15 | int k = id % c; 16 | id /= c; 17 | int b = id; 18 | 19 | int i; 20 | int out_index = (k + c*b); 21 | output[out_index] = 0; 22 | for(i = 0; i < w*h; ++i){ 23 | int in_index = i + h*w*(k + b*c); 24 | output[out_index] += input[in_index]; 25 | } 26 | output[out_index] /= w*h; 27 | } 28 | 29 | __global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) 30 | { 31 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 32 | if(id >= n) return; 33 | 34 | int k = id % c; 35 | id /= c; 36 | int b = id; 37 | 38 | int i; 39 | int out_index = (k + c*b); 40 | for(i = 0; i < w*h; ++i){ 41 | int in_index = i + h*w*(k + b*c); 42 | in_delta[in_index] += out_delta[out_index] / (w*h); 43 | } 44 | } 45 | 46 | extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 47 | { 48 | size_t n = layer.c*layer.batch; 49 | 50 | forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); 51 | check_error(cudaPeekAtLastError()); 52 | } 53 | 54 | extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) 55 | { 56 | size_t n = layer.c*layer.batch; 57 | 58 | backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); 59 | check_error(cudaPeekAtLastError()); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /src/darkSrc/batchnorm_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BATCHNORM_LAYER_H 2 | #define BATCHNORM_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_batchnorm_layer(int batch, int w, int h, int c); 9 | void forward_batchnorm_layer(layer l, network_state state); 10 | void backward_batchnorm_layer(layer l, network_state state); 11 | 12 | #ifdef GPU 13 | void 
forward_batchnorm_layer_gpu(layer l, network_state state); 14 | void backward_batchnorm_layer_gpu(layer l, network_state state); 15 | void pull_batchnorm_layer(layer l); 16 | void push_batchnorm_layer(layer l); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/blas.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | #include "math.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 9 | { 10 | int b,i,j,k; 11 | int out_c = c/(stride*stride); 12 | 13 | for(b = 0; b < batch; ++b){ 14 | for(k = 0; k < c; ++k){ 15 | for(j = 0; j < h; ++j){ 16 | for(i = 0; i < w; ++i){ 17 | int in_index = i + w*(j + h*(k + c*b)); 18 | int c2 = k % out_c; 19 | int offset = k / out_c; 20 | int w2 = i*stride + offset % stride; 21 | int h2 = j*stride + offset / stride; 22 | int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 23 | if(forward) out[out_index] = x[in_index]; 24 | else out[in_index] = x[out_index]; 25 | } 26 | } 27 | } 28 | } 29 | } 30 | 31 | void flatten(float *x, int size, int layers, int batch, int forward) 32 | { 33 | float *swap = calloc(size*layers*batch, sizeof(float)); 34 | int i,c,b; 35 | for(b = 0; b < batch; ++b){ 36 | for(c = 0; c < layers; ++c){ 37 | for(i = 0; i < size; ++i){ 38 | int i1 = b*layers*size + c*size + i; 39 | int i2 = b*layers*size + i*layers + c; 40 | if (forward) swap[i2] = x[i1]; 41 | else swap[i1] = x[i2]; 42 | } 43 | } 44 | } 45 | memcpy(x, swap, size*layers*batch*sizeof(float)); 46 | free(swap); 47 | } 48 | 49 | void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) 50 | { 51 | int i; 52 | for(i = 0; i < n; ++i){ 53 | c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); 54 | } 55 | } 56 | 57 | void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) 58 | { 59 | int stride = w1/w2; 60 | int sample = w2/w1; 61 | assert(stride == h1/h2); 62 | assert(sample == h2/h1); 63 | if(stride < 1) stride = 1; 64 | if(sample < 1) sample = 1; 65 | int minw = (w1 < w2) ? w1 : w2; 66 | int minh = (h1 < h2) ? h1 : h2; 67 | int minc = (c1 < c2) ? c1 : c2; 68 | 69 | int i,j,k,b; 70 | for(b = 0; b < batch; ++b){ 71 | for(k = 0; k < minc; ++k){ 72 | for(j = 0; j < minh; ++j){ 73 | for(i = 0; i < minw; ++i){ 74 | int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); 75 | int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); 76 | out[out_index] += add[add_index]; 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) 84 | { 85 | float scale = 1./(batch * spatial); 86 | int i,j,k; 87 | for(i = 0; i < filters; ++i){ 88 | mean[i] = 0; 89 | for(j = 0; j < batch; ++j){ 90 | for(k = 0; k < spatial; ++k){ 91 | int index = j*filters*spatial + i*spatial + k; 92 | mean[i] += x[index]; 93 | } 94 | } 95 | mean[i] *= scale; 96 | } 97 | } 98 | 99 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) 100 | { 101 | float scale = 1./(batch * spatial - 1); 102 | int i,j,k; 103 | for(i = 0; i < filters; ++i){ 104 | variance[i] = 0; 105 | for(j = 0; j < batch; ++j){ 106 | for(k = 0; k < spatial; ++k){ 107 | int index = j*filters*spatial + i*spatial + k; 108 | variance[i] += pow((x[index] - mean[i]), 2); 109 | } 110 | } 111 | variance[i] *= scale; 112 | } 113 | } 114 | 115 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) 116 | { 117 | int b, f, i; 118 | for(b = 0; b < batch; ++b){ 119 | for(f = 0; f < filters; ++f){ 120 | for(i = 0; i < spatial; ++i){ 121 | int index = b*filters*spatial + f*spatial + i; 122 | x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + 
.000001f); 123 | } 124 | } 125 | } 126 | } 127 | 128 | void const_cpu(int N, float ALPHA, float *X, int INCX) 129 | { 130 | int i; 131 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 132 | } 133 | 134 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) 135 | { 136 | int i; 137 | for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; 138 | } 139 | 140 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 141 | { 142 | int i; 143 | for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); 144 | } 145 | 146 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 147 | { 148 | int i; 149 | for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; 150 | } 151 | 152 | void scal_cpu(int N, float ALPHA, float *X, int INCX) 153 | { 154 | int i; 155 | for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; 156 | } 157 | 158 | void fill_cpu(int N, float ALPHA, float *X, int INCX) 159 | { 160 | int i; 161 | for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 162 | } 163 | 164 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) 165 | { 166 | int i; 167 | for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 168 | } 169 | 170 | void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) 171 | { 172 | int i; 173 | for(i = 0; i < n; ++i){ 174 | float diff = truth[i] - pred[i]; 175 | float abs_val = fabs(diff); 176 | if(abs_val < 1) { 177 | error[i] = diff * diff; 178 | delta[i] = diff; 179 | } 180 | else { 181 | error[i] = 2*abs_val - 1; 182 | delta[i] = (diff < 0) ? 
-1 : 1; 183 | } 184 | } 185 | } 186 | 187 | void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) 188 | { 189 | int i; 190 | for(i = 0; i < n; ++i){ 191 | float diff = truth[i] - pred[i]; 192 | error[i] = diff * diff; 193 | delta[i] = diff; 194 | } 195 | } 196 | 197 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) 198 | { 199 | int i; 200 | float dot = 0; 201 | for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; 202 | return dot; 203 | } 204 | 205 | void softmax(float *input, int n, float temp, float *output) 206 | { 207 | int i; 208 | float sum = 0; 209 | float largest = -FLT_MAX; 210 | for(i = 0; i < n; ++i){ 211 | if(input[i] > largest) largest = input[i]; 212 | } 213 | for(i = 0; i < n; ++i){ 214 | float e = exp(input[i]/temp - largest/temp); 215 | sum += e; 216 | output[i] = e; 217 | } 218 | for(i = 0; i < n; ++i){ 219 | output[i] /= sum; 220 | } 221 | } 222 | 223 | -------------------------------------------------------------------------------- /src/darkSrc/blas.h: -------------------------------------------------------------------------------- 1 | #ifndef BLAS_H 2 | #define BLAS_H 3 | void flatten(float *x, int size, int layers, int batch, int forward); 4 | void pm(int M, int N, float *A); 5 | float *random_matrix(int rows, int cols); 6 | void time_random_matrix(int TA, int TB, int m, int k, int n); 7 | void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); 8 | 9 | void test_blas(); 10 | 11 | void const_cpu(int N, float ALPHA, float *X, int INCX); 12 | void constrain_ongpu(int N, float ALPHA, float * X, int INCX); 13 | void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 14 | void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); 15 | 16 | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 17 | void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); 18 | void scal_cpu(int N, float ALPHA, float *X, int INCX); 19 | void 
fill_cpu(int N, float ALPHA, float * X, int INCX); 20 | float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); 21 | void test_gpu_blas(); 22 | void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); 23 | 24 | void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); 25 | void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 26 | void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 27 | 28 | void scale_bias(float *output, float *scales, int batch, int n, int size); 29 | void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); 30 | void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); 31 | void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 32 | void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 33 | 34 | void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); 35 | void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); 36 | void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); 37 | 38 | void softmax(float *input, int n, float temp, float *output); 39 | 40 | #ifdef GPU 41 | #include "cuda.h" 42 | 43 | void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); 44 | void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 45 | void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); 46 | void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); 47 | void scal_ongpu(int N, float ALPHA, float * X, int INCX); 48 | void supp_ongpu(int N, float ALPHA, float 
* X, int INCX); 49 | void mask_ongpu(int N, float * X, float mask_num, float * mask); 50 | void const_ongpu(int N, float ALPHA, float *X, int INCX); 51 | void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); 52 | void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY); 53 | void fill_ongpu(int N, float ALPHA, float * X, int INCX); 54 | 55 | void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 56 | void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 57 | void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); 58 | 59 | void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); 60 | 61 | void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); 62 | void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); 63 | 64 | void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); 65 | void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); 66 | void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); 67 | void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); 68 | void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); 69 | void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); 70 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 71 | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); 72 | 73 | void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); 74 | void l2_gpu(int n, float *pred, float *truth, float 
*delta, float *error); 75 | void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); 76 | void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); 77 | void mult_add_into_gpu(int num, float *a, float *b, float *c); 78 | 79 | void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); 80 | 81 | void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output); 82 | void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); 83 | 84 | void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out); 85 | 86 | #endif 87 | #endif 88 | -------------------------------------------------------------------------------- /src/darkSrc/box.h: -------------------------------------------------------------------------------- 1 | #ifndef BOX_H 2 | #define BOX_H 3 | 4 | typedef struct{ 5 | float x, y, w, h; 6 | } box; 7 | 8 | typedef struct{ 9 | float dx, dy, dw, dh; 10 | } dbox; 11 | 12 | box float_to_box(float *f); 13 | float box_iou(box a, box b); 14 | float box_rmse(box a, box b); 15 | dbox diou(box a, box b); 16 | void do_nms(box *boxes, float **probs, int total, int classes, float thresh); 17 | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); 18 | box decode_box(box b, box anchor); 19 | box encode_box(box b, box anchor); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/darkSrc/col2im.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | void col2im_add_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad, float val) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || col < 0 || 10 | row >= height || col >= width) return; 11 | im[col + width*(row + height*channel)] += val; 12 | } 13 | 
//This one might be too, can't remember. 14 | void col2im_cpu(float* data_col, 15 | int channels, int height, int width, 16 | int ksize, int stride, int pad, float* data_im) 17 | { 18 | int c,h,w; 19 | int height_col = (height + 2*pad - ksize) / stride + 1; 20 | int width_col = (width + 2*pad - ksize) / stride + 1; 21 | 22 | int channels_col = channels * ksize * ksize; 23 | for (c = 0; c < channels_col; ++c) { 24 | int w_offset = c % ksize; 25 | int h_offset = (c / ksize) % ksize; 26 | int c_im = c / ksize / ksize; 27 | for (h = 0; h < height_col; ++h) { 28 | for (w = 0; w < width_col; ++w) { 29 | int im_row = h_offset + h * stride; 30 | int im_col = w_offset + w * stride; 31 | int col_index = (c * height_col + h) * width_col + w; 32 | double val = data_col[col_index]; 33 | col2im_add_pixel(data_im, height, width, channels, 34 | im_row, im_col, c_im, pad, val); 35 | } 36 | } 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /src/darkSrc/col2im.h: -------------------------------------------------------------------------------- 1 | #ifndef COL2IM_H 2 | #define COL2IM_H 3 | 4 | void col2im_cpu(float* data_col, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_im); 7 | 8 | #ifdef GPU 9 | void col2im_ongpu(float *data_col, 10 | int channels, int height, int width, 11 | int ksize, int stride, int pad, float *data_im); 12 | #endif 13 | #endif 14 | -------------------------------------------------------------------------------- /src/darkSrc/col2im_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "col2im.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void 
col2im_gpu_kernel(const int n, const float* data_col, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_im) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | float val = 0; 22 | int w = index % width + pad; 23 | int h = (index / width) % height + pad; 24 | int c = index / (width * height); 25 | // compute the start and end of the output 26 | int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; 27 | int w_col_end = min(w / stride + 1, width_col); 28 | int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; 29 | int h_col_end = min(h / stride + 1, height_col); 30 | // equivalent implementation 31 | int offset = 32 | (c * ksize * ksize + h * ksize + w) * height_col * width_col; 33 | int coeff_h_col = (1 - stride * ksize * height_col) * width_col; 34 | int coeff_w_col = (1 - stride * height_col * width_col); 35 | for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { 36 | for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { 37 | val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; 38 | } 39 | } 40 | data_im[index] += val; 41 | } 42 | } 43 | 44 | void col2im_ongpu(float *data_col, 45 | int channels, int height, int width, 46 | int ksize, int stride, int pad, float *data_im){ 47 | // We are going to launch channels * height_col * width_col kernels, each 48 | // kernel responsible for copying a single-channel grid. 
49 | int height_col = (height + 2 * pad - ksize) / stride + 1; 50 | int width_col = (width + 2 * pad - ksize) / stride + 1; 51 | int num_kernels = channels * height * width; 52 | col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 53 | BLOCK>>>( 54 | num_kernels, data_col, height, width, ksize, pad, 55 | stride, height_col, 56 | width_col, data_im); 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/darkSrc/connected_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONNECTED_LAYER_H 2 | #define CONNECTED_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer connected_layer; 9 | 10 | connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize); 11 | 12 | void forward_connected_layer(connected_layer layer, network_state state); 13 | void backward_connected_layer(connected_layer layer, network_state state); 14 | void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 15 | void denormalize_connected_layer(layer l); 16 | void statistics_connected_layer(layer l); 17 | 18 | #ifdef GPU 19 | void forward_connected_layer_gpu(connected_layer layer, network_state state); 20 | void backward_connected_layer_gpu(connected_layer layer, network_state state); 21 | void update_connected_layer_gpu(connected_layer layer, int batch, float learning_rate, float momentum, float decay); 22 | void push_connected_layer(connected_layer layer); 23 | void pull_connected_layer(connected_layer layer); 24 | #endif 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /src/darkSrc/convolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVOLUTIONAL_LAYER_H 2 | #define CONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | 
#include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer convolutional_layer; 11 | 12 | #ifdef GPU 13 | void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 14 | void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); 15 | void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 16 | 17 | void push_convolutional_layer(convolutional_layer layer); 18 | void pull_convolutional_layer(convolutional_layer layer); 19 | 20 | void add_bias_gpu(float *output, float *biases, int batch, int n, int size); 21 | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); 22 | #ifdef CUDNN 23 | void cudnn_convolutional_setup(layer *l); 24 | #endif 25 | #endif 26 | 27 | convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam); 28 | void denormalize_convolutional_layer(convolutional_layer l); 29 | void resize_convolutional_layer(convolutional_layer *layer, int w, int h); 30 | void forward_convolutional_layer(const convolutional_layer layer, network_state state); 31 | void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); 32 | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); 33 | void binarize_weights(float *weights, int n, int size, float *binary); 34 | void swap_binary(convolutional_layer *l); 35 | void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); 36 | 37 | void backward_convolutional_layer(convolutional_layer layer, network_state state); 38 | 39 | void add_bias(float *output, float *biases, int batch, int n, int size); 40 | void backward_bias(float *bias_updates, float *delta, 
int batch, int n, int size); 41 | 42 | image get_convolutional_image(convolutional_layer layer); 43 | image get_convolutional_delta(convolutional_layer layer); 44 | image get_convolutional_weight(convolutional_layer layer, int i); 45 | 46 | int convolutional_out_height(convolutional_layer layer); 47 | int convolutional_out_width(convolutional_layer layer); 48 | void rescale_weights(convolutional_layer l, float scale, float trans); 49 | void rgbgr_weights(convolutional_layer l); 50 | 51 | #endif 52 | 53 | -------------------------------------------------------------------------------- /src/darkSrc/cost_layer.c: -------------------------------------------------------------------------------- 1 | #include "cost_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include "blas.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | COST_TYPE get_cost_type(char *s) 11 | { 12 | if (strcmp(s, "sse")==0) return SSE; 13 | if (strcmp(s, "masked")==0) return MASKED; 14 | if (strcmp(s, "smooth")==0) return SMOOTH; 15 | fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); 16 | return SSE; 17 | } 18 | 19 | char *get_cost_string(COST_TYPE a) 20 | { 21 | switch(a){ 22 | case SSE: 23 | return "sse"; 24 | case MASKED: 25 | return "masked"; 26 | case SMOOTH: 27 | return "smooth"; 28 | } 29 | return "sse"; 30 | } 31 | 32 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) 33 | { 34 | fprintf(stderr, "cost %4d\n", inputs); 35 | cost_layer l = {0}; 36 | l.type = COST; 37 | 38 | l.scale = scale; 39 | l.batch = batch; 40 | l.inputs = inputs; 41 | l.outputs = inputs; 42 | l.cost_type = cost_type; 43 | l.delta = calloc(inputs*batch, sizeof(float)); 44 | l.output = calloc(inputs*batch, sizeof(float)); 45 | l.cost = calloc(1, sizeof(float)); 46 | 47 | l.forward = forward_cost_layer; 48 | l.backward = backward_cost_layer; 49 | #ifdef GPU 50 | l.forward_gpu = forward_cost_layer_gpu; 51 | l.backward_gpu = backward_cost_layer_gpu; 52 
| 53 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); /* fix: was cuda_make_array(l.output, ...) — delta_gpu must mirror the host delta buffer, as resize_cost_layer below does */ 54 | l.output_gpu = cuda_make_array(l.output, inputs*batch); /* fix: was cuda_make_array(l.delta, ...) — output_gpu must mirror the host output buffer */ 55 | #endif 56 | return l; 57 | } 58 | 59 | void resize_cost_layer(cost_layer *l, int inputs) 60 | { 61 | l->inputs = inputs; 62 | l->outputs = inputs; 63 | l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); 64 | l->output = realloc(l->output, inputs*l->batch*sizeof(float)); 65 | #ifdef GPU 66 | cuda_free(l->delta_gpu); 67 | cuda_free(l->output_gpu); 68 | l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); 69 | l->output_gpu = cuda_make_array(l->output, inputs*l->batch); 70 | #endif 71 | } 72 | 73 | void forward_cost_layer(cost_layer l, network_state state) 74 | { 75 | if (!state.truth) return; 76 | if(l.cost_type == MASKED){ 77 | int i; 78 | for(i = 0; i < l.batch*l.inputs; ++i){ 79 | if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; 80 | } 81 | } 82 | if(l.cost_type == SMOOTH){ 83 | smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 84 | } else { 85 | l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 86 | } 87 | l.cost[0] = sum_array(l.output, l.batch*l.inputs); 88 | } 89 | 90 | void backward_cost_layer(const cost_layer l, network_state state) 91 | { 92 | axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); 93 | } 94 | 95 | #ifdef GPU 96 | 97 | void pull_cost_layer(cost_layer l) 98 | { 99 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 100 | } 101 | 102 | void push_cost_layer(cost_layer l) 103 | { 104 | cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); 105 | } 106 | 107 | int float_abs_compare (const void * a, const void * b) 108 | { 109 | float fa = *(const float*) a; 110 | if(fa < 0) fa = -fa; 111 | float fb = *(const float*) b; 112 | if(fb < 0) fb = -fb; 113 | return (fa > fb) - (fa < fb); 114 | } 115 | 116 | void forward_cost_layer_gpu(cost_layer l, network_state state) 117 | { 118 | if (!state.truth) return; 119 | if (l.cost_type == 
MASKED) { 120 | mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); 121 | } 122 | 123 | if(l.cost_type == SMOOTH){ 124 | smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 125 | } else { 126 | l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 127 | } 128 | 129 | if(l.ratio){ 130 | cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 131 | qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); 132 | int n = (1-l.ratio) * l.batch*l.inputs; 133 | float thresh = l.delta[n]; 134 | thresh = 0; 135 | printf("%f\n", thresh); 136 | supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); 137 | } 138 | 139 | cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); 140 | l.cost[0] = sum_array(l.output, l.batch*l.inputs); 141 | } 142 | 143 | void backward_cost_layer_gpu(const cost_layer l, network_state state) 144 | { 145 | axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); 146 | } 147 | #endif 148 | 149 | -------------------------------------------------------------------------------- /src/darkSrc/cost_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef COST_LAYER_H 2 | #define COST_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer cost_layer; 7 | 8 | COST_TYPE get_cost_type(char *s); 9 | char *get_cost_string(COST_TYPE a); 10 | cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale); 11 | void forward_cost_layer(const cost_layer l, network_state state); 12 | void backward_cost_layer(const cost_layer l, network_state state); 13 | void resize_cost_layer(cost_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_cost_layer_gpu(cost_layer l, network_state state); 17 | void backward_cost_layer_gpu(const cost_layer l, network_state state); 18 | #endif 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- 
/src/darkSrc/crnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CRNN_LAYER_H 3 | #define CRNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize); 10 | 11 | void forward_crnn_layer(layer l, network_state state); 12 | void backward_crnn_layer(layer l, network_state state); 13 | void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); 14 | 15 | #ifdef GPU 16 | void forward_crnn_layer_gpu(layer l, network_state state); 17 | void backward_crnn_layer_gpu(layer l, network_state state); 18 | void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 19 | void push_crnn_layer(layer l); 20 | void pull_crnn_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/darkSrc/crop_layer.c: -------------------------------------------------------------------------------- 1 | #include "crop_layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | image get_crop_image(crop_layer l) 6 | { 7 | int h = l.out_h; 8 | int w = l.out_w; 9 | int c = l.out_c; 10 | return float_to_image(w,h,c,l.output); 11 | } 12 | 13 | void backward_crop_layer(const crop_layer l, network_state state){} 14 | void backward_crop_layer_gpu(const crop_layer l, network_state state){} 15 | 16 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) 17 | { 18 | fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); 19 | crop_layer l = {0}; 20 | l.type = CROP; 21 | l.batch = batch; 22 | l.h = h; 23 | l.w = w; 24 | l.c = c; 25 | l.scale = (float)crop_height / h; 26 | l.flip = flip; 27 | 
l.angle = angle; 28 | l.saturation = saturation; 29 | l.exposure = exposure; 30 | l.out_w = crop_width; 31 | l.out_h = crop_height; 32 | l.out_c = c; 33 | l.inputs = l.w * l.h * l.c; 34 | l.outputs = l.out_w * l.out_h * l.out_c; 35 | l.output = calloc(l.outputs*batch, sizeof(float)); 36 | l.forward = forward_crop_layer; 37 | l.backward = backward_crop_layer; 38 | 39 | #ifdef GPU 40 | l.forward_gpu = forward_crop_layer_gpu; 41 | l.backward_gpu = backward_crop_layer_gpu; 42 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 43 | l.rand_gpu = cuda_make_array(0, l.batch*8); 44 | #endif 45 | return l; 46 | } 47 | 48 | void resize_crop_layer(layer *l, int w, int h) 49 | { 50 | l->w = w; 51 | l->h = h; 52 | 53 | l->out_w = l->scale*w; 54 | l->out_h = l->scale*h; 55 | 56 | l->inputs = l->w * l->h * l->c; 57 | l->outputs = l->out_h * l->out_w * l->out_c; 58 | 59 | l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); 60 | #ifdef GPU 61 | cuda_free(l->output_gpu); 62 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 63 | #endif 64 | } 65 | 66 | 67 | void forward_crop_layer(const crop_layer l, network_state state) 68 | { 69 | int i,j,c,b,row,col; 70 | int index; 71 | int count = 0; 72 | int flip = (l.flip && rand()%2); 73 | int dh = rand()%(l.h - l.out_h + 1); 74 | int dw = rand()%(l.w - l.out_w + 1); 75 | float scale = 2; 76 | float trans = -1; 77 | if(l.noadjust){ 78 | scale = 1; 79 | trans = 0; 80 | } 81 | if(!state.train){ 82 | flip = 0; 83 | dh = (l.h - l.out_h)/2; 84 | dw = (l.w - l.out_w)/2; 85 | } 86 | for(b = 0; b < l.batch; ++b){ 87 | for(c = 0; c < l.c; ++c){ 88 | for(i = 0; i < l.out_h; ++i){ 89 | for(j = 0; j < l.out_w; ++j){ 90 | if(flip){ 91 | col = l.w - dw - j - 1; 92 | }else{ 93 | col = j + dw; 94 | } 95 | row = i + dh; 96 | index = col+l.w*(row+l.h*(c + l.c*b)); 97 | l.output[count++] = state.input[index]*scale + trans; 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | 
-------------------------------------------------------------------------------- /src/darkSrc/crop_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef CROP_LAYER_H 2 | #define CROP_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | typedef layer crop_layer; 9 | 10 | image get_crop_image(crop_layer l); 11 | crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); 12 | void forward_crop_layer(const crop_layer l, network_state state); 13 | void resize_crop_layer(layer *l, int w, int h); 14 | 15 | #ifdef GPU 16 | void forward_crop_layer_gpu(crop_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/darkSrc/cuda.c: -------------------------------------------------------------------------------- 1 | int gpu_index = 0; 2 | 3 | #ifdef GPU 4 | 5 | #include "cuda.h" 6 | #include "utils.h" 7 | #include "blas.h" 8 | #include "assert.h" 9 | #include 10 | #include 11 | 12 | void cuda_set_device(int n) 13 | { 14 | gpu_index = n; 15 | cudaError_t status = cudaSetDevice(n); 16 | check_error(status); 17 | } 18 | 19 | int cuda_get_device() 20 | { 21 | int n = 0; 22 | cudaError_t status = cudaGetDevice(&n); 23 | check_error(status); 24 | return n; 25 | } 26 | 27 | void check_error(cudaError_t status) 28 | { 29 | //cudaDeviceSynchronize(); 30 | cudaError_t status2 = cudaGetLastError(); 31 | if (status != cudaSuccess) 32 | { 33 | const char *s = cudaGetErrorString(status); 34 | char buffer[256]; 35 | printf("CUDA Error: %s\n", s); 36 | assert(0); 37 | snprintf(buffer, 256, "CUDA Error: %s", s); 38 | error(buffer); 39 | } 40 | if (status2 != cudaSuccess) 41 | { 42 | const char *s = cudaGetErrorString(status); 43 | char buffer[256]; 44 | printf("CUDA Error Prev: %s\n", s); 45 | assert(0); 46 | 
snprintf(buffer, 256, "CUDA Error Prev: %s", s); 47 | error(buffer); 48 | } 49 | } 50 | 51 | dim3 cuda_gridsize(size_t n){ 52 | size_t k = (n-1) / BLOCK + 1; 53 | size_t x = k; 54 | size_t y = 1; 55 | if(x > 65535){ 56 | x = ceil(sqrt(k)); 57 | y = (n-1)/(x*BLOCK) + 1; 58 | } 59 | dim3 d = {x, y, 1}; 60 | //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); 61 | return d; 62 | } 63 | 64 | #ifdef CUDNN 65 | cudnnHandle_t cudnn_handle() 66 | { 67 | static int init[16] = {0}; 68 | static cudnnHandle_t handle[16]; 69 | int i = cuda_get_device(); 70 | if(!init[i]) { 71 | cudnnCreate(&handle[i]); 72 | init[i] = 1; 73 | } 74 | return handle[i]; 75 | } 76 | #endif 77 | 78 | cublasHandle_t blas_handle() 79 | { 80 | static int init[16] = {0}; 81 | static cublasHandle_t handle[16]; 82 | int i = cuda_get_device(); 83 | if(!init[i]) { 84 | cublasCreate(&handle[i]); 85 | init[i] = 1; 86 | } 87 | return handle[i]; 88 | } 89 | 90 | float *cuda_make_array(float *x, size_t n) 91 | { 92 | float *x_gpu; 93 | size_t size = sizeof(float)*n; 94 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 95 | check_error(status); 96 | if(x){ 97 | status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 98 | check_error(status); 99 | } 100 | if(!x_gpu) error("Cuda malloc failed\n"); 101 | return x_gpu; 102 | } 103 | 104 | void cuda_random(float *x_gpu, size_t n) 105 | { 106 | static curandGenerator_t gen[16]; 107 | static int init[16] = {0}; 108 | int i = cuda_get_device(); 109 | if(!init[i]){ 110 | curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); 111 | curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); 112 | init[i] = 1; 113 | } 114 | curandGenerateUniform(gen[i], x_gpu, n); 115 | check_error(cudaPeekAtLastError()); 116 | } 117 | 118 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s) 119 | { 120 | float *tmp = calloc(n, sizeof(float)); 121 | cuda_pull_array(x_gpu, tmp, n); 122 | //int i; 123 | //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); 124 | 
axpy_cpu(n, -1, x, 1, tmp, 1); 125 | float err = dot_cpu(n, tmp, 1, tmp, 1); 126 | printf("Error %s: %f\n", s, sqrt(err/n)); 127 | free(tmp); 128 | return err; 129 | } 130 | 131 | int *cuda_make_int_array(size_t n) 132 | { 133 | int *x_gpu; 134 | size_t size = sizeof(int)*n; 135 | cudaError_t status = cudaMalloc((void **)&x_gpu, size); 136 | check_error(status); 137 | return x_gpu; 138 | } 139 | 140 | void cuda_free(float *x_gpu) 141 | { 142 | cudaError_t status = cudaFree(x_gpu); 143 | check_error(status); 144 | } 145 | 146 | void cuda_push_array(float *x_gpu, float *x, size_t n) 147 | { 148 | size_t size = sizeof(float)*n; 149 | cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); 150 | check_error(status); 151 | } 152 | 153 | void cuda_pull_array(float *x_gpu, float *x, size_t n) 154 | { 155 | size_t size = sizeof(float)*n; 156 | cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); 157 | check_error(status); 158 | } 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /src/darkSrc/cuda.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef CUDA_H 3 | #define CUDA_H 4 | 5 | #if defined(_MSC_VER) && _MSC_VER < 1900 6 | #define inline __inline 7 | #endif 8 | 9 | extern int gpu_index; 10 | 11 | #ifdef GPU 12 | 13 | #define BLOCK 512 14 | 15 | #include "cuda_runtime.h" 16 | #include "curand.h" 17 | #include "cublas_v2.h" 18 | 19 | #ifdef CUDNN 20 | #include "cudnn.h" 21 | #endif 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | void check_error(cudaError_t status); 28 | cublasHandle_t blas_handle(); 29 | float *cuda_make_array(float *x, size_t n); 30 | int *cuda_make_int_array(size_t n); 31 | void cuda_push_array(float *x_gpu, float *x, size_t n); 32 | void cuda_pull_array(float *x_gpu, float *x, size_t n); 33 | void cuda_set_device(int n); 34 | void cuda_free(float *x_gpu); 35 | void cuda_random(float *x_gpu, 
size_t n); 36 | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 37 | dim3 cuda_gridsize(size_t n); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | 43 | #ifdef CUDNN 44 | cudnnHandle_t cudnn_handle(); 45 | #endif 46 | 47 | #endif 48 | #endif 49 | 50 | 51 | -------------------------------------------------------------------------------- /src/darkSrc/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_H 2 | #define DATA_H 3 | #include 4 | 5 | #if defined(_MSC_VER) && _MSC_VER < 1900 6 | #define inline __inline 7 | #endif 8 | 9 | #include "matrix.h" 10 | #include "list.h" 11 | #include "image.h" 12 | #include "tree.h" 13 | 14 | static inline float distance_from_edge(int x, int max) 15 | { 16 | int dx = (max/2) - x; 17 | if (dx < 0) dx = -dx; 18 | dx = (max/2) + 1 - dx; 19 | dx *= 2; 20 | float dist = (float)dx/max; 21 | if (dist > 1) dist = 1; 22 | return dist; 23 | } 24 | 25 | typedef struct{ 26 | int w, h; 27 | matrix X; 28 | matrix y; 29 | int shallow; 30 | int *num_boxes; 31 | box **boxes; 32 | } data; 33 | 34 | typedef enum { 35 | CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA 36 | } data_type; 37 | 38 | typedef struct load_args{ 39 | int threads; 40 | char **paths; 41 | char *path; 42 | int n; 43 | int m; 44 | char **labels; 45 | int h; 46 | int w; 47 | int out_w; 48 | int out_h; 49 | int nh; 50 | int nw; 51 | int num_boxes; 52 | int min, max, size; 53 | int classes; 54 | int background; 55 | int scale; 56 | float jitter; 57 | float angle; 58 | float aspect; 59 | float saturation; 60 | float exposure; 61 | float hue; 62 | data *d; 63 | image *im; 64 | image *resized; 65 | data_type type; 66 | tree *hierarchy; 67 | } load_args; 68 | 69 | typedef struct{ 70 | int id; 71 | float x,y,w,h; 72 | float left, right, top, bottom; 73 | } box_label; 74 | 75 | void 
free_data(data d); 76 | 77 | pthread_t load_data(load_args args); 78 | 79 | pthread_t load_data_in_thread(load_args args); 80 | 81 | void print_letters(float *pred, int n); 82 | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); 83 | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); 84 | data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); 85 | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); 86 | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 87 | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 88 | data load_data_super(char **paths, int n, int m, int w, int h, int scale); 89 | data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); 90 | data load_go(char *filename); 91 | 92 | box_label *read_boxes(char *filename, int *n); 93 | data load_cifar10_data(char *filename); 94 | data load_all_cifar10(); 95 | 96 | data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); 97 | 98 | list *get_paths(char *filename); 99 | char **get_labels(char *filename); 100 | void get_random_batch(data d, int n, float *X, float *y); 101 | data get_data_part(data d, int part, int total); 102 | data get_random_data(data d, int num); 103 | void get_next_batch(data d, int n, int offset, float *X, float *y); 104 | data load_categorical_data_csv(char *filename, int target, int k); 105 | void normalize_data_rows(data d); 106 | void scale_data_rows(data d, float s); 107 | void translate_data_rows(data d, float s); 108 | void randomize_data(data d); 
109 | data *split_data(data d, int part, int total); 110 | data concat_data(data d1, data d2); 111 | data concat_datas(data *d, int n); 112 | void fill_truth(char *path, char **labels, int k, float *truth); 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /src/darkSrc/deconvolutional_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "convolutional_layer.h" 7 | #include "deconvolutional_layer.h" 8 | #include "gemm.h" 9 | #include "blas.h" 10 | #include "im2col.h" 11 | #include "col2im.h" 12 | #include "utils.h" 13 | #include "cuda.h" 14 | } 15 | 16 | extern "C" void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 17 | { 18 | int i; 19 | int out_h = deconvolutional_out_height(layer); 20 | int out_w = deconvolutional_out_width(layer); 21 | int size = out_h*out_w; 22 | 23 | int m = layer.size*layer.size*layer.n; 24 | int n = layer.h*layer.w; 25 | int k = layer.c; 26 | 27 | fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1); 28 | 29 | for(i = 0; i < layer.batch; ++i){ 30 | float *a = layer.weights_gpu; 31 | float *b = state.input + i*layer.c*layer.h*layer.w; 32 | float *c = layer.col_image_gpu; 33 | 34 | gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); 35 | 36 | col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size); 37 | } 38 | add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size); 39 | activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); 40 | } 41 | 42 | extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) 43 | { 44 | float alpha = 1./layer.batch; 45 | int out_h = deconvolutional_out_height(layer); 46 | int out_w = deconvolutional_out_width(layer); 47 | int size = out_h*out_w; 48 | 
int i; 49 | 50 | gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); 51 | backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); 52 | 53 | if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); 54 | 55 | for(i = 0; i < layer.batch; ++i){ 56 | int m = layer.c; 57 | int n = layer.size*layer.size*layer.n; 58 | int k = layer.h*layer.w; 59 | 60 | float *a = state.input + i*m*n; 61 | float *b = layer.col_image_gpu; 62 | float *c = layer.weight_updates_gpu; 63 | 64 | im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w, 65 | layer.size, layer.stride, 0, b); 66 | gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); 67 | 68 | if(state.delta){ 69 | int m = layer.c; 70 | int n = layer.h*layer.w; 71 | int k = layer.size*layer.size*layer.n; 72 | 73 | float *a = layer.weights_gpu; 74 | float *b = layer.col_image_gpu; 75 | float *c = state.delta + i*n*m; 76 | 77 | gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); 78 | } 79 | } 80 | } 81 | 82 | extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer) 83 | { 84 | cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); 85 | cuda_pull_array(layer.biases_gpu, layer.biases, layer.n); 86 | cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); 87 | cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 88 | } 89 | 90 | extern "C" void push_deconvolutional_layer(deconvolutional_layer layer) 91 | { 92 | cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); 93 | cuda_push_array(layer.biases_gpu, layer.biases, layer.n); 94 | cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); 95 | cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); 96 | } 97 | 98 | extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float 
learning_rate, float momentum, float decay) 99 | { 100 | int size = layer.size*layer.size*layer.c*layer.n; 101 | 102 | axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); 103 | scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); 104 | 105 | axpy_ongpu(size, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); 106 | axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); 107 | scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); 108 | } 109 | 110 | -------------------------------------------------------------------------------- /src/darkSrc/deconvolutional_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DECONVOLUTIONAL_LAYER_H 2 | #define DECONVOLUTIONAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer deconvolutional_layer; 11 | 12 | #ifdef GPU 13 | void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 14 | void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); 15 | void update_deconvolutional_layer_gpu(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 16 | void push_deconvolutional_layer(deconvolutional_layer layer); 17 | void pull_deconvolutional_layer(deconvolutional_layer layer); 18 | #endif 19 | 20 | deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); 21 | void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); 22 | void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); 23 | void update_deconvolutional_layer(deconvolutional_layer layer, float learning_rate, float momentum, float decay); 24 | void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); 25 | 26 | 
image get_deconvolutional_image(deconvolutional_layer layer); 27 | image get_deconvolutional_delta(deconvolutional_layer layer); 28 | image get_deconvolutional_filter(deconvolutional_layer layer, int i); 29 | 30 | int deconvolutional_out_height(deconvolutional_layer layer); 31 | int deconvolutional_out_width(deconvolutional_layer layer); 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /src/darkSrc/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef DEMO 2 | #define DEMO 3 | 4 | #include "image.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename); 11 | void demo1(); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/darkSrc/detection_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef DETECTION_LAYER_H 2 | #define DETECTION_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer detection_layer; 8 | 9 | detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); 10 | void forward_detection_layer(const detection_layer l, network_state state); 11 | void backward_detection_layer(const detection_layer l, network_state state); 12 | void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); 13 | 14 | #ifdef GPU 15 | void forward_detection_layer_gpu(const detection_layer l, network_state state); 16 | void backward_detection_layer_gpu(detection_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- 
/src/darkSrc/dropout_layer.c: -------------------------------------------------------------------------------- 1 | #include "dropout_layer.h" 2 | #include "utils.h" 3 | #include "cuda.h" 4 | #include <stdlib.h> 5 | #include <stdio.h> 6 | 7 | dropout_layer make_dropout_layer(int batch, int inputs, float probability) 8 | { 9 | dropout_layer l = {0}; 10 | l.type = DROPOUT; 11 | l.probability = probability; 12 | l.inputs = inputs; 13 | l.outputs = inputs; 14 | l.batch = batch; 15 | l.rand = calloc(inputs*batch, sizeof(float)); 16 | l.scale = 1./(1.-probability); 17 | l.forward = forward_dropout_layer; 18 | l.backward = backward_dropout_layer; 19 | #ifdef GPU 20 | l.forward_gpu = forward_dropout_layer_gpu; 21 | l.backward_gpu = backward_dropout_layer_gpu; 22 | l.rand_gpu = cuda_make_array(l.rand, inputs*batch); 23 | #endif 24 | fprintf(stderr, "dropout p = %.2f %4d -> %4d\n", probability, inputs, inputs); 25 | return l; 26 | } 27 | 28 | void resize_dropout_layer(dropout_layer *l, int inputs) 29 | { 30 | l->rand = realloc(l->rand, inputs*l->batch*sizeof(float)); /* fix: was l->inputs (stale, pre-resize size) — use the new inputs, matching the GPU path below */ 31 | #ifdef GPU 32 | cuda_free(l->rand_gpu); 33 | 34 | l->rand_gpu = cuda_make_array(l->rand, inputs*l->batch); 35 | #endif 36 | } 37 | 38 | void forward_dropout_layer(dropout_layer l, network_state state) 39 | { 40 | int i; 41 | if (!state.train) return; 42 | for(i = 0; i < l.batch * l.inputs; ++i){ 43 | float r = rand_uniform(0, 1); 44 | l.rand[i] = r; 45 | if(r < l.probability) state.input[i] = 0; 46 | else state.input[i] *= l.scale; 47 | } 48 | } 49 | 50 | void backward_dropout_layer(dropout_layer l, network_state state) 51 | { 52 | int i; 53 | if(!state.delta) return; 54 | for(i = 0; i < l.batch * l.inputs; ++i){ 55 | float r = l.rand[i]; 56 | if(r < l.probability) state.delta[i] = 0; 57 | else state.delta[i] *= l.scale; 58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /src/darkSrc/dropout_layer.h: 
-------------------------------------------------------------------------------- 1 | #ifndef DROPOUT_LAYER_H 2 | #define DROPOUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer dropout_layer; 8 | 9 | dropout_layer make_dropout_layer(int batch, int inputs, float probability); 10 | 11 | void forward_dropout_layer(dropout_layer l, network_state state); 12 | void backward_dropout_layer(dropout_layer l, network_state state); 13 | void resize_dropout_layer(dropout_layer *l, int inputs); 14 | 15 | #ifdef GPU 16 | void forward_dropout_layer_gpu(dropout_layer l, network_state state); 17 | void backward_dropout_layer_gpu(dropout_layer l, network_state state); 18 | 19 | #endif 20 | #endif 21 | -------------------------------------------------------------------------------- /src/darkSrc/dropout_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "dropout_layer.h" 7 | #include "cuda.h" 8 | #include "utils.h" 9 | } 10 | 11 | __global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) 12 | { 13 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 14 | if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; 15 | } 16 | 17 | void forward_dropout_layer_gpu(dropout_layer layer, network_state state) 18 | { 19 | if (!state.train) return; 20 | int size = layer.inputs*layer.batch; 21 | cuda_random(layer.rand_gpu, size); 22 | /* 23 | int i; 24 | for(i = 0; i < size; ++i){ 25 | layer.rand[i] = rand_uniform(); 26 | } 27 | cuda_push_array(layer.rand_gpu, layer.rand, size); 28 | */ 29 | 30 | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); 31 | check_error(cudaPeekAtLastError()); 32 | } 33 | 34 | void backward_dropout_layer_gpu(dropout_layer layer, network_state state) 35 | { 36 | if(!state.delta) return; 37 | int size = layer.inputs*layer.batch; 38 | 39 | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.delta, size, layer.rand_gpu, layer.probability, layer.scale); 40 | check_error(cudaPeekAtLastError()); 41 | } 42 | -------------------------------------------------------------------------------- /src/darkSrc/gemm.h: -------------------------------------------------------------------------------- 1 | #ifndef GEMM_H 2 | #define GEMM_H 3 | 4 | void gemm_bin(int M, int N, int K, float ALPHA, 5 | char *A, int lda, 6 | float *B, int ldb, 7 | float *C, int ldc); 8 | 9 | void gemm(int TA, int TB, int M, int N, int K, float ALPHA, 10 | float *A, int lda, 11 | float *B, int ldb, 12 | float BETA, 13 | float *C, int ldc); 14 | 15 | void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, 16 | float *A, int lda, 17 | float *B, int ldb, 18 | float BETA, 19 | float *C, int ldc); 20 | 21 | #ifdef GPU 22 | void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, 23 | float *A_gpu, int lda, 24 | float *B_gpu, int ldb, 25 | float BETA, 26 | float *C_gpu, int ldc); 27 | 28 | void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, 29 | float *A, int lda, 30 | float *B, int ldb, 31 | float BETA, 32 | float *C, int ldc); 33 | #endif 34 | #endif 35 | 
-------------------------------------------------------------------------------- /src/darkSrc/gettimeofday.c: -------------------------------------------------------------------------------- 1 | #include "gettimeofday.h" 2 | #ifdef WIN32 3 | 4 | int gettimeofday(struct timeval *tv, struct timezone *tz) 5 | { 6 | FILETIME ft; 7 | unsigned __int64 tmpres = 0; 8 | static int tzflag; 9 | 10 | if (NULL != tv) 11 | { 12 | GetSystemTimeAsFileTime(&ft); 13 | 14 | tmpres |= ft.dwHighDateTime; 15 | tmpres <<= 32; 16 | tmpres |= ft.dwLowDateTime; 17 | 18 | /*converting file time to unix epoch*/ 19 | tmpres -= DELTA_EPOCH_IN_MICROSECS; 20 | tmpres /= 10; /*convert into microseconds*/ 21 | tv->tv_sec = (long)(tmpres / 1000000UL); 22 | tv->tv_usec = (long)(tmpres % 1000000UL); 23 | } 24 | 25 | if (NULL != tz) 26 | { 27 | if (!tzflag) 28 | { 29 | _tzset(); 30 | tzflag++; 31 | } 32 | tz->tz_minuteswest = _timezone / 60; 33 | tz->tz_dsttime = _daylight; 34 | } 35 | 36 | return 0; 37 | } 38 | 39 | /* never worry about timersub type activies again -- from GLIBC and upcased. */ 40 | int timersub(struct timeval *a, struct timeval *b, struct timeval *result) 41 | { 42 | (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; 43 | (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; 44 | if ((result)->tv_usec < 0) { 45 | --(result)->tv_sec; 46 | (result)->tv_usec += 1000000; 47 | } 48 | 49 | return 0; 50 | } 51 | #endif 52 | -------------------------------------------------------------------------------- /src/darkSrc/gettimeofday.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef WIN32 4 | #include <time.h> 5 | #include <windows.h> /* NOTE(review): header name was lost in this dump; windows.h supplies FILETIME and GetSystemTimeAsFileTime used in gettimeofday.c -- struct timeval may additionally require <winsock2.h>; confirm against upstream. */
6 | #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS) 7 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64 8 | #else 9 | #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL 10 | #endif 11 | 12 | struct timezone 13 | { 14 | int tz_minuteswest; /* minutes W of Greenwich */ 15 | int tz_dsttime; /* type of dst correction */ 16 | }; 17 | 18 | int gettimeofday(struct timeval *tv, struct timezone *tz); 19 | 20 | /* never worry about timersub type activies again -- from GLIBC and upcased. */ 21 | int timersub(struct timeval *a, struct timeval *b, struct timeval *result); 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/gru_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef GRU_LAYER_H 3 | #define GRU_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); 10 | 11 | void forward_gru_layer(layer l, network_state state); 12 | void backward_gru_layer(layer l, network_state state); 13 | void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay); 14 | 15 | #ifdef GPU 16 | void forward_gru_layer_gpu(layer l, network_state state); 17 | void backward_gru_layer_gpu(layer l, network_state state); 18 | void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay); 19 | void push_gru_layer(layer l); 20 | void pull_gru_layer(layer l); 21 | #endif 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/darkSrc/im2col.c: -------------------------------------------------------------------------------- 1 | #include "im2col.h" 2 | #include 3 | float im2col_get_pixel(float *im, int height, int width, int channels, 4 | int row, int col, int channel, int pad) 5 | { 6 | row -= pad; 7 | col -= pad; 8 | 9 | if (row < 0 || 
col < 0 || 10 | row >= height || col >= width) return 0; 11 | return im[col + width*(row + height*channel)]; 12 | } 13 | 14 | //From Berkeley Vision's Caffe! 15 | //https://github.com/BVLC/caffe/blob/master/LICENSE 16 | void im2col_cpu(float* data_im, 17 | int channels, int height, int width, 18 | int ksize, int stride, int pad, float* data_col) 19 | { 20 | int c,h,w; 21 | int height_col = (height + 2*pad - ksize) / stride + 1; 22 | int width_col = (width + 2*pad - ksize) / stride + 1; 23 | 24 | int channels_col = channels * ksize * ksize; 25 | for (c = 0; c < channels_col; ++c) { 26 | int w_offset = c % ksize; 27 | int h_offset = (c / ksize) % ksize; 28 | int c_im = c / ksize / ksize; 29 | for (h = 0; h < height_col; ++h) { 30 | for (w = 0; w < width_col; ++w) { 31 | int im_row = h_offset + h * stride; 32 | int im_col = w_offset + w * stride; 33 | int col_index = (c * height_col + h) * width_col + w; 34 | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, 35 | im_row, im_col, c_im, pad); 36 | } 37 | } 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/darkSrc/im2col.h: -------------------------------------------------------------------------------- 1 | #ifndef IM2COL_H 2 | #define IM2COL_H 3 | 4 | void im2col_cpu(float* data_im, 5 | int channels, int height, int width, 6 | int ksize, int stride, int pad, float* data_col); 7 | 8 | #ifdef GPU 9 | 10 | void im2col_ongpu(float *im, 11 | int channels, int height, int width, 12 | int ksize, int stride, int pad,float *data_col); 13 | 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /src/darkSrc/im2col_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "im2col.h" 7 | #include "cuda.h" 8 | } 9 | 10 | // src: 
https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu 11 | // You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE 12 | 13 | __global__ void im2col_gpu_kernel(const int n, const float* data_im, 14 | const int height, const int width, const int ksize, 15 | const int pad, 16 | const int stride, 17 | const int height_col, const int width_col, 18 | float *data_col) { 19 | int index = blockIdx.x*blockDim.x+threadIdx.x; 20 | for(; index < n; index += blockDim.x*gridDim.x){ 21 | int w_out = index % width_col; 22 | int h_index = index / width_col; 23 | int h_out = h_index % height_col; 24 | int channel_in = h_index / height_col; 25 | int channel_out = channel_in * ksize * ksize; 26 | int h_in = h_out * stride - pad; 27 | int w_in = w_out * stride - pad; 28 | float* data_col_ptr = data_col; 29 | data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; 30 | const float* data_im_ptr = data_im; 31 | data_im_ptr += (channel_in * height + h_in) * width + w_in; 32 | for (int i = 0; i < ksize; ++i) { 33 | for (int j = 0; j < ksize; ++j) { 34 | int h = h_in + i; 35 | int w = w_in + j; 36 | 37 | *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? 38 | data_im_ptr[i * width + j] : 0; 39 | 40 | //*data_col_ptr = data_im_ptr[ii * width + jj]; 41 | 42 | data_col_ptr += height_col * width_col; 43 | } 44 | } 45 | } 46 | } 47 | 48 | void im2col_ongpu(float *im, 49 | int channels, int height, int width, 50 | int ksize, int stride, int pad, float *data_col){ 51 | // We are going to launch channels * height_col * width_col kernels, each 52 | // kernel responsible for copying a single-channel grid. 
53 | int height_col = (height + 2 * pad - ksize) / stride + 1; 54 | int width_col = (width + 2 * pad - ksize) / stride + 1; 55 | int num_kernels = channels * height_col * width_col; 56 | im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, 57 | BLOCK>>>( 58 | num_kernels, im, height, width, ksize, pad, 59 | stride, height_col, 60 | width_col, data_col); 61 | } 62 | -------------------------------------------------------------------------------- /src/darkSrc/image.h: -------------------------------------------------------------------------------- 1 | #ifndef IMAGE_H 2 | #define IMAGE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "box.h" 10 | 11 | typedef struct { 12 | int h; 13 | int w; 14 | int c; 15 | float *data; 16 | } image; 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | float get_color(int c, int x, int max); 23 | void flip_image(image a); 24 | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); 25 | void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); 26 | void draw_bbox(image a, box bbox, int w, float r, float g, float b); 27 | void draw_label(image a, int r, int c, image label, const float *rgb); 28 | void write_label(image a, int r, int c, image *characters, char *string, float *rgb); 29 | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes); 30 | image image_distance(image a, image b); 31 | void scale_image(image m, float s); 32 | image crop_image(image im, int dx, int dy, int w, int h); 33 | image random_crop_image(image im, int w, int h); 34 | image random_augment_image(image im, float angle, float aspect, int low, int high, int size); 35 | void random_distort_image(image im, float hue, float saturation, float exposure); 36 | image resize_image(image im, int w, int h); 37 | void fill_image(image m, float s); 38 | void letterbox_image_into(image im, int w, 
int h, image boxed); 39 | image letterbox_image(image im, int w, int h); 40 | image resize_min(image im, int min); 41 | image resize_max(image im, int max); 42 | void translate_image(image m, float s); 43 | void normalize_image(image p); 44 | image rotate_image(image m, float rad); 45 | void rotate_image_cw(image im, int times); 46 | void embed_image(image source, image dest, int dx, int dy); 47 | void saturate_image(image im, float sat); 48 | void exposure_image(image im, float sat); 49 | void distort_image(image im, float hue, float sat, float val); 50 | void saturate_exposure_image(image im, float sat, float exposure); 51 | void hsv_to_rgb(image im); 52 | void rgbgr_image(image im); 53 | void constrain_image(image im); 54 | void composite_3d(char *f1, char *f2, char *out, int delta); 55 | int best_3d_shift_r(image a, image b, int min, int max); 56 | 57 | image grayscale_image(image im); 58 | image threshold_image(image im, float thresh); 59 | 60 | image collapse_image_layers(image source, int border); 61 | image collapse_images_horz(image *ims, int n); 62 | image collapse_images_vert(image *ims, int n); 63 | 64 | void show_image(image p, const char *name); 65 | void show_image_normalized(image im, const char *name); 66 | void save_image_png(image im, const char *name); 67 | void save_image(image p, const char *name); 68 | void show_images(image *ims, int n, char *window); 69 | void show_image_layers(image p, char *name); 70 | void show_image_collapsed(image p, char *name); 71 | 72 | void print_image(image m); 73 | 74 | image make_image(int w, int h, int c); 75 | image make_random_image(int w, int h, int c); 76 | image make_empty_image(int w, int h, int c); 77 | image float_to_image(int w, int h, int c, float *data); 78 | image copy_image(image p); 79 | image load_image(char *filename, int w, int h, int c); 80 | image load_image_color(char *filename, int w, int h); 81 | image **load_alphabet(); 82 | 83 | float get_pixel(image m, int x, int y, int c); 84 | float 
get_pixel_extend(image m, int x, int y, int c); 85 | void set_pixel(image m, int x, int y, int c, float val); 86 | void add_pixel(image m, int x, int y, int c, float val); 87 | float bilinear_interpolate(image im, float x, float y, int c); 88 | 89 | image get_image_layer(image m, int l); 90 | 91 | void free_image(image m); 92 | void test_resize(char *filename); 93 | 94 | //#ifdef OPENCV 95 | //#include "opencv2/imgproc/imgproc_c.h" 96 | //image ipl_to_image(IplImage* src); 97 | //void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes); 98 | //void show_image_cv_ipl(IplImage *disp, const char *name, const char *out_filename); 99 | //#endif 100 | #ifdef __cplusplus 101 | } 102 | #endif 103 | 104 | #endif 105 | 106 | -------------------------------------------------------------------------------- /src/darkSrc/layer.c: -------------------------------------------------------------------------------- 1 | #include "layer.h" 2 | #include "cuda.h" 3 | #include 4 | 5 | void free_layer(layer l) 6 | { 7 | if (l.type == DROPOUT) { 8 | if (l.rand) free(l.rand); 9 | #ifdef GPU 10 | if (l.rand_gpu) cuda_free(l.rand_gpu); 11 | #endif 12 | return; 13 | } 14 | if (l.cweights) free(l.cweights); 15 | if (l.indexes) free(l.indexes); 16 | if (l.input_layers) free(l.input_layers); 17 | if (l.input_sizes) free(l.input_sizes); 18 | if (l.map) free(l.map); 19 | if (l.rand) free(l.rand); 20 | if (l.cost) free(l.cost); 21 | if (l.state) free(l.state); 22 | if (l.prev_state) free(l.prev_state); 23 | if (l.forgot_state) free(l.forgot_state); 24 | if (l.forgot_delta) free(l.forgot_delta); 25 | if (l.state_delta) free(l.state_delta); 26 | if (l.concat) free(l.concat); 27 | if (l.concat_delta) free(l.concat_delta); 28 | if (l.binary_weights) free(l.binary_weights); 29 | if (l.biases) free(l.biases); 30 | if (l.bias_updates) free(l.bias_updates); 31 | if (l.scales) free(l.scales); 32 | if (l.scale_updates) 
free(l.scale_updates); 33 | if (l.weights) free(l.weights); 34 | if (l.weight_updates) free(l.weight_updates); 35 | if (l.delta) free(l.delta); 36 | if (l.output) free(l.output); 37 | if (l.squared) free(l.squared); 38 | if (l.norms) free(l.norms); 39 | if (l.spatial_mean) free(l.spatial_mean); 40 | if (l.mean) free(l.mean); 41 | if (l.variance) free(l.variance); 42 | if (l.mean_delta) free(l.mean_delta); 43 | if (l.variance_delta) free(l.variance_delta); 44 | if (l.rolling_mean) free(l.rolling_mean); 45 | if (l.rolling_variance) free(l.rolling_variance); 46 | if (l.x) free(l.x); 47 | if (l.x_norm) free(l.x_norm); 48 | if (l.m) free(l.m); 49 | if (l.v) free(l.v); 50 | if (l.z_cpu) free(l.z_cpu); 51 | if (l.r_cpu) free(l.r_cpu); 52 | if (l.h_cpu) free(l.h_cpu); 53 | if (l.binary_input) free(l.binary_input); 54 | 55 | #ifdef GPU 56 | if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); 57 | 58 | if (l.z_gpu) cuda_free(l.z_gpu); 59 | if (l.r_gpu) cuda_free(l.r_gpu); 60 | if (l.h_gpu) cuda_free(l.h_gpu); 61 | if (l.m_gpu) cuda_free(l.m_gpu); 62 | if (l.v_gpu) cuda_free(l.v_gpu); 63 | if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); 64 | if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); 65 | if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); 66 | if (l.state_gpu) cuda_free(l.state_gpu); 67 | if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); 68 | if (l.gate_gpu) cuda_free(l.gate_gpu); 69 | if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); 70 | if (l.save_gpu) cuda_free(l.save_gpu); 71 | if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); 72 | if (l.concat_gpu) cuda_free(l.concat_gpu); 73 | if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); 74 | if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); 75 | if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); 76 | if (l.mean_gpu) cuda_free(l.mean_gpu); 77 | if (l.variance_gpu) cuda_free(l.variance_gpu); 78 | if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); 79 | if (l.rolling_variance_gpu) 
cuda_free(l.rolling_variance_gpu); 80 | if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); 81 | if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); 82 | if (l.x_gpu) cuda_free(l.x_gpu); 83 | if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); 84 | if (l.weights_gpu) cuda_free(l.weights_gpu); 85 | if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); 86 | if (l.biases_gpu) cuda_free(l.biases_gpu); 87 | if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); 88 | if (l.scales_gpu) cuda_free(l.scales_gpu); 89 | if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); 90 | if (l.output_gpu) cuda_free(l.output_gpu); 91 | if (l.delta_gpu) cuda_free(l.delta_gpu); 92 | if (l.rand_gpu) cuda_free(l.rand_gpu); 93 | if (l.squared_gpu) cuda_free(l.squared_gpu); 94 | if (l.norms_gpu) cuda_free(l.norms_gpu); 95 | #endif 96 | } 97 | -------------------------------------------------------------------------------- /src/darkSrc/layer.h: -------------------------------------------------------------------------------- 1 | #ifndef BASE_LAYER_H 2 | #define BASE_LAYER_H 3 | 4 | #include "activations.h" 5 | #include "stddef.h" 6 | #include "tree.h" 7 | 8 | struct network_state; 9 | 10 | struct layer; 11 | typedef struct layer layer; 12 | 13 | typedef enum { 14 | CONVOLUTIONAL, 15 | DECONVOLUTIONAL, 16 | CONNECTED, 17 | MAXPOOL, 18 | SOFTMAX, 19 | DETECTION, 20 | DROPOUT, 21 | CROP, 22 | ROUTE, 23 | COST, 24 | NORMALIZATION, 25 | AVGPOOL, 26 | LOCAL, 27 | SHORTCUT, 28 | ACTIVE, 29 | RNN, 30 | GRU, 31 | CRNN, 32 | BATCHNORM, 33 | NETWORK, 34 | XNOR, 35 | REGION, 36 | REORG, 37 | BLANK 38 | } LAYER_TYPE; 39 | 40 | typedef enum{ 41 | SSE, MASKED, SMOOTH 42 | } COST_TYPE; 43 | 44 | struct layer{ 45 | LAYER_TYPE type; 46 | ACTIVATION activation; 47 | COST_TYPE cost_type; 48 | void (*forward) (struct layer, struct network_state); 49 | void (*backward) (struct layer, struct network_state); 50 | void (*update) (struct layer, int, float, float, float); 51 | void (*forward_gpu) (struct layer, 
struct network_state); 52 | void (*backward_gpu) (struct layer, struct network_state); 53 | void (*update_gpu) (struct layer, int, float, float, float); 54 | int batch_normalize; 55 | int shortcut; 56 | int batch; 57 | int forced; 58 | int flipped; 59 | int inputs; 60 | int outputs; 61 | int truths; 62 | int h,w,c; 63 | int out_h, out_w, out_c; 64 | int n; 65 | int max_boxes; 66 | int groups; 67 | int size; 68 | int side; 69 | int stride; 70 | int reverse; 71 | int pad; 72 | int sqrt; 73 | int flip; 74 | int index; 75 | int binary; 76 | int xnor; 77 | int steps; 78 | int hidden; 79 | float dot; 80 | float angle; 81 | float jitter; 82 | float saturation; 83 | float exposure; 84 | float shift; 85 | float ratio; 86 | int softmax; 87 | int classes; 88 | int coords; 89 | int background; 90 | int rescore; 91 | int objectness; 92 | int does_cost; 93 | int joint; 94 | int noadjust; 95 | int reorg; 96 | int log; 97 | 98 | int adam; 99 | float B1; 100 | float B2; 101 | float eps; 102 | float *m_gpu; 103 | float *v_gpu; 104 | int t; 105 | float *m; 106 | float *v; 107 | 108 | tree *softmax_tree; 109 | int *map; 110 | 111 | float alpha; 112 | float beta; 113 | float kappa; 114 | 115 | float coord_scale; 116 | float object_scale; 117 | float noobject_scale; 118 | float class_scale; 119 | int bias_match; 120 | int random; 121 | float thresh; 122 | int classfix; 123 | int absolute; 124 | 125 | int dontload; 126 | int dontloadscales; 127 | 128 | float temperature; 129 | float probability; 130 | float scale; 131 | 132 | int *indexes; 133 | float *rand; 134 | float *cost; 135 | char *cweights; 136 | float *state; 137 | float *prev_state; 138 | float *forgot_state; 139 | float *forgot_delta; 140 | float *state_delta; 141 | 142 | float *concat; 143 | float *concat_delta; 144 | 145 | float *binary_weights; 146 | 147 | float *biases; 148 | float *bias_updates; 149 | 150 | float *scales; 151 | float *scale_updates; 152 | 153 | float *weights; 154 | float *weight_updates; 155 | 156 | 
float *col_image; 157 | int * input_layers; 158 | int * input_sizes; 159 | float * delta; 160 | float * output; 161 | float * squared; 162 | float * norms; 163 | 164 | float * spatial_mean; 165 | float * mean; 166 | float * variance; 167 | 168 | float * mean_delta; 169 | float * variance_delta; 170 | 171 | float * rolling_mean; 172 | float * rolling_variance; 173 | 174 | float * x; 175 | float * x_norm; 176 | 177 | struct layer *input_layer; 178 | struct layer *self_layer; 179 | struct layer *output_layer; 180 | 181 | struct layer *input_gate_layer; 182 | struct layer *state_gate_layer; 183 | struct layer *input_save_layer; 184 | struct layer *state_save_layer; 185 | struct layer *input_state_layer; 186 | struct layer *state_state_layer; 187 | 188 | struct layer *input_z_layer; 189 | struct layer *state_z_layer; 190 | 191 | struct layer *input_r_layer; 192 | struct layer *state_r_layer; 193 | 194 | struct layer *input_h_layer; 195 | struct layer *state_h_layer; 196 | 197 | float *z_cpu; 198 | float *r_cpu; 199 | float *h_cpu; 200 | 201 | float *binary_input; 202 | 203 | size_t workspace_size; 204 | 205 | #ifdef GPU 206 | float *z_gpu; 207 | float *r_gpu; 208 | float *h_gpu; 209 | 210 | int *indexes_gpu; 211 | float * prev_state_gpu; 212 | float * forgot_state_gpu; 213 | float * forgot_delta_gpu; 214 | float * state_gpu; 215 | float * state_delta_gpu; 216 | float * gate_gpu; 217 | float * gate_delta_gpu; 218 | float * save_gpu; 219 | float * save_delta_gpu; 220 | float * concat_gpu; 221 | float * concat_delta_gpu; 222 | 223 | float *binary_input_gpu; 224 | float *binary_weights_gpu; 225 | 226 | float * mean_gpu; 227 | float * variance_gpu; 228 | 229 | float * rolling_mean_gpu; 230 | float * rolling_variance_gpu; 231 | 232 | float * variance_delta_gpu; 233 | float * mean_delta_gpu; 234 | 235 | float * col_image_gpu; 236 | 237 | float * x_gpu; 238 | float * x_norm_gpu; 239 | float * weights_gpu; 240 | float * weight_updates_gpu; 241 | 242 | float * biases_gpu; 243 | 
float * bias_updates_gpu; 244 | 245 | float * scales_gpu; 246 | float * scale_updates_gpu; 247 | 248 | float * output_gpu; 249 | float * delta_gpu; 250 | float * rand_gpu; 251 | float * squared_gpu; 252 | float * norms_gpu; 253 | #ifdef CUDNN 254 | cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; 255 | cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; 256 | cudnnFilterDescriptor_t weightDesc; 257 | cudnnFilterDescriptor_t dweightDesc; 258 | cudnnConvolutionDescriptor_t convDesc; 259 | cudnnConvolutionFwdAlgo_t fw_algo; 260 | cudnnConvolutionBwdDataAlgo_t bd_algo; 261 | cudnnConvolutionBwdFilterAlgo_t bf_algo; 262 | #endif 263 | #endif 264 | }; 265 | 266 | void free_layer(layer); 267 | 268 | #endif 269 | -------------------------------------------------------------------------------- /src/darkSrc/list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "list.h" 4 | 5 | list *make_list() 6 | { 7 | list *l = malloc(sizeof(list)); 8 | l->size = 0; 9 | l->front = 0; 10 | l->back = 0; 11 | return l; 12 | } 13 | 14 | /* 15 | void transfer_node(list *s, list *d, node *n) 16 | { 17 | node *prev, *next; 18 | prev = n->prev; 19 | next = n->next; 20 | if(prev) prev->next = next; 21 | if(next) next->prev = prev; 22 | --s->size; 23 | if(s->front == n) s->front = next; 24 | if(s->back == n) s->back = prev; 25 | } 26 | */ 27 | 28 | void *list_pop(list *l){ 29 | if(!l->back) return 0; 30 | node *b = l->back; 31 | void *val = b->val; 32 | l->back = b->prev; 33 | if(l->back) l->back->next = 0; 34 | free(b); 35 | --l->size; 36 | 37 | return val; 38 | } 39 | 40 | void list_insert(list *l, void *val) 41 | { 42 | node *new = malloc(sizeof(node)); 43 | new->val = val; 44 | new->next = 0; 45 | 46 | if(!l->back){ 47 | l->front = new; 48 | new->prev = 0; 49 | }else{ 50 | l->back->next = new; 51 | new->prev = l->back; 52 | } 53 | l->back = new; 54 | ++l->size; 55 | } 56 | 57 | void free_node(node *n) 58 | { 59 | 
node *next; 60 | while(n) { 61 | next = n->next; 62 | free(n); 63 | n = next; 64 | } 65 | } 66 | 67 | void free_list(list *l) 68 | { 69 | free_node(l->front); 70 | free(l); 71 | } 72 | 73 | void free_list_contents(list *l) 74 | { 75 | node *n = l->front; 76 | while(n){ 77 | free(n->val); 78 | n = n->next; 79 | } 80 | } 81 | 82 | void **list_to_array(list *l) 83 | { 84 | void **a = calloc(l->size, sizeof(void*)); 85 | int count = 0; 86 | node *n = l->front; 87 | while(n){ 88 | a[count++] = n->val; 89 | n = n->next; 90 | } 91 | return a; 92 | } 93 | -------------------------------------------------------------------------------- /src/darkSrc/list.h: -------------------------------------------------------------------------------- 1 | #ifndef LIST_H 2 | #define LIST_H 3 | 4 | typedef struct node{ 5 | void *val; 6 | struct node *next; 7 | struct node *prev; 8 | } node; 9 | 10 | typedef struct list{ 11 | int size; 12 | node *front; 13 | node *back; 14 | } list; 15 | 16 | list *make_list(); 17 | int list_find(list *l, void *val); 18 | 19 | void list_insert(list *, void *); 20 | 21 | void **list_to_array(list *l); 22 | 23 | void free_list(list *l); 24 | void free_list_contents(list *l); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/darkSrc/local_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCAL_LAYER_H 2 | #define LOCAL_LAYER_H 3 | 4 | #include "cuda.h" 5 | #include "image.h" 6 | #include "activations.h" 7 | #include "layer.h" 8 | #include "network.h" 9 | 10 | typedef layer local_layer; 11 | 12 | #ifdef GPU 13 | void forward_local_layer_gpu(local_layer layer, network_state state); 14 | void backward_local_layer_gpu(local_layer layer, network_state state); 15 | void update_local_layer_gpu(local_layer layer, int batch, float learning_rate, float momentum, float decay); 16 | 17 | void push_local_layer(local_layer layer); 18 | void 
pull_local_layer(local_layer layer); 19 | #endif 20 | 21 | local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); 22 | 23 | void forward_local_layer(const local_layer layer, network_state state); 24 | void backward_local_layer(local_layer layer, network_state state); 25 | void update_local_layer(local_layer layer, int batch, float learning_rate, float momentum, float decay); 26 | 27 | void bias_output(float *output, float *biases, int batch, int n, int size); 28 | void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /src/darkSrc/matrix.c: -------------------------------------------------------------------------------- 1 | #include "matrix.h" 2 | #include "utils.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void free_matrix(matrix m) 10 | { 11 | int i; 12 | for(i = 0; i < m.rows; ++i) free(m.vals[i]); 13 | free(m.vals); 14 | } 15 | 16 | float matrix_topk_accuracy(matrix truth, matrix guess, int k) 17 | { 18 | int *indexes = calloc(k, sizeof(int)); 19 | int n = truth.cols; 20 | int i,j; 21 | int correct = 0; 22 | for(i = 0; i < truth.rows; ++i){ 23 | top_k(guess.vals[i], n, k, indexes); 24 | for(j = 0; j < k; ++j){ 25 | int class = indexes[j]; 26 | if(truth.vals[i][class]){ 27 | ++correct; 28 | break; 29 | } 30 | } 31 | } 32 | free(indexes); 33 | return (float)correct/truth.rows; 34 | } 35 | 36 | void scale_matrix(matrix m, float scale) 37 | { 38 | int i,j; 39 | for(i = 0; i < m.rows; ++i){ 40 | for(j = 0; j < m.cols; ++j){ 41 | m.vals[i][j] *= scale; 42 | } 43 | } 44 | } 45 | 46 | matrix resize_matrix(matrix m, int size) 47 | { 48 | int i; 49 | if (m.rows == size) return m; 50 | if (m.rows < size) { 51 | m.vals = realloc(m.vals, size*sizeof(float*)); 52 | for (i = m.rows; i < size; ++i) { 53 | m.vals[i] = calloc(m.cols, 
sizeof(float)); 54 | } 55 | } else if (m.rows > size) { 56 | for (i = size; i < m.rows; ++i) { 57 | free(m.vals[i]); 58 | } 59 | m.vals = realloc(m.vals, size*sizeof(float*)); 60 | } 61 | m.rows = size; 62 | return m; 63 | } 64 | 65 | void matrix_add_matrix(matrix from, matrix to) 66 | { 67 | assert(from.rows == to.rows && from.cols == to.cols); 68 | int i,j; 69 | for(i = 0; i < from.rows; ++i){ 70 | for(j = 0; j < from.cols; ++j){ 71 | to.vals[i][j] += from.vals[i][j]; 72 | } 73 | } 74 | } 75 | 76 | matrix make_matrix(int rows, int cols) 77 | { 78 | int i; 79 | matrix m; 80 | m.rows = rows; 81 | m.cols = cols; 82 | m.vals = calloc(m.rows, sizeof(float *)); 83 | for(i = 0; i < m.rows; ++i){ 84 | m.vals[i] = calloc(m.cols, sizeof(float)); 85 | } 86 | return m; 87 | } 88 | 89 | matrix hold_out_matrix(matrix *m, int n) 90 | { 91 | int i; 92 | matrix h; 93 | h.rows = n; 94 | h.cols = m->cols; 95 | h.vals = calloc(h.rows, sizeof(float *)); 96 | for(i = 0; i < n; ++i){ 97 | int index = rand()%m->rows; 98 | h.vals[i] = m->vals[index]; 99 | m->vals[index] = m->vals[--(m->rows)]; 100 | } 101 | return h; 102 | } 103 | 104 | float *pop_column(matrix *m, int c) 105 | { 106 | float *col = calloc(m->rows, sizeof(float)); 107 | int i, j; 108 | for(i = 0; i < m->rows; ++i){ 109 | col[i] = m->vals[i][c]; 110 | for(j = c; j < m->cols-1; ++j){ 111 | m->vals[i][j] = m->vals[i][j+1]; 112 | } 113 | } 114 | --m->cols; 115 | return col; 116 | } 117 | 118 | matrix csv_to_matrix(char *filename) 119 | { 120 | FILE *fp = fopen(filename, "r"); 121 | if(!fp) file_error(filename); 122 | 123 | matrix m; 124 | m.cols = -1; 125 | 126 | char *line; 127 | 128 | int n = 0; 129 | int size = 1024; 130 | m.vals = calloc(size, sizeof(float*)); 131 | while((line = fgetl(fp))){ 132 | if(m.cols == -1) m.cols = count_fields(line); 133 | if(n == size){ 134 | size *= 2; 135 | m.vals = realloc(m.vals, size*sizeof(float*)); 136 | } 137 | m.vals[n] = parse_fields(line, m.cols); 138 | free(line); 139 | ++n; 140 | } 
141 | m.vals = realloc(m.vals, n*sizeof(float*)); 142 | m.rows = n; 143 | return m; 144 | } 145 | 146 | void matrix_to_csv(matrix m) 147 | { 148 | int i, j; 149 | 150 | for(i = 0; i < m.rows; ++i){ 151 | for(j = 0; j < m.cols; ++j){ 152 | if(j > 0) printf(","); 153 | printf("%.17g", m.vals[i][j]); 154 | } 155 | printf("\n"); 156 | } 157 | } 158 | 159 | void print_matrix(matrix m) 160 | { 161 | int i, j; 162 | printf("%d X %d Matrix:\n",m.rows, m.cols); 163 | printf(" __"); 164 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 165 | printf("__ \n"); 166 | 167 | printf("| "); 168 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 169 | printf(" |\n"); 170 | 171 | for(i = 0; i < m.rows; ++i){ 172 | printf("| "); 173 | for(j = 0; j < m.cols; ++j){ 174 | printf("%15.7f ", m.vals[i][j]); 175 | } 176 | printf(" |\n"); 177 | } 178 | printf("|__"); 179 | for(j = 0; j < 16*m.cols-1; ++j) printf(" "); 180 | printf("__|\n"); 181 | } 182 | -------------------------------------------------------------------------------- /src/darkSrc/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | typedef struct matrix{ 4 | int rows, cols; 5 | float **vals; 6 | } matrix; 7 | 8 | matrix make_matrix(int rows, int cols); 9 | void free_matrix(matrix m); 10 | void print_matrix(matrix m); 11 | 12 | matrix csv_to_matrix(char *filename); 13 | void matrix_to_csv(matrix m); 14 | matrix hold_out_matrix(matrix *m, int n); 15 | float matrix_topk_accuracy(matrix truth, matrix guess, int k); 16 | void matrix_add_matrix(matrix from, matrix to); 17 | void scale_matrix(matrix m, float scale); 18 | matrix resize_matrix(matrix m, int size); 19 | 20 | float *pop_column(matrix *m, int c); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer.c: -------------------------------------------------------------------------------- 1 | #include "maxpool_layer.h" 2 | 
#include "cuda.h"
/* Bug fix: the dump contained a bare "#include" here — the angle-bracketed
 * header name was stripped. fprintf() in make_maxpool_layer needs <stdio.h>. */
#include <stdio.h>

/* View the layer's output buffer as a w x h x c image (no copy). */
image get_maxpool_image(maxpool_layer l)
{
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;
    return float_to_image(w,h,c,l.output);
}

/* View the layer's gradient (delta) buffer as a w x h x c image (no copy). */
image get_maxpool_delta(maxpool_layer l)
{
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;
    return float_to_image(w,h,c,l.delta);
}

/* Build a max-pooling layer.
 * batch:   number of images per minibatch
 * h, w, c: input height, width, channels
 * size:    pooling window side length
 * stride:  window step
 * padding: zero-padding added on each side
 * Output spatial size is (dim + 2*padding)/stride; `indexes` records the
 * argmax input offset for each output element so backward can route gradient. */
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
    maxpool_layer l = {0};
    l.type = MAXPOOL;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = c;
    l.pad = padding;
    l.out_w = (w + 2*padding)/stride;
    l.out_h = (h + 2*padding)/stride;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
    l.size = size;
    l.stride = stride;
    int output_size = l.out_h * l.out_w * l.out_c * batch;
    l.indexes = calloc(output_size, sizeof(int));
    l.output =  calloc(output_size, sizeof(float));
    l.delta =   calloc(output_size, sizeof(float));
    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
    #ifdef GPU
    l.forward_gpu = forward_maxpool_layer_gpu;
    l.backward_gpu = backward_maxpool_layer_gpu;
    l.indexes_gpu = cuda_make_int_array(output_size);
    l.output_gpu  = cuda_make_array(l.output, output_size);
    l.delta_gpu   = cuda_make_array(l.delta, output_size);
    #endif
    fprintf(stderr, "max          %d x %d / %d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}

/* Resize the layer for a new input width/height, reallocating the CPU buffers
 * (and rebuilding the GPU mirrors) to the new output size. */
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
{
    l->h = h;
    l->w = w;
    l->inputs = h*w*l->c;

    l->out_w = (w + 2*l->pad)/l->stride;
    l->out_h = (h + 2*l->pad)/l->stride;
    l->outputs = l->out_w * l->out_h * l->c;
    int output_size = l->outputs * l->batch;

    l->indexes = realloc(l->indexes, output_size * sizeof(int));
    l->output = realloc(l->output,
output_size * sizeof(float)); 67 | l->delta = realloc(l->delta, output_size * sizeof(float)); 68 | 69 | #ifdef GPU 70 | cuda_free((float *)l->indexes_gpu); 71 | cuda_free(l->output_gpu); 72 | cuda_free(l->delta_gpu); 73 | l->indexes_gpu = cuda_make_int_array(output_size); 74 | l->output_gpu = cuda_make_array(l->output, output_size); 75 | l->delta_gpu = cuda_make_array(l->delta, output_size); 76 | #endif 77 | } 78 | 79 | void forward_maxpool_layer(const maxpool_layer l, network_state state) 80 | { 81 | int b,i,j,k,m,n; 82 | int w_offset = -l.pad; 83 | int h_offset = -l.pad; 84 | 85 | int h = l.out_h; 86 | int w = l.out_w; 87 | int c = l.c; 88 | 89 | for(b = 0; b < l.batch; ++b){ 90 | for(k = 0; k < c; ++k){ 91 | for(i = 0; i < h; ++i){ 92 | for(j = 0; j < w; ++j){ 93 | int out_index = j + w*(i + h*(k + c*b)); 94 | float max = -FLT_MAX; 95 | int max_i = -1; 96 | for(n = 0; n < l.size; ++n){ 97 | for(m = 0; m < l.size; ++m){ 98 | int cur_h = h_offset + i*l.stride + n; 99 | int cur_w = w_offset + j*l.stride + m; 100 | int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); 101 | int valid = (cur_h >= 0 && cur_h < l.h && 102 | cur_w >= 0 && cur_w < l.w); 103 | float val = (valid != 0) ? state.input[index] : -FLT_MAX; 104 | max_i = (val > max) ? index : max_i; 105 | max = (val > max) ? 
val : max; 106 | } 107 | } 108 | l.output[out_index] = max; 109 | l.indexes[out_index] = max_i; 110 | } 111 | } 112 | } 113 | } 114 | } 115 | 116 | void backward_maxpool_layer(const maxpool_layer l, network_state state) 117 | { 118 | int i; 119 | int h = l.out_h; 120 | int w = l.out_w; 121 | int c = l.c; 122 | for(i = 0; i < h*w*c*l.batch; ++i){ 123 | int index = l.indexes[i]; 124 | state.delta[index] += l.delta[i]; 125 | } 126 | } 127 | 128 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXPOOL_LAYER_H 2 | #define MAXPOOL_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | typedef layer maxpool_layer; 10 | 11 | image get_maxpool_image(maxpool_layer l); 12 | maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding); 13 | void resize_maxpool_layer(maxpool_layer *l, int w, int h); 14 | void forward_maxpool_layer(const maxpool_layer l, network_state state); 15 | void backward_maxpool_layer(const maxpool_layer l, network_state state); 16 | 17 | #ifdef GPU 18 | void forward_maxpool_layer_gpu(maxpool_layer l, network_state state); 19 | void backward_maxpool_layer_gpu(maxpool_layer l, network_state state); 20 | #endif 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /src/darkSrc/maxpool_layer_kernels.cu: -------------------------------------------------------------------------------- 1 | #include "cuda_runtime.h" 2 | #include "curand.h" 3 | #include "cublas_v2.h" 4 | 5 | extern "C" { 6 | #include "maxpool_layer.h" 7 | #include "cuda.h" 8 | } 9 | 10 | __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes) 11 | { 12 | int h = (in_h + 2*pad)/stride; 13 | int w 
= (in_w + 2*pad)/stride; 14 | int c = in_c; 15 | 16 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 17 | if(id >= n) return; 18 | 19 | int j = id % w; 20 | id /= w; 21 | int i = id % h; 22 | id /= h; 23 | int k = id % c; 24 | id /= c; 25 | int b = id; 26 | 27 | int w_offset = -pad; 28 | int h_offset = -pad; 29 | 30 | int out_index = j + w*(i + h*(k + c*b)); 31 | float max = -INFINITY; 32 | int max_i = -1; 33 | int l, m; 34 | for(l = 0; l < size; ++l){ 35 | for(m = 0; m < size; ++m){ 36 | int cur_h = h_offset + i*stride + l; 37 | int cur_w = w_offset + j*stride + m; 38 | int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); 39 | int valid = (cur_h >= 0 && cur_h < in_h && 40 | cur_w >= 0 && cur_w < in_w); 41 | float val = (valid != 0) ? input[index] : -INFINITY; 42 | max_i = (val > max) ? index : max_i; 43 | max = (val > max) ? val : max; 44 | } 45 | } 46 | output[out_index] = max; 47 | indexes[out_index] = max_i; 48 | } 49 | 50 | __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes) 51 | { 52 | int h = (in_h + 2*pad)/stride; 53 | int w = (in_w + 2*pad)/stride; 54 | int c = in_c; 55 | int area = (size-1)/stride; 56 | 57 | int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; 58 | if(id >= n) return; 59 | 60 | int index = id; 61 | int j = id % in_w; 62 | id /= in_w; 63 | int i = id % in_h; 64 | id /= in_h; 65 | int k = id % in_c; 66 | id /= in_c; 67 | int b = id; 68 | 69 | int w_offset = -pad; 70 | int h_offset = -pad; 71 | 72 | float d = 0; 73 | int l, m; 74 | for(l = -area; l < area+1; ++l){ 75 | for(m = -area; m < area+1; ++m){ 76 | int out_w = (j-w_offset)/stride + m; 77 | int out_h = (i-h_offset)/stride + l; 78 | int out_index = out_w + w*(out_h + h*(k + c*b)); 79 | int valid = (out_w >= 0 && out_w < w && 80 | out_h >= 0 && out_h < h); 81 | d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0;
        }
    }
    prev_delta[index] += d;
}

/* Host-side wrapper: launches one thread per OUTPUT element of the layer. */
extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
    int h = layer.out_h;
    int w = layer.out_w;
    int c = layer.c;

    size_t n = h*w*c*layer.batch;

    /* Bug fix: the launch configuration had been stripped to "<<>>" in this
     * dump (angle-bracketed arguments lost); restored as
     * <<<cuda_gridsize(n), BLOCK>>> — NOTE(review): confirm cuda_gridsize()
     * and BLOCK against cuda.h, which is outside this view. */
    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

/* Host-side wrapper: one thread per INPUT element; each thread gathers the
 * gradient of every output window whose recorded argmax points at it. */
extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
    size_t n = layer.h*layer.w*layer.c*layer.batch;

    /* Same restoration of the stripped launch configuration as above. */
    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

--------------------------------------------------------------------------------
/src/darkSrc/network.h:
--------------------------------------------------------------------------------
// Oh boy, why am I about to do this....
2 | #ifndef NETWORK_H 3 | #define NETWORK_H 4 | 5 | #include 6 | #include "layer.h" 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #include "image.h" 13 | #include "data.h" 14 | #include "tree.h" 15 | 16 | typedef enum { 17 | CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM 18 | } learning_rate_policy; 19 | 20 | typedef struct network{ 21 | float *workspace; 22 | int n; 23 | int batch; 24 | int *seen; 25 | float epoch; 26 | int subdivisions; 27 | float momentum; 28 | float decay; 29 | layer *layers; 30 | int outputs; 31 | float *output; 32 | learning_rate_policy policy; 33 | 34 | float learning_rate; 35 | float gamma; 36 | float scale; 37 | float power; 38 | int time_steps; 39 | int step; 40 | int max_batches; 41 | float *scales; 42 | int *steps; 43 | int num_steps; 44 | int burn_in; 45 | 46 | int adam; 47 | float B1; 48 | float B2; 49 | float eps; 50 | 51 | int inputs; 52 | int h, w, c; 53 | int max_crop; 54 | int min_crop; 55 | float angle; 56 | float aspect; 57 | float exposure; 58 | float saturation; 59 | float hue; 60 | 61 | int gpu_index; 62 | tree *hierarchy; 63 | 64 | #ifdef GPU 65 | float **input_gpu; 66 | float **truth_gpu; 67 | #endif 68 | } network; 69 | 70 | typedef struct network_state { 71 | float *truth; 72 | float *input; 73 | float *delta; 74 | float *workspace; 75 | int train; 76 | int index; 77 | network net; 78 | } network_state; 79 | 80 | #ifdef GPU 81 | float train_networks(network *nets, int n, data d, int interval); 82 | void sync_nets(network *nets, int n, int interval); 83 | float train_network_datum_gpu(network net, float *x, float *y); 84 | float *network_predict_gpu(network net, float *input); 85 | float * get_network_output_gpu_layer(network net, int i); 86 | float * get_network_delta_gpu_layer(network net, int i); 87 | float *get_network_output_gpu(network net); 88 | void forward_network_gpu(network net, network_state state); 89 | void backward_network_gpu(network net, network_state state); 90 | void 
update_network_gpu(network net); 91 | #endif 92 | 93 | float get_current_rate(network net); 94 | int get_current_batch(network net); 95 | void free_network(network net); 96 | void compare_networks(network n1, network n2, data d); 97 | char *get_layer_string(LAYER_TYPE a); 98 | 99 | network make_network(int n); 100 | void forward_network(network net, network_state state); 101 | void backward_network(network net, network_state state); 102 | void update_network(network net); 103 | 104 | float train_network(network net, data d); 105 | float train_network_batch(network net, data d, int n); 106 | float train_network_sgd(network net, data d, int n); 107 | float train_network_datum(network net, float *x, float *y); 108 | 109 | matrix network_predict_data(network net, data test); 110 | float *network_predict(network net, float *input); 111 | float network_accuracy(network net, data d); 112 | float *network_accuracies(network net, data d, int n); 113 | float network_accuracy_multi(network net, data d, int n); 114 | void top_predictions(network net, int n, int *index); 115 | float *get_network_output(network net); 116 | float *get_network_output_layer(network net, int i); 117 | float *get_network_delta_layer(network net, int i); 118 | float *get_network_delta(network net); 119 | int get_network_output_size_layer(network net, int i); 120 | int get_network_output_size(network net); 121 | image get_network_image(network net); 122 | image get_network_image_layer(network net, int i); 123 | int get_predicted_class_network(network net); 124 | void print_network(network net); 125 | void visualize_network(network net); 126 | int resize_network(network *net, int w, int h); 127 | void set_batch_network(network *net, int b); 128 | int get_network_input_size(network net); 129 | float get_network_cost(network net); 130 | 131 | int get_network_nuisance(network net); 132 | int get_network_background(network net); 133 | 134 | #ifdef __cplusplus 135 | } 136 | #endif 137 | 138 | #endif 139 | 
140 | -------------------------------------------------------------------------------- /src/darkSrc/normalization_layer.c: -------------------------------------------------------------------------------- 1 | #include "normalization_layer.h" 2 | #include "blas.h" 3 | #include 4 | 5 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) 6 | { 7 | fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); 8 | layer layer = {0}; 9 | layer.type = NORMALIZATION; 10 | layer.batch = batch; 11 | layer.h = layer.out_h = h; 12 | layer.w = layer.out_w = w; 13 | layer.c = layer.out_c = c; 14 | layer.kappa = kappa; 15 | layer.size = size; 16 | layer.alpha = alpha; 17 | layer.beta = beta; 18 | layer.output = calloc(h * w * c * batch, sizeof(float)); 19 | layer.delta = calloc(h * w * c * batch, sizeof(float)); 20 | layer.squared = calloc(h * w * c * batch, sizeof(float)); 21 | layer.norms = calloc(h * w * c * batch, sizeof(float)); 22 | layer.inputs = w*h*c; 23 | layer.outputs = layer.inputs; 24 | 25 | layer.forward = forward_normalization_layer; 26 | layer.backward = backward_normalization_layer; 27 | #ifdef GPU 28 | layer.forward_gpu = forward_normalization_layer_gpu; 29 | layer.backward_gpu = backward_normalization_layer_gpu; 30 | 31 | layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); 32 | layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); 33 | layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); 34 | layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); 35 | #endif 36 | return layer; 37 | } 38 | 39 | void resize_normalization_layer(layer *layer, int w, int h) 40 | { 41 | int c = layer->c; 42 | int batch = layer->batch; 43 | layer->h = h; 44 | layer->w = w; 45 | layer->out_h = h; 46 | layer->out_w = w; 47 | layer->inputs = w*h*c; 48 | layer->outputs = layer->inputs; 49 | layer->output = realloc(layer->output, h * w * 
c * batch * sizeof(float)); 50 | layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float)); 51 | layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float)); 52 | layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float)); 53 | #ifdef GPU 54 | cuda_free(layer->output_gpu); 55 | cuda_free(layer->delta_gpu); 56 | cuda_free(layer->squared_gpu); 57 | cuda_free(layer->norms_gpu); 58 | layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); 59 | layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); 60 | layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); 61 | layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); 62 | #endif 63 | } 64 | 65 | void forward_normalization_layer(const layer layer, network_state state) 66 | { 67 | int k,b; 68 | int w = layer.w; 69 | int h = layer.h; 70 | int c = layer.c; 71 | scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); 72 | 73 | for(b = 0; b < layer.batch; ++b){ 74 | float *squared = layer.squared + w*h*c*b; 75 | float *norms = layer.norms + w*h*c*b; 76 | float *input = state.input + w*h*c*b; 77 | pow_cpu(w*h*c, 2, input, 1, squared, 1); 78 | 79 | const_cpu(w*h, layer.kappa, norms, 1); 80 | for(k = 0; k < layer.size/2; ++k){ 81 | axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 82 | } 83 | 84 | for(k = 1; k < layer.c; ++k){ 85 | copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 86 | int prev = k - ((layer.size-1)/2) - 1; 87 | int next = k + (layer.size/2); 88 | if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 89 | if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 90 | } 91 | } 92 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); 93 | mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1); 94 | } 95 | 96 | void backward_normalization_layer(const layer layer, network_state state) 97 | { 98 | // TODO This is 
approximate ;-) 99 | // Also this should add in to delta instead of overwritting. 100 | 101 | int w = layer.w; 102 | int h = layer.h; 103 | int c = layer.c; 104 | pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1); 105 | mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1); 106 | } 107 | 108 | #ifdef GPU 109 | void forward_normalization_layer_gpu(const layer layer, network_state state) 110 | { 111 | int k,b; 112 | int w = layer.w; 113 | int h = layer.h; 114 | int c = layer.c; 115 | scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); 116 | 117 | for(b = 0; b < layer.batch; ++b){ 118 | float *squared = layer.squared_gpu + w*h*c*b; 119 | float *norms = layer.norms_gpu + w*h*c*b; 120 | float *input = state.input + w*h*c*b; 121 | pow_ongpu(w*h*c, 2, input, 1, squared, 1); 122 | 123 | const_ongpu(w*h, layer.kappa, norms, 1); 124 | for(k = 0; k < layer.size/2; ++k){ 125 | axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); 126 | } 127 | 128 | for(k = 1; k < layer.c; ++k){ 129 | copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); 130 | int prev = k - ((layer.size-1)/2) - 1; 131 | int next = k + (layer.size/2); 132 | if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); 133 | if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); 134 | } 135 | } 136 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); 137 | mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1); 138 | } 139 | 140 | void backward_normalization_layer_gpu(const layer layer, network_state state) 141 | { 142 | // TODO This is approximate ;-) 143 | 144 | int w = layer.w; 145 | int h = layer.h; 146 | int c = layer.c; 147 | pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1); 148 | mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1); 149 | } 150 | #endif 151 | 
-------------------------------------------------------------------------------- /src/darkSrc/normalization_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef NORMALIZATION_LAYER_H 2 | #define NORMALIZATION_LAYER_H 3 | 4 | #include "image.h" 5 | #include "layer.h" 6 | #include "network.h" 7 | 8 | layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); 9 | void resize_normalization_layer(layer *layer, int h, int w); 10 | void forward_normalization_layer(const layer layer, network_state state); 11 | void backward_normalization_layer(const layer layer, network_state state); 12 | void visualize_normalization_layer(layer layer, char *window); 13 | 14 | #ifdef GPU 15 | void forward_normalization_layer_gpu(const layer layer, network_state state); 16 | void backward_normalization_layer_gpu(const layer layer, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/option_list.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "option_list.h" 5 | #include "utils.h" 6 | 7 | list *read_data_cfg(char *filename) 8 | { 9 | FILE *file = fopen(filename, "r"); 10 | if(file == 0) file_error(filename); 11 | char *line; 12 | int nu = 0; 13 | list *options = make_list(); 14 | while((line=fgetl(file)) != 0){ 15 | ++ nu; 16 | strip(line); 17 | switch(line[0]){ 18 | case '\0': 19 | case '#': 20 | case ';': 21 | free(line); 22 | break; 23 | default: 24 | if(!read_option(line, options)){ 25 | fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); 26 | free(line); 27 | } 28 | break; 29 | } 30 | } 31 | fclose(file); 32 | return options; 33 | } 34 | 35 | int read_option(char *s, list *options) 36 | { 37 | size_t i; 38 | size_t len = strlen(s); 39 | char *val = 0; 40 | for(i = 0; i < len; ++i){ 
41 | if(s[i] == '='){ 42 | s[i] = '\0'; 43 | val = s+i+1; 44 | break; 45 | } 46 | } 47 | if(i == len-1) return 0; 48 | char *key = s; 49 | option_insert(options, key, val); 50 | return 1; 51 | } 52 | 53 | void option_insert(list *l, char *key, char *val) 54 | { 55 | kvp *p = malloc(sizeof(kvp)); 56 | p->key = key; 57 | p->val = val; 58 | p->used = 0; 59 | list_insert(l, p); 60 | } 61 | 62 | void option_unused(list *l) 63 | { 64 | node *n = l->front; 65 | while(n){ 66 | kvp *p = (kvp *)n->val; 67 | if(!p->used){ 68 | fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); 69 | } 70 | n = n->next; 71 | } 72 | } 73 | 74 | char *option_find(list *l, char *key) 75 | { 76 | node *n = l->front; 77 | while(n){ 78 | kvp *p = (kvp *)n->val; 79 | if(strcmp(p->key, key) == 0){ 80 | p->used = 1; 81 | return p->val; 82 | } 83 | n = n->next; 84 | } 85 | return 0; 86 | } 87 | char *option_find_str(list *l, char *key, char *def) 88 | { 89 | char *v = option_find(l, key); 90 | if(v) return v; 91 | if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); 92 | return def; 93 | } 94 | 95 | int option_find_int(list *l, char *key, int def) 96 | { 97 | char *v = option_find(l, key); 98 | if(v) return atoi(v); 99 | fprintf(stderr, "%s: Using default '%d'\n", key, def); 100 | return def; 101 | } 102 | 103 | int option_find_int_quiet(list *l, char *key, int def) 104 | { 105 | char *v = option_find(l, key); 106 | if(v) return atoi(v); 107 | return def; 108 | } 109 | 110 | float option_find_float_quiet(list *l, char *key, float def) 111 | { 112 | char *v = option_find(l, key); 113 | if(v) return atof(v); 114 | return def; 115 | } 116 | 117 | float option_find_float(list *l, char *key, float def) 118 | { 119 | char *v = option_find(l, key); 120 | if(v) return atof(v); 121 | fprintf(stderr, "%s: Using default '%lf'\n", key, def); 122 | return def; 123 | } 124 | -------------------------------------------------------------------------------- /src/darkSrc/option_list.h: 
-------------------------------------------------------------------------------- 1 | #ifndef OPTION_LIST_H 2 | #define OPTION_LIST_H 3 | #include "list.h" 4 | 5 | typedef struct{ 6 | char *key; 7 | char *val; 8 | int used; 9 | } kvp; 10 | 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | list *read_data_cfg(char *filename); 17 | int read_option(char *s, list *options); 18 | void option_insert(list *l, char *key, char *val); 19 | char *option_find(list *l, char *key); 20 | char *option_find_str(list *l, char *key, char *def); 21 | int option_find_int(list *l, char *key, int def); 22 | int option_find_int_quiet(list *l, char *key, int def); 23 | float option_find_float(list *l, char *key, float def); 24 | float option_find_float_quiet(list *l, char *key, float def); 25 | void option_unused(list *l); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/darkSrc/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | #include "network.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | 9 | network parse_network_cfg(char *filename); 10 | network parse_network_cfg_custom(char *filename, int batch); 11 | void save_network(network net, char *filename); 12 | void save_weights(network net, char *filename); 13 | void save_weights_upto(network net, char *filename, int cutoff); 14 | void save_weights_double(network net, char *filename); 15 | void load_weights(network *net, char *filename); 16 | void load_weights_upto(network *net, char *filename, int cutoff); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/darkSrc/region_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REGION_LAYER_H 2 | #define REGION_LAYER_H 3 
| 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | typedef layer region_layer; 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords); 14 | void forward_region_layer(const region_layer l, network_state state); 15 | void backward_region_layer(const region_layer l, network_state state); 16 | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map); 17 | void resize_region_layer(layer *l, int w, int h); 18 | 19 | #ifdef GPU 20 | void forward_region_layer_gpu(const region_layer l, network_state state); 21 | void backward_region_layer_gpu(region_layer l, network_state state); 22 | #endif 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/darkSrc/reorg_layer.c: -------------------------------------------------------------------------------- 1 | #include "reorg_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | 6 | 7 | layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse) 8 | { 9 | layer l = {0}; 10 | l.type = REORG; 11 | l.batch = batch; 12 | l.stride = stride; 13 | l.h = h; 14 | l.w = w; 15 | l.c = c; 16 | if(reverse){ 17 | l.out_w = w*stride; 18 | l.out_h = h*stride; 19 | l.out_c = c/(stride*stride); 20 | }else{ 21 | l.out_w = w/stride; 22 | l.out_h = h/stride; 23 | l.out_c = c*(stride*stride); 24 | } 25 | l.reverse = reverse; 26 | fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); 27 | l.outputs = l.out_h * l.out_w * l.out_c; 28 | l.inputs = h*w*c; 29 | int output_size = l.out_h * l.out_w * l.out_c * batch; 30 | l.output = calloc(output_size, sizeof(float)); 31 | l.delta = calloc(output_size, sizeof(float)); 32 | 33 | l.forward = forward_reorg_layer; 34 | l.backward = backward_reorg_layer; 35 | #ifdef 
GPU 36 | l.forward_gpu = forward_reorg_layer_gpu; 37 | l.backward_gpu = backward_reorg_layer_gpu; 38 | 39 | l.output_gpu = cuda_make_array(l.output, output_size); 40 | l.delta_gpu = cuda_make_array(l.delta, output_size); 41 | #endif 42 | return l; 43 | } 44 | 45 | void resize_reorg_layer(layer *l, int w, int h) 46 | { 47 | int stride = l->stride; 48 | int c = l->c; 49 | 50 | l->h = h; 51 | l->w = w; 52 | 53 | if(l->reverse){ 54 | l->out_w = w*stride; 55 | l->out_h = h*stride; 56 | l->out_c = c/(stride*stride); 57 | }else{ 58 | l->out_w = w/stride; 59 | l->out_h = h/stride; 60 | l->out_c = c*(stride*stride); 61 | } 62 | 63 | l->outputs = l->out_h * l->out_w * l->out_c; 64 | l->inputs = l->outputs; 65 | int output_size = l->outputs * l->batch; 66 | 67 | l->output = realloc(l->output, output_size * sizeof(float)); 68 | l->delta = realloc(l->delta, output_size * sizeof(float)); 69 | 70 | #ifdef GPU 71 | cuda_free(l->output_gpu); 72 | cuda_free(l->delta_gpu); 73 | l->output_gpu = cuda_make_array(l->output, output_size); 74 | l->delta_gpu = cuda_make_array(l->delta, output_size); 75 | #endif 76 | } 77 | 78 | void forward_reorg_layer(const layer l, network_state state) 79 | { 80 | if(l.reverse){ 81 | reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); 82 | }else { 83 | reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); 84 | } 85 | } 86 | 87 | void backward_reorg_layer(const layer l, network_state state) 88 | { 89 | if(l.reverse){ 90 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); 91 | }else{ 92 | reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); 93 | } 94 | } 95 | 96 | #ifdef GPU 97 | void forward_reorg_layer_gpu(layer l, network_state state) 98 | { 99 | if(l.reverse){ 100 | reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); 101 | }else { 102 | reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); 103 | } 104 | } 105 | 106 | void 
backward_reorg_layer_gpu(layer l, network_state state) 107 | { 108 | if(l.reverse){ 109 | reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); 110 | }else{ 111 | reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); 112 | } 113 | } 114 | #endif 115 | -------------------------------------------------------------------------------- /src/darkSrc/reorg_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef REORG_LAYER_H 2 | #define REORG_LAYER_H 3 | 4 | #include "image.h" 5 | #include "cuda.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | 9 | layer make_reorg_layer(int batch, int h, int w, int c, int stride, int reverse); 10 | void resize_reorg_layer(layer *l, int w, int h); 11 | void forward_reorg_layer(const layer l, network_state state); 12 | void backward_reorg_layer(const layer l, network_state state); 13 | 14 | #ifdef GPU 15 | void forward_reorg_layer_gpu(layer l, network_state state); 16 | void backward_reorg_layer_gpu(layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/darkSrc/rnn_layer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RNN_LAYER_H 3 | #define RNN_LAYER_H 4 | 5 | #include "activations.h" 6 | #include "layer.h" 7 | #include "network.h" 8 | #define USET 9 | 10 | layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log); 11 | 12 | void forward_rnn_layer(layer l, network_state state); 13 | void backward_rnn_layer(layer l, network_state state); 14 | void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); 15 | 16 | #ifdef GPU 17 | void forward_rnn_layer_gpu(layer l, network_state state); 18 | void backward_rnn_layer_gpu(layer l, network_state state); 19 | void update_rnn_layer_gpu(layer l, int 
batch, float learning_rate, float momentum, float decay); 20 | void push_rnn_layer(layer l); 21 | void pull_rnn_layer(layer l); 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/darkSrc/route_layer.c: -------------------------------------------------------------------------------- 1 | #include "route_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | 6 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes) 7 | { 8 | fprintf(stderr,"route "); 9 | route_layer l = {0}; 10 | l.type = ROUTE; 11 | l.batch = batch; 12 | l.n = n; 13 | l.input_layers = input_layers; 14 | l.input_sizes = input_sizes; 15 | int i; 16 | int outputs = 0; 17 | for(i = 0; i < n; ++i){ 18 | fprintf(stderr," %d", input_layers[i]); 19 | outputs += input_sizes[i]; 20 | } 21 | fprintf(stderr, "\n"); 22 | l.outputs = outputs; 23 | l.inputs = outputs; 24 | l.delta = calloc(outputs*batch, sizeof(float)); 25 | l.output = calloc(outputs*batch, sizeof(float));; 26 | 27 | l.forward = forward_route_layer; 28 | l.backward = backward_route_layer; 29 | #ifdef GPU 30 | l.forward_gpu = forward_route_layer_gpu; 31 | l.backward_gpu = backward_route_layer_gpu; 32 | 33 | l.delta_gpu = cuda_make_array(l.delta, outputs*batch); 34 | l.output_gpu = cuda_make_array(l.output, outputs*batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void resize_route_layer(route_layer *l, network *net) 40 | { 41 | int i; 42 | layer first = net->layers[l->input_layers[0]]; 43 | l->out_w = first.out_w; 44 | l->out_h = first.out_h; 45 | l->out_c = first.out_c; 46 | l->outputs = first.outputs; 47 | l->input_sizes[0] = first.outputs; 48 | for(i = 1; i < l->n; ++i){ 49 | int index = l->input_layers[i]; 50 | layer next = net->layers[index]; 51 | l->outputs += next.outputs; 52 | l->input_sizes[i] = next.outputs; 53 | if(next.out_w == first.out_w && next.out_h == first.out_h){ 54 | l->out_c += next.out_c; 55 | }else{ 56 | 
printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h); 57 | l->out_h = l->out_w = l->out_c = 0; 58 | } 59 | } 60 | l->inputs = l->outputs; 61 | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); 62 | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); 63 | 64 | #ifdef GPU 65 | cuda_free(l->output_gpu); 66 | cuda_free(l->delta_gpu); 67 | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); 68 | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); 69 | #endif 70 | 71 | } 72 | 73 | void forward_route_layer(const route_layer l, network_state state) 74 | { 75 | int i, j; 76 | int offset = 0; 77 | for(i = 0; i < l.n; ++i){ 78 | int index = l.input_layers[i]; 79 | float *input = state.net.layers[index].output; 80 | int input_size = l.input_sizes[i]; 81 | for(j = 0; j < l.batch; ++j){ 82 | copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); 83 | } 84 | offset += input_size; 85 | } 86 | } 87 | 88 | void backward_route_layer(const route_layer l, network_state state) 89 | { 90 | int i, j; 91 | int offset = 0; 92 | for(i = 0; i < l.n; ++i){ 93 | int index = l.input_layers[i]; 94 | float *delta = state.net.layers[index].delta; 95 | int input_size = l.input_sizes[i]; 96 | for(j = 0; j < l.batch; ++j){ 97 | axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); 98 | } 99 | offset += input_size; 100 | } 101 | } 102 | 103 | #ifdef GPU 104 | void forward_route_layer_gpu(const route_layer l, network_state state) 105 | { 106 | int i, j; 107 | int offset = 0; 108 | for(i = 0; i < l.n; ++i){ 109 | int index = l.input_layers[i]; 110 | float *input = state.net.layers[index].output_gpu; 111 | int input_size = l.input_sizes[i]; 112 | for(j = 0; j < l.batch; ++j){ 113 | copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); 114 | } 115 | offset += input_size; 116 | } 117 | } 118 | 119 | void backward_route_layer_gpu(const 
route_layer l, network_state state) 120 | { 121 | int i, j; 122 | int offset = 0; 123 | for(i = 0; i < l.n; ++i){ 124 | int index = l.input_layers[i]; 125 | float *delta = state.net.layers[index].delta_gpu; 126 | int input_size = l.input_sizes[i]; 127 | for(j = 0; j < l.batch; ++j){ 128 | axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); 129 | } 130 | offset += input_size; 131 | } 132 | } 133 | #endif 134 | -------------------------------------------------------------------------------- /src/darkSrc/route_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef ROUTE_LAYER_H 2 | #define ROUTE_LAYER_H 3 | #include "network.h" 4 | #include "layer.h" 5 | 6 | typedef layer route_layer; 7 | 8 | route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size); 9 | void forward_route_layer(const route_layer l, network_state state); 10 | void backward_route_layer(const route_layer l, network_state state); 11 | void resize_route_layer(route_layer *l, network *net); 12 | 13 | #ifdef GPU 14 | void forward_route_layer_gpu(const route_layer l, network_state state); 15 | void backward_route_layer_gpu(const route_layer l, network_state state); 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/darkSrc/shortcut_layer.c: -------------------------------------------------------------------------------- 1 | #include "shortcut_layer.h" 2 | #include "cuda.h" 3 | #include "blas.h" 4 | #include 5 | #include 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) 8 | { 9 | fprintf(stderr,"Shortcut Layer: %d\n", index); 10 | layer l = {0}; 11 | l.type = SHORTCUT; 12 | l.batch = batch; 13 | l.w = w2; 14 | l.h = h2; 15 | l.c = c2; 16 | l.out_w = w; 17 | l.out_h = h; 18 | l.out_c = c; 19 | l.outputs = w*h*c; 20 | l.inputs = l.outputs; 21 | 22 | l.index = index; 23 | 24 | l.delta 
= calloc(l.outputs*batch, sizeof(float)); 25 | l.output = calloc(l.outputs*batch, sizeof(float));; 26 | 27 | l.forward = forward_shortcut_layer; 28 | l.backward = backward_shortcut_layer; 29 | #ifdef GPU 30 | l.forward_gpu = forward_shortcut_layer_gpu; 31 | l.backward_gpu = backward_shortcut_layer_gpu; 32 | 33 | l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); 34 | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); 35 | #endif 36 | return l; 37 | } 38 | 39 | void forward_shortcut_layer(const layer l, network_state state) 40 | { 41 | copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); 42 | shortcut_cpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); 43 | activate_array(l.output, l.outputs*l.batch, l.activation); 44 | } 45 | 46 | void backward_shortcut_layer(const layer l, network_state state) 47 | { 48 | gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); 49 | axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); 50 | shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); 51 | } 52 | 53 | #ifdef GPU 54 | void forward_shortcut_layer_gpu(const layer l, network_state state) 55 | { 56 | copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); 57 | shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); 58 | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); 59 | } 60 | 61 | void backward_shortcut_layer_gpu(const layer l, network_state state) 62 | { 63 | gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); 64 | axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); 65 | shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); 66 | } 67 | #endif 68 | -------------------------------------------------------------------------------- 
/src/darkSrc/shortcut_layer.h: -------------------------------------------------------------------------------- 1 | #ifndef SHORTCUT_LAYER_H 2 | #define SHORTCUT_LAYER_H 3 | 4 | #include "layer.h" 5 | #include "network.h" 6 | 7 | layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); 8 | void forward_shortcut_layer(const layer l, network_state state); 9 | void backward_shortcut_layer(const layer l, network_state state); 10 | 11 | #ifdef GPU 12 | void forward_shortcut_layer_gpu(const layer l, network_state state); 13 | void backward_shortcut_layer_gpu(const layer l, network_state state); 14 | #endif 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/darkSrc/softmax_layer.c: -------------------------------------------------------------------------------- 1 | #include "softmax_layer.h" 2 | #include "blas.h" 3 | #include "cuda.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | softmax_layer make_softmax_layer(int batch, int inputs, int groups) 11 | { 12 | assert(inputs%groups == 0); 13 | fprintf(stderr, "softmax %4d\n", inputs); 14 | softmax_layer l = {0}; 15 | l.type = SOFTMAX; 16 | l.batch = batch; 17 | l.groups = groups; 18 | l.inputs = inputs; 19 | l.outputs = inputs; 20 | l.output = calloc(inputs*batch, sizeof(float)); 21 | l.delta = calloc(inputs*batch, sizeof(float)); 22 | 23 | l.forward = forward_softmax_layer; 24 | l.backward = backward_softmax_layer; 25 | #ifdef GPU 26 | l.forward_gpu = forward_softmax_layer_gpu; 27 | l.backward_gpu = backward_softmax_layer_gpu; 28 | 29 | l.output_gpu = cuda_make_array(l.output, inputs*batch); 30 | l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 31 | #endif 32 | return l; 33 | } 34 | 35 | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output) 36 | { 37 | int b; 38 | for(b = 0; b < batch; ++b){ 39 | int i; 40 | int count = 0; 41 | for(i = 0; i < 
hierarchy->groups; ++i){ 42 | int group_size = hierarchy->group_size[i]; 43 | softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count); 44 | count += group_size; 45 | } 46 | } 47 | } 48 | 49 | void forward_softmax_layer(const softmax_layer l, network_state state) 50 | { 51 | int b; 52 | int inputs = l.inputs / l.groups; 53 | int batch = l.batch * l.groups; 54 | if(l.softmax_tree){ 55 | softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output); 56 | } else { 57 | for(b = 0; b < batch; ++b){ 58 | softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs); 59 | } 60 | } 61 | } 62 | 63 | void backward_softmax_layer(const softmax_layer l, network_state state) 64 | { 65 | int i; 66 | for(i = 0; i < l.inputs*l.batch; ++i){ 67 | state.delta[i] += l.delta[i]; 68 | } 69 | } 70 | 71 | #ifdef GPU 72 | 73 | void pull_softmax_layer_output(const softmax_layer layer) 74 | { 75 | cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); 76 | } 77 | 78 | void forward_softmax_layer_gpu(const softmax_layer l, network_state state) 79 | { 80 | int inputs = l.inputs / l.groups; 81 | int batch = l.batch * l.groups; 82 | if(l.softmax_tree){ 83 | int i; 84 | int count = 0; 85 | for (i = 0; i < l.softmax_tree->groups; ++i) { 86 | int group_size = l.softmax_tree->group_size[i]; 87 | softmax_gpu(state.input+count, group_size, inputs, batch, l.temperature, l.output_gpu + count); 88 | count += group_size; 89 | } 90 | } else { 91 | softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu); 92 | } 93 | } 94 | 95 | void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) 96 | { 97 | axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1); 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /src/darkSrc/softmax_layer.h: -------------------------------------------------------------------------------- 1 | 
#ifndef SOFTMAX_LAYER_H 2 | #define SOFTMAX_LAYER_H 3 | #include "layer.h" 4 | #include "network.h" 5 | 6 | typedef layer softmax_layer; 7 | 8 | void softmax_array(float *input, int n, float temp, float *output); 9 | softmax_layer make_softmax_layer(int batch, int inputs, int groups); 10 | void forward_softmax_layer(const softmax_layer l, network_state state); 11 | void backward_softmax_layer(const softmax_layer l, network_state state); 12 | 13 | #ifdef GPU 14 | void pull_softmax_layer_output(const softmax_layer l); 15 | void forward_softmax_layer_gpu(const softmax_layer l, network_state state); 16 | void backward_softmax_layer_gpu(const softmax_layer l, network_state state); 17 | #endif 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /src/darkSrc/tree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "tree.h" 4 | #include "utils.h" 5 | #include "data.h" 6 | 7 | void change_leaves(tree *t, char *leaf_list) 8 | { 9 | list *llist = get_paths(leaf_list); 10 | char **leaves = (char **)list_to_array(llist); 11 | int n = llist->size; 12 | int i,j; 13 | int found = 0; 14 | for(i = 0; i < t->n; ++i){ 15 | t->leaf[i] = 0; 16 | for(j = 0; j < n; ++j){ 17 | if (0==strcmp(t->name[i], leaves[j])){ 18 | t->leaf[i] = 1; 19 | ++found; 20 | break; 21 | } 22 | } 23 | } 24 | fprintf(stderr, "Found %d leaves.\n", found); 25 | } 26 | 27 | float get_hierarchy_probability(float *x, tree *hier, int c) 28 | { 29 | float p = 1; 30 | while(c >= 0){ 31 | p = p * x[c]; 32 | c = hier->parent[c]; 33 | } 34 | return p; 35 | } 36 | 37 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves) 38 | { 39 | int j; 40 | for(j = 0; j < n; ++j){ 41 | int parent = hier->parent[j]; 42 | if(parent >= 0){ 43 | predictions[j] *= predictions[parent]; 44 | } 45 | } 46 | if(only_leaves){ 47 | for(j = 0; j < n; ++j){ 48 | if(!hier->leaf[j]) predictions[j] = 0; 
49 | } 50 | } 51 | } 52 | 53 | tree *read_tree(char *filename) 54 | { 55 | tree t = {0}; 56 | FILE *fp = fopen(filename, "r"); 57 | 58 | char *line; 59 | int last_parent = -1; 60 | int group_size = 0; 61 | int groups = 0; 62 | int n = 0; 63 | while((line=fgetl(fp)) != 0){ 64 | char *id = calloc(256, sizeof(char)); 65 | int parent = -1; 66 | sscanf(line, "%s %d", id, &parent); 67 | t.parent = realloc(t.parent, (n+1)*sizeof(int)); 68 | t.parent[n] = parent; 69 | 70 | t.name = realloc(t.name, (n+1)*sizeof(char *)); 71 | t.name[n] = id; 72 | if(parent != last_parent){ 73 | ++groups; 74 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 75 | t.group_offset[groups - 1] = n - group_size; 76 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 77 | t.group_size[groups - 1] = group_size; 78 | group_size = 0; 79 | last_parent = parent; 80 | } 81 | t.group = realloc(t.group, (n+1)*sizeof(int)); 82 | t.group[n] = groups; 83 | ++n; 84 | ++group_size; 85 | } 86 | ++groups; 87 | t.group_offset = realloc(t.group_offset, groups * sizeof(int)); 88 | t.group_offset[groups - 1] = n - group_size; 89 | t.group_size = realloc(t.group_size, groups * sizeof(int)); 90 | t.group_size[groups - 1] = group_size; 91 | t.n = n; 92 | t.groups = groups; 93 | t.leaf = calloc(n, sizeof(int)); 94 | int i; 95 | for(i = 0; i < n; ++i) t.leaf[i] = 1; 96 | for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; 97 | 98 | fclose(fp); 99 | tree *tree_ptr = calloc(1, sizeof(tree)); 100 | *tree_ptr = t; 101 | //error(0); 102 | return tree_ptr; 103 | } 104 | -------------------------------------------------------------------------------- /src/darkSrc/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_H 2 | #define TREE_H 3 | 4 | typedef struct{ 5 | int *leaf; 6 | int n; 7 | int *parent; 8 | int *group; 9 | char **name; 10 | 11 | int groups; 12 | int *group_size; 13 | int *group_offset; 14 | } tree; 15 | 16 | tree 
*read_tree(char *filename); 17 | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); 18 | void change_leaves(tree *t, char *leaf_list); 19 | float get_hierarchy_probability(float *x, tree *hier, int c); 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/darkSrc/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | #include 4 | #include 5 | #include "list.h" 6 | 7 | #if defined(_MSC_VER) && _MSC_VER < 1900 8 | #define snprintf(buf,len, format,...) _snprintf_s(buf, len,len, format, __VA_ARGS__) 9 | #endif 10 | 11 | #define SECRET_NUM -1234 12 | #define TWO_PI 6.2831853071795864769252866 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | int *read_map(char *filename); 19 | void shuffle(void *arr, size_t n, size_t size); 20 | void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); 21 | void free_ptrs(void **ptrs, int n); 22 | char *basecfg(char *cfgfile); 23 | int alphanum_to_int(char c); 24 | char int_to_alphanum(int i); 25 | int read_int(int fd); 26 | void write_int(int fd, int n); 27 | void read_all(int fd, char *buffer, size_t bytes); 28 | void write_all(int fd, char *buffer, size_t bytes); 29 | int read_all_fail(int fd, char *buffer, size_t bytes); 30 | int write_all_fail(int fd, char *buffer, size_t bytes); 31 | void find_replace(char *str, char *orig, char *rep, char *output); 32 | void error(const char *s); 33 | void malloc_error(); 34 | void file_error(char *s); 35 | void strip(char *s); 36 | void strip_char(char *s, char bad); 37 | void top_k(float *a, int n, int k, int *index); 38 | list *split_str(char *s, char delim); 39 | char *fgetl(FILE *fp); 40 | list *parse_csv_line(char *line); 41 | char *copy_string(char *s); 42 | int count_fields(char *line); 43 | float *parse_fields(char *line, int n); 44 | void normalize_array(float *a, int n); 45 | void 
scale_array(float *a, int n, float s); 46 | void translate_array(float *a, int n, float s); 47 | int max_index(float *a, int n); 48 | float constrain(float min, float max, float a); 49 | int constrain_int(int a, int min, int max); 50 | float mse_array(float *a, int n); 51 | float rand_normal(); 52 | size_t rand_size_t(); 53 | float rand_uniform(float min, float max); 54 | float rand_scale(float s); 55 | int rand_int(int min, int max); 56 | float sum_array(float *a, int n); 57 | float mean_array(float *a, int n); 58 | void mean_arrays(float **a, int n, int els, float *avg); 59 | float variance_array(float *a, int n); 60 | float mag_array(float *a, int n); 61 | float dist_array(float *a, float *b, int n, int sub); 62 | float **one_hot_encode(float *a, int n, int k); 63 | float sec(clock_t clocks); 64 | int find_int_arg(int argc, char **argv, char *arg, int def); 65 | float find_float_arg(int argc, char **argv, char *arg, float def); 66 | int find_arg(int argc, char* argv[], char *arg); 67 | char *find_char_arg(int argc, char **argv, char *arg, char *def); 68 | int sample_array(float *a, int n); 69 | void print_statistics(float *a, int n); 70 | unsigned int random_gen(); 71 | float random_float(); 72 | float rand_uniform_strong(float min, float max); 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /src/errmsg.cpp: -------------------------------------------------------------------------------- 1 | #include "errmsg.h" 2 | #include 3 | 4 | errMsg* errMsg::instance = NULL; 5 | errMsg *errMsg::getInstance() 6 | { 7 | if(instance == NULL) instance = new errMsg(); 8 | return instance; 9 | } 10 | 11 | errMsg::errMsg() 12 | { 13 | } 14 | 15 | void errMsg::out( 16 | std::string file, 17 | std::string func, 18 | std::string msg, bool pause) 19 | { 20 | std::cout << "IN file<" << file << "> " 21 | << func << " : " << msg << std::endl; 22 | if(pause) exit(0); 23 | } 24 | 
-------------------------------------------------------------------------------- /src/errmsg.h: -------------------------------------------------------------------------------- 1 | #ifndef ERRMSG_H 2 | #define ERRMSG_H 3 | #include 4 | #include 5 | 6 | class errMsg 7 | { 8 | public: 9 | static errMsg* getInstance(); 10 | void out(std::string file, 11 | std::string func, 12 | std::string msd, 13 | bool pause = true); 14 | private: 15 | errMsg(); 16 | errMsg(const errMsg&); 17 | errMsg& operator=(const errMsg&); 18 | 19 | static errMsg* instance; 20 | }; 21 | 22 | #endif // ERRMSG_H 23 | -------------------------------------------------------------------------------- /src/feature/FeatureTensor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * FeatureTensor.cpp 3 | * 4 | * Created on: Dec 15, 2017 5 | * Author: zy 6 | */ 7 | 8 | #include "FeatureTensor.h" 9 | using namespace tensorflow; 10 | 11 | #define TENSORFLOW_MODEL_META "./RUNNINGDATA/tensor_networks/111.meta" 12 | #define TENSORFLOW_MODEL "./RUNNINGDATA/tensor_networks/mars-small128.ckpt-68577" 13 | 14 | FeatureTensor *FeatureTensor::instance = NULL; 15 | 16 | FeatureTensor *FeatureTensor::getInstance() { 17 | if(instance == NULL) { 18 | instance = new FeatureTensor(); 19 | } 20 | return instance; 21 | } 22 | 23 | FeatureTensor::FeatureTensor() { 24 | //prepare model: 25 | bool status = init(); 26 | if(status == false) exit(1); 27 | } 28 | 29 | FeatureTensor::~FeatureTensor() { 30 | session->Close(); 31 | delete session; 32 | output_tensors.clear(); 33 | outnames.clear(); 34 | } 35 | 36 | bool FeatureTensor::init() { 37 | tensorflow::SessionOptions sessOptions; 38 | sessOptions.config.mutable_gpu_options()->set_allow_growth(true); 39 | session = NewSession(sessOptions); 40 | if(session == nullptr) return false; 41 | 42 | const tensorflow::string pathToGraph = TENSORFLOW_MODEL_META; 43 | Status status; 44 | MetaGraphDef graph_def; 45 | status = 
ReadBinaryProto(tensorflow::Env::Default(), pathToGraph, &graph_def); 46 | if(status.ok() == false) return false; 47 | 48 | status = session->Create(graph_def.graph_def()); 49 | if(status.ok() == false) return false; 50 | 51 | const tensorflow::string checkpointPath = TENSORFLOW_MODEL; 52 | Tensor checkpointTensor(DT_STRING, TensorShape()); 53 | checkpointTensor.scalar()() = checkpointPath; 54 | status = session->Run( 55 | { {graph_def.saver_def().filename_tensor_name(), checkpointTensor}, }, 56 | {}, {graph_def.saver_def().restore_op_name()}, nullptr ); 57 | if(status.ok() == false) return false; 58 | 59 | input_layer = "Placeholder:0"; 60 | outnames.push_back("truediv:0"); 61 | feature_dim = 128; 62 | return true; 63 | } 64 | 65 | bool FeatureTensor::getRectsFeature(const cv::Mat& img, DETECTIONS& d) { 66 | std::vector mats; 67 | for(DETECTION_ROW& dbox : d) { 68 | cv::Rect rc = cv::Rect(int(dbox.tlwh(0)), int(dbox.tlwh(1)), 69 | int(dbox.tlwh(2)), int(dbox.tlwh(3))); 70 | rc.x -= (rc.height * 0.5 - rc.width) * 0.5; 71 | rc.width = rc.height * 0.5; 72 | rc.x = (rc.x >= 0 ? rc.x : 0); 73 | rc.y = (rc.y >= 0 ? rc.y : 0); 74 | rc.width = (rc.x + rc.width <= img.cols? rc.width: (img.cols-rc.x)); 75 | rc.height = (rc.y + rc.height <= img.rows? 
rc.height:(img.rows - rc.y)); 76 | 77 | cv::Mat mattmp = img(rc).clone(); 78 | cv::resize(mattmp, mattmp, cv::Size(64, 128)); 79 | mats.push_back(mattmp); 80 | } 81 | int count = mats.size(); 82 | 83 | Tensor input_tensor(DT_UINT8, TensorShape({count, 128, 64, 3})); 84 | tobuffer(mats, input_tensor.flat().data()); 85 | std::vector> feed_dict = { 86 | {input_layer, input_tensor}, 87 | }; 88 | Status status = session->Run(feed_dict, outnames, {}, &output_tensors); 89 | if(status.ok() == false) return false; 90 | float* tensor_buffer = output_tensors[0].flat().data(); 91 | int i = 0; 92 | for(DETECTION_ROW& dbox : d) { 93 | for(int j = 0; j < feature_dim; j++) 94 | dbox.feature[j] = tensor_buffer[i*feature_dim+j]; 95 | i++; 96 | } 97 | return true; 98 | } 99 | 100 | void FeatureTensor::tobuffer(const std::vector &imgs, uint8 *buf) { 101 | int pos = 0; 102 | for(const cv::Mat& img : imgs) { 103 | int Lenth = img.rows * img.cols * 3; 104 | int nr = img.rows; 105 | int nc = img.cols; 106 | if(img.isContinuous()) { 107 | nr = 1; 108 | nc = Lenth; 109 | } 110 | for(int i = 0; i < nr; i++) { 111 | const uchar* inData = img.ptr(i); 112 | for(int j = 0; j < nc; j++) { 113 | buf[pos] = *inData++; 114 | pos++; 115 | } 116 | }//end for 117 | }//end imgs; 118 | } 119 | -------------------------------------------------------------------------------- /src/feature/FeatureTensor.h: -------------------------------------------------------------------------------- 1 | #include "opencv2/opencv.hpp" 2 | #include "opencv2/core/core.hpp" 3 | #include "opencv2/highgui/highgui.hpp" 4 | #include "tensorflow/core/public/session.h" 5 | #include "tensorflow/core/protobuf/meta_graph.pb.h" 6 | 7 | #include "model.h" 8 | 9 | typedef unsigned char uint8; 10 | 11 | class FeatureTensor 12 | { 13 | public: 14 | static FeatureTensor* getInstance(); 15 | bool getRectsFeature(const cv::Mat& img, DETECTIONS& d); 16 | 17 | private: 18 | FeatureTensor(); 19 | FeatureTensor(const FeatureTensor&); 20 | 
FeatureTensor& operator = (const FeatureTensor&); 21 | static FeatureTensor* instance; 22 | bool init(); 23 | ~FeatureTensor(); 24 | 25 | void tobuffer(const std::vector &imgs, uint8 *buf); 26 | 27 | int feature_dim; 28 | tensorflow::Session* session; 29 | std::vector output_tensors; 30 | std::vector outnames; 31 | tensorflow::string input_layer; 32 | }; 33 | -------------------------------------------------------------------------------- /src/feature/dataType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef DATATYPE_H 3 | #define DATATYPEH 4 | 5 | #include 6 | #include 7 | //#include 8 | #include 9 | 10 | typedef Eigen::Matrix DETECTBOX; 11 | typedef Eigen::Matrix DETECTBOXSS; 12 | typedef Eigen::Matrix FEATURE; 13 | typedef Eigen::Matrix FEATURESS; 14 | //typedef std::vector FEATURESS; 15 | 16 | //Kalmanfilter 17 | //typedef Eigen::Matrix KAL_FILTER; 18 | typedef Eigen::Matrix KAL_MEAN; 19 | typedef Eigen::Matrix KAL_COVA; 20 | typedef Eigen::Matrix KAL_HMEAN; 21 | typedef Eigen::Matrix KAL_HCOVA; 22 | using KAL_DATA = std::pair; 23 | using KAL_HDATA = std::pair; 24 | 25 | //main 26 | using RESULT_DATA = std::pair; 27 | 28 | //tracker: 29 | using TRACKER_DATA = std::pair; 30 | using MATCH_DATA = std::pair; 31 | typedef struct t{ 32 | std::vector matches; 33 | std::vector unmatched_tracks; 34 | std::vector unmatched_detections; 35 | }TRACHER_MATCHD; 36 | 37 | //linear_assignment: 38 | typedef Eigen::Matrix DYNAMICM; 39 | 40 | 41 | #endif // DATATYPE_H 42 | -------------------------------------------------------------------------------- /src/feature/model.h: -------------------------------------------------------------------------------- 1 | #ifndef MODEL_H 2 | #define MODEL_H 3 | #include "dataType.h" 4 | #include 5 | #include "opencv2/opencv.hpp" 6 | #include "../darkSrc/network.h" 7 | 8 | /** 9 | * Each rect's data structure. 
10 | * tlwh: topleft point & (w,h) 11 | * confidence: detection confidence. 12 | * feature: the rect's 128d feature. 13 | */ 14 | class DETECTION_ROW { 15 | public: 16 | DETECTBOX tlwh; //np.float 17 | float confidence; //float 18 | FEATURE feature; //np.float32 19 | DETECTBOX to_xyah() const; 20 | DETECTBOX to_tlbr() const; 21 | }; 22 | 23 | typedef std::vector DETECTIONS; 24 | 25 | /** 26 | * Get each image's rects & corresponding features. 27 | * Method of filter conf. 28 | * Method of preprocessing. 29 | */ 30 | class ModelDetection 31 | { 32 | 33 | public: 34 | static ModelDetection* getInstance(); 35 | bool loadDataFromFile(const char* motDir, bool withFeature); 36 | bool getFrameDetections(int frame_idx, DETECTIONS& res); 37 | bool getFrameDetections(cv::Mat& frame, DETECTIONS& res); 38 | void dataMoreConf(float min_confidence, DETECTIONS& d); 39 | void dataPreprocessing(float max_bbox_overlap, DETECTIONS& d); 40 | 41 | private: 42 | ModelDetection(); 43 | ModelDetection(const ModelDetection&); 44 | ModelDetection& operator =(const ModelDetection&); 45 | static ModelDetection* instance; 46 | 47 | using AREAPAIR = std::pair; 48 | struct cmp { 49 | bool operator()(const AREAPAIR a, const AREAPAIR b) { 50 | return a.second < b.second; 51 | } 52 | }; 53 | std::map data; 54 | void _Qsort(DETECTIONS d, std::vector& a, int low, int high); 55 | bool loadFromFile; 56 | 57 | //darknet: 58 | char *input; 59 | network net; 60 | clock_t time; 61 | float thresh; 62 | float nms; 63 | char **names; 64 | //image **alphabet; 65 | 66 | image ipl_to_image(IplImage* src); 67 | }; 68 | 69 | #endif // MODEL_H 70 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "VideoTracker.h" 3 | using namespace std; 4 | 5 | #define MOTDIR "/home/zy/git-project/MOT_tracking/deep_sort/MOT16/test/MOT16-06/" 6 | #define VIDEO 
"./RUNNINGDATA/test.avi" 7 | //#define RUNGT 8 | //#define RUNMOTTENSOR 9 | #define RUNLOCALVIDEO 10 | 11 | int main() 12 | { 13 | VideoTracker* t = new VideoTracker; 14 | #ifdef RUNGT 15 | if(t->run_sequenceWithGT(MOTDIR, true) == false) { 16 | cout << t->showErrMsg() << endl; 17 | } 18 | #endif 19 | 20 | #ifdef RUNMOTTENSOR 21 | if(t->run_sequence(MOTDIR, true) == false) { 22 | cout << t->showErrMsg() << endl; 23 | } 24 | #endif 25 | 26 | #ifdef RUNLOCALVIDEO 27 | if(t->run(VIDEO, true) == false) { 28 | cout << t->showErrMsg() << endl; 29 | } 30 | #endif 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /src/matching/kalmanfilter.cpp: -------------------------------------------------------------------------------- 1 | #include "kalmanfilter.h" 2 | #include 3 | 4 | const double KalmanFilter::chi2inv95[10] = { 5 | 0, 6 | 3.8415, 7 | 5.9915, 8 | 7.8147, 9 | 9.4877, 10 | 11.070, 11 | 12.592, 12 | 14.067, 13 | 15.507, 14 | 16.919 15 | }; 16 | KalmanFilter::KalmanFilter() 17 | { 18 | int ndim = 4; 19 | double dt = 1.; 20 | 21 | _motion_mat = Eigen::MatrixXf::Identity(8, 8); 22 | for(int i = 0; i < ndim; i++) { 23 | _motion_mat(i, ndim+i) = dt; 24 | } 25 | _update_mat = Eigen::MatrixXf::Identity(4, 8); 26 | 27 | this->_std_weight_position = 1. / 20; 28 | this->_std_weight_velocity = 1. 
/ 160; 29 | } 30 | 31 | KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) 32 | { 33 | DETECTBOX mean_pos = measurement; 34 | DETECTBOX mean_vel; 35 | for(int i = 0; i < 4; i++) mean_vel(i) = 0; 36 | 37 | KAL_MEAN mean; 38 | for(int i = 0; i < 8; i++){ 39 | if(i < 4) mean(i) = mean_pos(i); 40 | else mean(i) = mean_vel(i - 4); 41 | } 42 | 43 | KAL_MEAN std; 44 | std(0) = 2 * _std_weight_position * measurement[3]; 45 | std(1) = 2 * _std_weight_position * measurement[3]; 46 | std(2) = 1e-2; 47 | std(3) = 2 * _std_weight_position * measurement[3]; 48 | std(4) = 10 * _std_weight_velocity * measurement[3]; 49 | std(5) = 10 * _std_weight_velocity * measurement[3]; 50 | std(6) = 1e-5; 51 | std(7) = 10 * _std_weight_velocity * measurement[3]; 52 | 53 | KAL_MEAN tmp = std.array().square(); 54 | KAL_COVA var = tmp.asDiagonal(); 55 | return std::make_pair(mean, var); 56 | } 57 | 58 | void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) 59 | { 60 | //revise the data; 61 | DETECTBOX std_pos; 62 | std_pos << _std_weight_position * mean(3), 63 | _std_weight_position * mean(3), 64 | 1e-2, 65 | _std_weight_position * mean(3); 66 | DETECTBOX std_vel; 67 | std_vel << _std_weight_velocity * mean(3), 68 | _std_weight_velocity * mean(3), 69 | 1e-5, 70 | _std_weight_velocity * mean(3); 71 | KAL_MEAN tmp; 72 | tmp.block<1,4>(0,0) = std_pos; 73 | tmp.block<1,4>(0,4) = std_vel; 74 | tmp = tmp.array().square(); 75 | KAL_COVA motion_cov = tmp.asDiagonal(); 76 | KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); 77 | KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); 78 | covariance1 += motion_cov; 79 | 80 | mean = mean1; 81 | covariance = covariance1; 82 | } 83 | 84 | KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) 85 | { 86 | DETECTBOX std; 87 | std << _std_weight_position * mean(3), _std_weight_position * mean(3), 88 | 1e-1, _std_weight_position * mean(3); 89 | KAL_HMEAN mean1 = _update_mat * 
mean.transpose(); 90 | KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); 91 | Eigen::Matrix diag = std.asDiagonal(); 92 | diag = diag.array().square().matrix(); 93 | covariance1 += diag; 94 | // covariance1.diagonal() << diag; 95 | return std::make_pair(mean1, covariance1); 96 | } 97 | 98 | KAL_DATA 99 | KalmanFilter::update( 100 | const KAL_MEAN &mean, 101 | const KAL_COVA &covariance, 102 | const DETECTBOX &measurement) 103 | { 104 | KAL_HDATA pa = project(mean, covariance); 105 | KAL_HMEAN projected_mean = pa.first; 106 | KAL_HCOVA projected_cov = pa.second; 107 | 108 | //chol_factor, lower = 109 | //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) 110 | //kalmain_gain = 111 | //scipy.linalg.cho_solve((cho_factor, lower), 112 | //np.dot(covariance, self._upadte_mat.T).T, 113 | //check_finite=False).T 114 | Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); 115 | Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 116 | Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 117 | auto tmp = innovation*(kalman_gain.transpose()); 118 | KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); 119 | KAL_COVA new_covariance = covariance - kalman_gain*projected_cov*(kalman_gain.transpose()); 120 | return std::make_pair(new_mean, new_covariance); 121 | } 122 | 123 | Eigen::Matrix 124 | KalmanFilter::gating_distance( 125 | const KAL_MEAN &mean, 126 | const KAL_COVA &covariance, 127 | const std::vector &measurements, 128 | bool only_position) 129 | { 130 | KAL_HDATA pa = this->project(mean, covariance); 131 | if(only_position) { 132 | printf("not implement!"); 133 | exit(0); 134 | } 135 | KAL_HMEAN mean1 = pa.first; 136 | KAL_HCOVA covariance1 = pa.second; 137 | 138 | // Eigen::Matrix d(size, 4); 139 | DETECTBOXSS d(measurements.size(), 4); 140 | int pos = 0; 141 | for(DETECTBOX box:measurements) { 142 | d.row(pos++) = box - mean1; 143 | } 144 | Eigen::Matrix 
factor = covariance1.llt().matrixL(); 145 | Eigen::Matrix z = factor.triangularView().solve(d).transpose(); 146 | auto zz = ((z.array())*(z.array())).matrix(); 147 | auto square_maha = zz.colwise().sum(); 148 | return square_maha; 149 | } 150 | 151 | -------------------------------------------------------------------------------- /src/matching/kalmanfilter.h: -------------------------------------------------------------------------------- 1 | #ifndef KALMANFILTER_H 2 | #define KALMANFILTER_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | class KalmanFilter 7 | { 8 | public: 9 | static const double chi2inv95[10]; 10 | KalmanFilter(); 11 | KAL_DATA initiate(const DETECTBOX& measurement); 12 | void predict(KAL_MEAN& mean, KAL_COVA& covariance); 13 | KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); 14 | KAL_DATA update(const KAL_MEAN& mean, 15 | const KAL_COVA& covariance, 16 | const DETECTBOX& measurement); 17 | 18 | Eigen::Matrix gating_distance( 19 | const KAL_MEAN& mean, 20 | const KAL_COVA& covariance, 21 | const std::vector& measurements, 22 | bool only_position = false); 23 | 24 | private: 25 | Eigen::Matrix _motion_mat; 26 | Eigen::Matrix _update_mat; 27 | float _std_weight_position; 28 | float _std_weight_velocity; 29 | }; 30 | 31 | #endif // KALMANFILTER_H 32 | -------------------------------------------------------------------------------- /src/matching/linear_assignment.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_ASSIGNMENT_H 2 | #define LINEAR_ASSIGNMENT_H 3 | #include "../feature/dataType.h" 4 | #include "tracker.h" 5 | 6 | #define INFTY_COST 1e5 7 | class tracker; 8 | //for matching; 9 | class linear_assignment 10 | { 11 | linear_assignment(); 12 | linear_assignment(const linear_assignment& ); 13 | linear_assignment& operator=(const linear_assignment&); 14 | static linear_assignment* instance; 15 | 16 | public: 17 | static linear_assignment* getInstance(); 18 | 
TRACHER_MATCHD matching_cascade(tracker* distance_metric, 19 | tracker::GATED_METRIC_FUNC distance_metric_func, 20 | float max_distance, 21 | int cascade_depth, 22 | std::vector& tracks, 23 | const DETECTIONS& detections, 24 | std::vector &track_indices, 25 | std::vector detection_indices = std::vector()); 26 | TRACHER_MATCHD min_cost_matching( 27 | tracker* distance_metric, 28 | tracker::GATED_METRIC_FUNC distance_metric_func, 29 | float max_distance, 30 | std::vector& tracks, 31 | const DETECTIONS& detections, 32 | std::vector& track_indices, 33 | std::vector& detection_indices); 34 | DYNAMICM gate_cost_matrix( 35 | KalmanFilter* kf, 36 | DYNAMICM& cost_matrix, 37 | std::vector& tracks, 38 | const DETECTIONS& detections, 39 | const std::vector& track_indices, 40 | const std::vector& detection_indices, 41 | float gated_cost = INFTY_COST, 42 | bool only_position = false); 43 | }; 44 | 45 | #endif // LINEAR_ASSIGNMENT_H 46 | -------------------------------------------------------------------------------- /src/matching/nn_matching.cpp: -------------------------------------------------------------------------------- 1 | #include "nn_matching.h" 2 | #include "../errmsg.h" 3 | 4 | using namespace Eigen; 5 | 6 | NearNeighborDisMetric::NearNeighborDisMetric( 7 | NearNeighborDisMetric::METRIC_TYPE metric, 8 | float matching_threshold, int budget) 9 | { 10 | if(metric == euclidean) { 11 | _metric = &NearNeighborDisMetric::_nneuclidean_distance; 12 | } else if (metric == cosine) { 13 | _metric = &NearNeighborDisMetric::_nncosine_distance; 14 | } else { 15 | errMsg::getInstance()->out( 16 | "nn_matching.cpp", 17 | "NearestNeighborDistanceMetric::NearestNeighborDistanceMetric", 18 | "Invalid metric; must be either 'euclidean' or 'cosine'", true); 19 | } 20 | this->mating_threshold = matching_threshold; 21 | this->budget = budget; 22 | this->samples.clear(); 23 | } 24 | 25 | /* 26 | void 27 | NearNeighborDisMetric::partial_fit( 28 | FEATURESS& features, 29 | std::vector 
targets, 30 | std::vector active_targets) 31 | { 32 | int size = targets.size(); 33 | for(int i = 0; i < size; i++) { 34 | FEATURE feature = features.row(i); 35 | int target = targets[i]; 36 | 37 | bool isActive = false; 38 | for(int k:active_targets) { 39 | if(k == target) { 40 | isActive = true; 41 | break; 42 | } 43 | } 44 | if(samples.find(target) != samples.end()) {//exist 45 | } else {//not exist 46 | // 47 | } 48 | }//each (feature,target) 49 | }*/ 50 | 51 | DYNAMICM 52 | NearNeighborDisMetric::distance( 53 | const FEATURESS &features, 54 | const std::vector& targets) 55 | { 56 | DYNAMICM cost_matrix = Eigen::MatrixXf::Zero(targets.size(), features.rows()); 57 | int idx = 0; 58 | for(int target:targets) { 59 | cost_matrix.row(idx) = (this->*_metric)(this->samples[target], features); 60 | idx++; 61 | } 62 | return cost_matrix; 63 | } 64 | 65 | void 66 | NearNeighborDisMetric::partial_fit( 67 | std::vector &tid_feats, 68 | std::vector &active_targets) 69 | { 70 | /*python code: 71 | * let feature(target_id) append to samples; 72 | * && delete not comfirmed target_id from samples. 
73 | * update samples; 74 | */ 75 | for(TRACKER_DATA& data:tid_feats) { 76 | int track_id = data.first; 77 | FEATURESS newFeatOne = data.second; 78 | 79 | if(samples.find(track_id) != samples.end()) {//append 80 | int oldSize = samples[track_id].rows(); 81 | int addSize = newFeatOne.rows(); 82 | int newSize = oldSize + addSize; 83 | 84 | if(newSize <= this->budget) { 85 | FEATURESS newSampleFeatures(newSize, 128); 86 | newSampleFeatures.block(0,0, oldSize, 128) = samples[track_id]; 87 | newSampleFeatures.block(oldSize, 0, addSize, 128) = newFeatOne; 88 | samples[track_id] = newSampleFeatures; 89 | } else { 90 | if(oldSize < this->budget) {//original space is not enough; 91 | FEATURESS newSampleFeatures(this->budget, 128); 92 | if(addSize >= this->budget) { 93 | newSampleFeatures = newFeatOne.block(0, 0, this->budget, 128); 94 | } else { 95 | newSampleFeatures.block(0, 0, this->budget-addSize, 128) = 96 | samples[track_id].block(addSize-1, 0, this->budget-addSize, 128).eval(); 97 | newSampleFeatures.block(this->budget-addSize, 0, addSize, 128) = newFeatOne; 98 | } 99 | samples[track_id] = newSampleFeatures; 100 | } else {//original space is ok; 101 | if(addSize >= this->budget) { 102 | samples[track_id] = newFeatOne.block(0,0, this->budget, 128); 103 | } else { 104 | samples[track_id].block(0, 0, this->budget-addSize, 128) = 105 | samples[track_id].block(addSize-1, 0, this->budget-addSize, 128).eval(); 106 | samples[track_id].block(this->budget-addSize, 0, addSize, 128) = newFeatOne; 107 | } 108 | } 109 | } 110 | } else {//not exit, create new one; 111 | samples[track_id] = newFeatOne; 112 | } 113 | }//add features; 114 | 115 | //erase the samples which not in active_targets; 116 | for(std::map::iterator i = samples.begin(); i != samples.end();) { 117 | bool flag = false; 118 | for(int j:active_targets) if(j == i->first) { flag=true; break; } 119 | if(flag == false) samples.erase(i++); 120 | else i++; 121 | } 122 | } 123 | 124 | Eigen::VectorXf 125 | 
NearNeighborDisMetric::_nncosine_distance( 126 | const FEATURESS &x, const FEATURESS &y) 127 | { 128 | MatrixXf distances = _cosine_distance(x,y); 129 | VectorXf res = distances.colwise().minCoeff().transpose(); 130 | return res; 131 | } 132 | 133 | Eigen::VectorXf 134 | NearNeighborDisMetric::_nneuclidean_distance( 135 | const FEATURESS &x, const FEATURESS &y) 136 | { 137 | MatrixXf distances = _pdist(x,y); 138 | VectorXf res = distances.colwise().maxCoeff().transpose(); 139 | res = res.array().max(VectorXf::Zero(res.rows()).array()); 140 | return res; 141 | } 142 | 143 | Eigen::MatrixXf 144 | NearNeighborDisMetric::_pdist(const FEATURESS &x, const FEATURESS &y) 145 | { 146 | int len1 = x.rows(), len2 = y.rows(); 147 | if(len1 == 0 || len2 == 0) { 148 | return Eigen::MatrixXf::Zero(len1, len2); 149 | } 150 | MatrixXf res = x * y.transpose()* -2; 151 | res = res.colwise() + x.rowwise().squaredNorm(); 152 | res = res.rowwise() + y.rowwise().squaredNorm().transpose(); 153 | res = res.array().max(MatrixXf::Zero(res.rows(), res.cols()).array()); 154 | return res; 155 | } 156 | 157 | Eigen::MatrixXf 158 | NearNeighborDisMetric::_cosine_distance( 159 | const FEATURESS & a, 160 | const FEATURESS& b, bool data_is_normalized) { 161 | if(data_is_normalized == true) { 162 | //undo: 163 | assert(false); 164 | } 165 | MatrixXf res = 1. 
- (a*b.transpose()).array(); 166 | return res; 167 | } 168 | -------------------------------------------------------------------------------- /src/matching/nn_matching.h: -------------------------------------------------------------------------------- 1 | #ifndef NN_MATCHING_H 2 | #define NN_MATCHING_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | #include 7 | 8 | //A tool to calculate distance; 9 | class NearNeighborDisMetric{ 10 | public: 11 | enum METRIC_TYPE{euclidean=1, cosine}; 12 | NearNeighborDisMetric(METRIC_TYPE metric, 13 | float matching_threshold, 14 | int budget); 15 | DYNAMICM distance(const FEATURESS& features, const std::vector &targets); 16 | // void partial_fit(FEATURESS& features, std::vector targets, std::vector active_targets); 17 | void partial_fit(std::vector& tid_feats, std::vector& active_targets); 18 | float mating_threshold; 19 | 20 | private: 21 | typedef Eigen::VectorXf (NearNeighborDisMetric::*PTRFUN)(const FEATURESS&, const FEATURESS&); 22 | Eigen::VectorXf _nncosine_distance(const FEATURESS& x, const FEATURESS& y); 23 | Eigen::VectorXf _nneuclidean_distance(const FEATURESS& x, const FEATURESS& y); 24 | 25 | Eigen::MatrixXf _pdist(const FEATURESS& x, const FEATURESS& y); 26 | Eigen::MatrixXf _cosine_distance(const FEATURESS & a, const FEATURESS& b, bool data_is_normalized = false); 27 | private: 28 | PTRFUN _metric; 29 | int budget; 30 | std::map samples; 31 | }; 32 | 33 | #endif // NN_MATCHING_H 34 | -------------------------------------------------------------------------------- /src/matching/track.cpp: -------------------------------------------------------------------------------- 1 | #include "track.h" 2 | 3 | Track::Track(KAL_MEAN& mean, KAL_COVA& covariance, int track_id, int n_init, int max_age, const FEATURE& feature) 4 | { 5 | this->mean = mean; 6 | this->covariance = covariance; 7 | this->track_id = track_id; 8 | this->hits = 1; 9 | this->age = 1; 10 | this->time_since_update = 0; 11 | this->state = 
TrackState::Tentative; 12 | features = FEATURESS(1, 128); 13 | features.row(0) = feature;//features.rows() must = 0; 14 | 15 | this->_n_init = n_init; 16 | this->_max_age = max_age; 17 | } 18 | 19 | void Track::predit(KalmanFilter *kf) 20 | { 21 | /*Propagate the state distribution to the current time step using a 22 | Kalman filter prediction step. 23 | 24 | Parameters 25 | ---------- 26 | kf : kalman_filter.KalmanFilter 27 | The Kalman filter. 28 | */ 29 | 30 | kf->predict(this->mean, this->covariance); 31 | this->age += 1; 32 | this->time_since_update += 1; 33 | } 34 | 35 | void Track::update(KalmanFilter * const kf, const DETECTION_ROW& detection) 36 | { 37 | KAL_DATA pa = kf->update(this->mean, this->covariance, detection.to_xyah()); 38 | this->mean = pa.first; 39 | this->covariance = pa.second; 40 | 41 | featuresAppendOne(detection.feature); 42 | // this->features.row(features.rows()) = detection.feature; 43 | this->hits += 1; 44 | this->time_since_update = 0; 45 | if(this->state == TrackState::Tentative && this->hits >= this->_n_init) { 46 | this->state = TrackState::Confirmed; 47 | } 48 | } 49 | 50 | void Track::mark_missed() 51 | { 52 | if(this->state == TrackState::Tentative) { 53 | this->state = TrackState::Deleted; 54 | } else if(this->time_since_update > this->_max_age) { 55 | this->state = TrackState::Deleted; 56 | } 57 | } 58 | 59 | bool Track::is_confirmed() 60 | { 61 | return this->state == TrackState::Confirmed; 62 | } 63 | 64 | bool Track::is_deleted() 65 | { 66 | return this->state == TrackState::Deleted; 67 | } 68 | 69 | bool Track::is_tentative() 70 | { 71 | return this->state == TrackState::Tentative; 72 | } 73 | 74 | DETECTBOX Track::to_tlwh() 75 | { 76 | DETECTBOX ret = mean.leftCols(4); 77 | ret(2) *= ret(3); 78 | ret.leftCols(2) -= (ret.rightCols(2)/2); 79 | return ret; 80 | } 81 | 82 | void Track::featuresAppendOne(const FEATURE &f) 83 | { 84 | int size = this->features.rows(); 85 | FEATURESS newfeatures = FEATURESS(size+1, 128); 86 | 
newfeatures.block(0, 0, size, 128) = this->features; 87 | newfeatures.row(size) = f; 88 | features = newfeatures; 89 | } 90 | -------------------------------------------------------------------------------- /src/matching/track.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACK_H 2 | #define TRACK_H 3 | 4 | #include "../feature/dataType.h" 5 | 6 | #include "kalmanfilter.h" 7 | #include "../feature/model.h" 8 | 9 | class Track 10 | { 11 | /*""" 12 | A single target track with state space `(x, y, a, h)` and associated 13 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 14 | aspect ratio and `h` is the height. 15 | 16 | Parameters 17 | ---------- 18 | mean : ndarray 19 | Mean vector of the initial state distribution. 20 | covariance : ndarray 21 | Covariance matrix of the initial state distribution. 22 | track_id : int 23 | A unique track identifier. 24 | n_init : int 25 | Number of consecutive detections before the track is confirmed. The 26 | track state is set to `Deleted` if a miss occurs within the first 27 | `n_init` frames. 28 | max_age : int 29 | The maximum number of consecutive misses before the track state is 30 | set to `Deleted`. 31 | feature : Optional[ndarray] 32 | Feature vector of the detection this track originates from. If not None, 33 | this feature is added to the `features` cache. 34 | 35 | Attributes 36 | ---------- 37 | mean : ndarray 38 | Mean vector of the initial state distribution. 39 | covariance : ndarray 40 | Covariance matrix of the initial state distribution. 41 | track_id : int 42 | A unique track identifier. 43 | hits : int 44 | Total number of measurement updates. 45 | age : int 46 | Total number of frames since first occurance. 47 | time_since_update : int 48 | Total number of frames since last measurement update. 49 | state : TrackState 50 | The current track state. 51 | features : List[ndarray] 52 | A cache of features. 
On each measurement update, the associated feature 53 | vector is added to this list. 54 | 55 | """*/ 56 | enum TrackState {Tentative = 1, Confirmed, Deleted}; 57 | 58 | public: 59 | Track(KAL_MEAN& mean, KAL_COVA& covariance, int track_id, 60 | int n_init, int max_age, const FEATURE& feature); 61 | void predit(KalmanFilter *kf); 62 | void update(KalmanFilter * const kf, const DETECTION_ROW &detection); 63 | void mark_missed(); 64 | bool is_confirmed(); 65 | bool is_deleted(); 66 | bool is_tentative(); 67 | DETECTBOX to_tlwh(); 68 | int time_since_update; 69 | int track_id; 70 | FEATURESS features; 71 | KAL_MEAN mean; 72 | KAL_COVA covariance; 73 | 74 | int hits; 75 | int age; 76 | int _n_init; 77 | int _max_age; 78 | TrackState state; 79 | private: 80 | void featuresAppendOne(const FEATURE& f); 81 | }; 82 | 83 | #endif // TRACK_H 84 | -------------------------------------------------------------------------------- /src/matching/tracker.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACKER_H 2 | #define TRACKER_H 3 | #include 4 | 5 | 6 | #include "kalmanfilter.h" 7 | #include "track.h" 8 | #include "../feature/model.h" 9 | 10 | class NearNeighborDisMetric; 11 | 12 | class tracker 13 | { 14 | public: 15 | NearNeighborDisMetric* metric; 16 | float max_iou_distance; 17 | int max_age; 18 | int n_init; 19 | 20 | KalmanFilter* kf; 21 | 22 | int _next_idx; 23 | public: 24 | std::vector tracks; 25 | tracker(/*NearNeighborDisMetric* metric,*/ 26 | float max_cosine_distance, int nn_budget, 27 | float max_iou_distance = 0.7, 28 | int max_age = 30, int n_init=3); 29 | void predict(); 30 | void update(const DETECTIONS& detections); 31 | typedef DYNAMICM (tracker::* GATED_METRIC_FUNC)( 32 | std::vector& tracks, 33 | const DETECTIONS& dets, 34 | const std::vector& track_indices, 35 | const std::vector& detection_indices); 36 | private: 37 | void _match(const DETECTIONS& detections, TRACHER_MATCHD& res); 38 | void 
_initiate_track(const DETECTION_ROW& detection); 39 | public: 40 | DYNAMICM gated_matric( 41 | std::vector& tracks, 42 | const DETECTIONS& dets, 43 | const std::vector& track_indices, 44 | const std::vector& detection_indices); 45 | DYNAMICM iou_cost( 46 | std::vector& tracks, 47 | const DETECTIONS& dets, 48 | const std::vector& track_indices, 49 | const std::vector& detection_indices); 50 | Eigen::VectorXf iou(DETECTBOX& bbox, 51 | DETECTBOXSS &candidates); 52 | }; 53 | 54 | #endif // TRACKER_H 55 | -------------------------------------------------------------------------------- /src/thirdPart/hungarianoper.cpp: -------------------------------------------------------------------------------- 1 | #include "hungarianoper.h" 2 | 3 | Eigen::Matrix HungarianOper::Solve(const DYNAMICM &cost_matrix) 4 | { 5 | int rows = cost_matrix.rows(); 6 | int cols = cost_matrix.cols(); 7 | Matrix matrix(rows, cols); 8 | for (int row = 0; row < rows; row++) { 9 | for (int col = 0; col < cols; col++) { 10 | matrix(row, col) = cost_matrix(row, col); 11 | } 12 | } 13 | //Munkres get matrix; 14 | Munkres m; 15 | m.solve(matrix); 16 | 17 | // 18 | std::vector> pairs; 19 | for (int row = 0; row < rows; row++) { 20 | for (int col = 0; col < cols; col++) { 21 | int tmp = (int)matrix(row, col); 22 | if (tmp == 0) pairs.push_back(std::make_pair(row, col)); 23 | } 24 | } 25 | // 26 | int count = pairs.size(); 27 | Eigen::Matrix re(count, 2); 28 | for (int i = 0; i < count; i++) { 29 | re(i, 0) = pairs[i].first; 30 | re(i, 1) = pairs[i].second; 31 | } 32 | return re; 33 | }//end Solve; 34 | -------------------------------------------------------------------------------- /src/thirdPart/hungarianoper.h: -------------------------------------------------------------------------------- 1 | #ifndef HUNGARIANOPER_H 2 | #define HUNGARIANOPER_H 3 | #include "munkres/munkres.h" 4 | #include "munkres/adapters/boostmatrixadapter.h" 5 | #include "../feature/dataType.h" 6 | 7 | class HungarianOper { 8 | 
public: 9 | static Eigen::Matrix Solve(const DYNAMICM &cost_matrix); 10 | }; 11 | 12 | #endif // HUNGARIANOPER_H 13 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/adapter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #include "adapter.h" 20 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/adapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef _ADAPTER_H_ 20 | #define _ADAPTER_H_ 21 | 22 | #include "../matrix.h" 23 | #include "../munkres.h" 24 | 25 | template class Adapter 26 | { 27 | public: 28 | virtual Matrix convertToMatrix(const Container &con) const = 0; 29 | virtual void convertFromMatrix(Container &con, const Matrix &matrix) const = 0; 30 | virtual void solve(Container &con) 31 | { 32 | auto matrix = convertToMatrix(con); 33 | m_munkres.solve(matrix); 34 | convertFromMatrix(con, matrix); 35 | } 36 | protected: 37 | Munkres m_munkres; 38 | }; 39 | 40 | #endif /* _ADAPTER_H_ */ 41 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/boostmatrixadapter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #include "boostmatrixadapter.h" 20 | 21 | //template class BoostMatrixAdapter; 22 | //template class BoostMatrixAdapter; 23 | //template class BoostMatrixAdapter; 24 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/adapters/boostmatrixadapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Miroslav Krajicek 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation; either version 2 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program; if not, write to the Free Software 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 | */ 18 | 19 | #ifndef _BOOSTMATRIXADAPTER_H_ 20 | #define _BOOSTMATRIXADAPTER_H_ 21 | 22 | #include "adapter.h" 23 | #ifndef WIN32 24 | #include 25 | #endif 26 | #include 27 | 28 | template class BoostMatrixAdapter : public Adapter > 29 | { 30 | public: 31 | virtual Matrix convertToMatrix(const boost::numeric::ublas::matrix &boost_matrix) const override 32 | { 33 | const auto rows = boost_matrix.size1 (); 34 | const auto columns = boost_matrix.size2 (); 35 | Matrix matrix (rows, columns); 36 | for (int i = 0; i < rows; ++i) { 37 | for (int j = 0; j < columns; ++j) { 38 | matrix (i, j) = boost_matrix (i, j); 39 | } 40 | } 41 | return matrix; 42 | } 43 | 44 | virtual void convertFromMatrix(boost::numeric::ublas::matrix &boost_matrix,const Matrix &matrix) const override 45 | { 46 | const auto rows = matrix.rows(); 47 | const auto columns = matrix.columns(); 48 | for (int i = 0; i < rows; ++i) { 49 | for (int j = 0; j < columns; ++j) { 50 | boost_matrix (i, j) = matrix (i, j); 51 | } 52 | } 53 | } 54 | }; 55 | 56 | #endif /* _BOOSTMATRIXADAPTER_H_ */ 57 | -------------------------------------------------------------------------------- /src/thirdPart/munkres/munkres.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007 John Weaver 3 | * Copyright (c) 2015 Miroslav Krajicek 4 | * 5 | * This program is free software; you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation; either version 2 of the License, or 8 | * (at your option) any later version. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | */ 19 | 20 | #include "munkres.h" 21 | 22 | template class Munkres; 23 | template class Munkres; 24 | template class Munkres; 25 | 26 | --------------------------------------------------------------------------------